From 0431f96ca87abe0fd8ea1d0acf63175ad6aeebbd Mon Sep 17 00:00:00 2001 From: Romil Date: Fri, 3 Feb 2023 16:47:11 -0800 Subject: [PATCH 001/183] Working Ray K8s node provider based on SSH --- Dockerfile_k8s | 45 + sky/skylet/providers/kubernetes/__init__.py | 2 + sky/skylet/providers/kubernetes/config.py | 348 + .../providers/kubernetes/kubectl-rsync.sh | 30 + .../providers/kubernetes/node_provider.py | 348 + sky/skylet/providers/kubernetes/utils.py | 58 + tests/playground/clean_k8s.sh | 1 + tests/playground/deployment/delete.sh | 1 + tests/playground/deployment/run.sh | 6 + .../skypilot_ssh_k8s_deployment.yaml | 54 + tests/playground/kind/cluster.yaml | 11077 ++++++++++++++++ tests/playground/kind/create_cluster.sh | 4 + tests/playground/kind/portmap_gen.py | 19 + tests/playground/ray_k8s_example_full.yaml | 213 + tests/playground/ray_k8s_sky.yaml | 265 + 15 files changed, 12471 insertions(+) create mode 100644 Dockerfile_k8s create mode 100644 sky/skylet/providers/kubernetes/__init__.py create mode 100644 sky/skylet/providers/kubernetes/config.py create mode 100644 sky/skylet/providers/kubernetes/kubectl-rsync.sh create mode 100644 sky/skylet/providers/kubernetes/node_provider.py create mode 100644 sky/skylet/providers/kubernetes/utils.py create mode 100644 tests/playground/clean_k8s.sh create mode 100644 tests/playground/deployment/delete.sh create mode 100644 tests/playground/deployment/run.sh create mode 100644 tests/playground/deployment/skypilot_ssh_k8s_deployment.yaml create mode 100644 tests/playground/kind/cluster.yaml create mode 100644 tests/playground/kind/create_cluster.sh create mode 100644 tests/playground/kind/portmap_gen.py create mode 100644 tests/playground/ray_k8s_example_full.yaml create mode 100644 tests/playground/ray_k8s_sky.yaml diff --git a/Dockerfile_k8s b/Dockerfile_k8s new file mode 100644 index 00000000000..4aa78ec9d3c --- /dev/null +++ b/Dockerfile_k8s @@ -0,0 +1,45 @@ +# docker build -t skypilot:latest -f Dockerfile_k8s ./sky +FROM 
continuumio/miniconda3:4.11.0 + +# Initialize conda for root user, install ssh and other local dependencies +RUN apt update -y && \ + apt install rsync sudo patch openssh-server pciutils nano -y && \ + rm -rf /var/lib/apt/lists/* && \ + apt remove -y python3 && \ + conda init + +# Setup SSH and generate hostkeys +RUN mkdir -p /var/run/sshd && \ + sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \ + sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd && \ + cd /etc/ssh/ && \ + ssh-keygen -A + +# Setup new user named sky and add to sudoers. Also add /opt/conda/bin to sudo path. +RUN useradd -m -s /bin/bash sky && \ + echo "sky ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers && \ + echo 'Defaults secure_path="/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin"' > /etc/sudoers.d/sky + +# Switch to sky user +USER sky + +# Install SkyPilot pip dependencies +# Hack: we only install SkyPilot[aws] to optimize build time +RUN pip install wheel Click colorama cryptography jinja2 jsonschema && \ + pip install networkx oauth2client pandas pendulum PrettyTable && \ + pip install ray[default]==2.01 rich tabulate filelock 'grpcio<=1.43.0' && \ + pip install packaging 'protobuf<4.0.0' psutil pulp && \ + pip install awscli boto3 pycryptodome==3.12.0 && \ + pip install docker + +# Install SkyPilot. This is purposely separate from installing SkyPilot +# dependencies to optimize rebuild time +COPY . /skypilot/sky/ + +RUN cd /skypilot/ && \ + sudo mv -v sky/setup_files/* . 
&& \ + pip install ".[aws]" + +# Set WORKDIR and initialize conda for sky user +WORKDIR /home/sky +RUN conda init diff --git a/sky/skylet/providers/kubernetes/__init__.py b/sky/skylet/providers/kubernetes/__init__.py new file mode 100644 index 00000000000..0d1311c16f9 --- /dev/null +++ b/sky/skylet/providers/kubernetes/__init__.py @@ -0,0 +1,2 @@ +from sky.skylet.providers.kubernetes.utils import core_api, log_prefix, networking_api, auth_api +from sky.skylet.providers.kubernetes.node_provider import KubernetesNodeProvider diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py new file mode 100644 index 00000000000..7b392614f68 --- /dev/null +++ b/sky/skylet/providers/kubernetes/config.py @@ -0,0 +1,348 @@ +import copy +import logging +import math +import re + +from kubernetes import client +from kubernetes.client.rest import ApiException + +from sky.skylet.providers.kubernetes import auth_api, core_api, log_prefix + +logger = logging.getLogger(__name__) + +MEMORY_SIZE_UNITS = { + "K": 2 ** 10, + "M": 2 ** 20, + "G": 2 ** 30, + "T": 2 ** 40, + "P": 2 ** 50, +} + + +class InvalidNamespaceError(ValueError): + def __init__(self, field_name, namespace): + self.message = ( + "Namespace of {} config doesn't match provided " + "namespace '{}'. 
class InvalidNamespaceError(ValueError):
    """Raised when a resource config names a namespace other than the provider's.

    Args:
        field_name: Name of the provider-config field that holds the
            offending resource (used only for the error message).
        namespace: The namespace the provider is configured to use.
    """

    def __init__(self, field_name, namespace):
        self.message = (
            "Namespace of {} config doesn't match provided "
            "namespace '{}'. Either set it to {} or remove the "
            "field".format(field_name, namespace, namespace)
        )
        # Populate ``args`` so repr(), logging and pickling carry the message.
        super().__init__(self.message)

    def __str__(self):
        return self.message


def using_existing_msg(resource_type, name):
    """Return the log message for reusing an existing resource."""
    return "using existing {} '{}'".format(resource_type, name)


def updating_existing_msg(resource_type, name):
    """Return the log message for patching an existing resource."""
    return "updating existing {} '{}'".format(resource_type, name)


def not_found_msg(resource_type, name):
    """Return the log message for a resource that is about to be created."""
    return "{} '{}' not found, attempting to create it".format(resource_type, name)


def not_checking_msg(resource_type, name):
    """Return the log message for skipping an existence check."""
    return "not checking if {} '{}' exists".format(resource_type, name)


def created_msg(resource_type, name):
    """Return the log message for a successfully created resource."""
    return "successfully created {} '{}'".format(resource_type, name)


def not_provided_msg(resource_type):
    """Return the log message for a resource with no config section."""
    return "no {} config provided, must already exist".format(resource_type)


def bootstrap_kubernetes(config):
    """Create the namespace, services and RBAC objects the cluster needs.

    Args:
        config: The full cluster config; only ``config["provider"]`` is read.

    Returns:
        The same ``config`` object (resource configs inside it may have had
        their ``namespace`` field filled in by the ``_configure_*`` helpers).
    """
    # NOTE: a guard that rejected configs without ``use_internal_ips`` used to
    # live here and is intentionally disabled, so external IPs are accepted.
    provider_config = config["provider"]
    managed_by_operator = provider_config.get("_operator")

    if managed_by_operator:
        namespace = provider_config["namespace"]
    else:
        namespace = _configure_namespace(provider_config)

    _configure_services(namespace, provider_config)

    if not managed_by_operator:
        # These steps are unnecessary when using the Operator.
        _configure_autoscaler_service_account(namespace, provider_config)
        _configure_autoscaler_role(namespace, provider_config)
        _configure_autoscaler_role_binding(namespace, provider_config)

    return config


def fillout_resources_kubernetes(config):
    """Fill CPU, GPU and memory resources from each node type's pod spec.

    For every entry of ``available_node_types`` the first container's
    ``resources.limits`` are read (requests are not consulted) and rounded up
    to whole units. Values already present under the node type's
    ``resources`` key take precedence over the detected ones. Memory is only
    autodetected for worker node types, never for the head node type.

    Args:
        config: The cluster config. Returned untouched when it has no
            ``available_node_types`` section.

    Returns:
        ``config``, with ``resources`` of every node type updated in place.
    """
    if "available_node_types" not in config:
        return config
    # Iterate over a snapshot so that writing back into ``config`` is safe.
    node_types = copy.deepcopy(config["available_node_types"])
    head_node_type = config["head_node_type"]
    for node_type in node_types:
        node_config = node_types[node_type]["node_config"]
        # The next line is for compatibility with configs like
        # kubernetes/example-ingress.yaml,
        # cf. KubernetesNodeProvider.create_node().
        pod = node_config.get("pod", node_config)
        container_data = pod["spec"]["containers"][0]

        autodetected_resources = get_autodetected_resources(container_data)
        # Compare the *name* of this node type with the head type. Comparing
        # the whole ``node_types`` dict (as before) was always False.
        if node_type == head_node_type:
            # we only autodetect worker type node memory resource
            # ("memory" is absent when the container declares no resources).
            autodetected_resources.pop("memory", None)

        target = config["available_node_types"][node_type]
        if "resources" not in target:
            target["resources"] = {}
        # User-specified values override the autodetected ones.
        autodetected_resources.update(target["resources"])
        target["resources"] = autodetected_resources
        logger.debug(
            "Updating the resources of node type {} to include {}.".format(
                node_type, autodetected_resources
            )
        )
    return config


def get_autodetected_resources(container_data):
    """Return the resources detected from one container spec.

    Args:
        container_data: A container dict from a pod spec.

    Returns:
        ``{"CPU": int, "GPU": int, "memory": int}``; the ``memory`` key is
        omitted (and CPU/GPU are 0) when the container has no ``resources``.
    """
    container_resources = container_data.get("resources", None)
    if container_resources is None:
        return {"CPU": 0, "GPU": 0}

    node_type_resources = {
        resource_name.upper(): get_resource(container_resources, resource_name)
        for resource_name in ["cpu", "gpu"]
    }

    memory_limits = get_resource(container_resources, "memory")
    node_type_resources["memory"] = int(memory_limits)

    return node_type_resources


def get_resource(container_resources, resource_name):
    """Return the integer limit of ``resource_name``, or 0 when unlimited."""
    limit = _get_resource(container_resources, resource_name, field_name="limits")
    # float("inf") means there's no limit set
    return 0 if limit == float("inf") else int(limit)


def _get_resource(container_resources, resource_name, field_name):
    """Returns the resource quantity.

    The amount of resource is rounded up to nearest integer.
    Returns float("inf") if the resource is not present.

    Args:
        container_resources: Container's resource field.
        resource_name: One of 'cpu', 'gpu' or memory.
        field_name: One of 'requests' or 'limits'.

    Returns:
        Union[int, float]: Detected resource quantity.

    Raises:
        ValueError: If more than one key matches ``resource_name`` or the
            quantity cannot be parsed.
    """
    # A missing field and an explicitly empty one (``limits:`` with no value
    # loads as None) both mean "nothing declared".
    resources = container_resources.get(field_name)
    if not resources:
        return float("inf")
    # Look for keys containing the resource_name. For example,
    # the key 'nvidia.com/gpu' contains the key 'gpu'.
    matching_keys = [key for key in resources if resource_name in key.lower()]
    if len(matching_keys) == 0:
        return float("inf")
    if len(matching_keys) > 1:
        # Should have only one match -- mostly relevant for gpu.
        raise ValueError(f"Multiple {resource_name} types not supported.")
    # E.g. 'nvidia.com/gpu' or 'cpu'.
    resource_quantity = resources[matching_keys[0]]
    if resource_name == "memory":
        return _parse_memory_resource(resource_quantity)
    return _parse_cpu_or_gpu_resource(resource_quantity)


def _parse_cpu_or_gpu_resource(resource):
    """Parse a CPU/GPU quantity and round it up to a whole unit.

    Accepts integers (``2``), milli-units (``"500m"``) and decimal fractions
    (``"0.5"``), which previously raised ``ValueError``.

    Raises:
        ValueError: If the quantity is not a number in one of those forms.
    """
    resource_str = str(resource).strip()
    if resource_str.endswith("m"):
        # For example, '500m' rounds up to 1.
        return math.ceil(int(resource_str[:-1]) / 1000)
    try:
        return int(resource_str)
    except ValueError:
        # Fractional quantities such as '0.5' or '1.5' round up as well.
        return math.ceil(float(resource_str))


def _parse_memory_resource(resource):
    """Parse a memory quantity into a number of bytes.

    Plain integers are returned as ``int``. Quantities with a ``K``, ``M``,
    ``G``, ``T`` or ``P`` unit (optionally followed by more characters such
    as ``i``) are returned as ``float`` using ``MEMORY_SIZE_UNITS``.
    NOTE(review): every unit is scaled by the power of two in that table, so
    ``1G`` and ``1Gi`` yield the same value -- confirm this is intended.

    Raises:
        ValueError: If the quantity has no supported unit (e.g. ``"500m"``)
            or is otherwise malformed.
    """
    resource_str = str(resource)
    try:
        return int(resource_str)
    except ValueError:
        pass
    # Put a space in front of the unit so the string splits into
    # exactly two parts: number and unit.
    memory_size = re.sub(r"([KMGTP]+)", r" \1", resource_str)
    parts = memory_size.split()
    if len(parts) != 2 or parts[1][0] not in MEMORY_SIZE_UNITS:
        raise ValueError(f"Unsupported memory quantity: {resource_str!r}")
    number, unit = parts
    return float(number) * MEMORY_SIZE_UNITS[unit[0]]


def _configure_namespace(provider_config):
    """Ensure the configured namespace exists, creating it if necessary.

    Args:
        provider_config: The ``provider`` section of the cluster config.

    Returns:
        The namespace name.

    Raises:
        ValueError: If ``provider_config`` has no ``namespace`` entry.
    """
    namespace_field = "namespace"
    if namespace_field not in provider_config:
        raise ValueError("Must specify namespace in Kubernetes config.")

    namespace = provider_config[namespace_field]
    field_selector = "metadata.name={}".format(namespace)
    try:
        namespaces = core_api().list_namespace(field_selector=field_selector).items
    except ApiException:
        # Listing failed; carry on assuming the namespace is usable.
        logger.warning(log_prefix + not_checking_msg(namespace_field, namespace))
        return namespace

    if len(namespaces) > 0:
        assert len(namespaces) == 1
        logger.info(log_prefix + using_existing_msg(namespace_field, namespace))
        return namespace

    logger.info(log_prefix + not_found_msg(namespace_field, namespace))
    namespace_config = client.V1Namespace(metadata=client.V1ObjectMeta(name=namespace))
    core_api().create_namespace(namespace_config)
    logger.info(log_prefix + created_msg(namespace_field, namespace))
    return namespace
def _configure_autoscaler_service_account(namespace, provider_config):
    """Create the autoscaler service account unless it already exists.

    Args:
        namespace: Namespace the account must live in.
        provider_config: The ``provider`` section of the cluster config; the
            account manifest is read from ``autoscaler_service_account``.

    Raises:
        InvalidNamespaceError: If the manifest names a different namespace.
    """
    account_field = "autoscaler_service_account"
    if account_field not in provider_config:
        logger.info(log_prefix + not_provided_msg(account_field))
        return

    account = provider_config[account_field]
    if "namespace" not in account["metadata"]:
        account["metadata"]["namespace"] = namespace
    elif account["metadata"]["namespace"] != namespace:
        raise InvalidNamespaceError(account_field, namespace)

    name = account["metadata"]["name"]
    field_selector = "metadata.name={}".format(name)
    existing_accounts = (
        core_api()
        .list_namespaced_service_account(namespace, field_selector=field_selector)
        .items
    )
    if len(existing_accounts) > 0:
        assert len(existing_accounts) == 1
        logger.info(log_prefix + using_existing_msg(account_field, name))
        return

    logger.info(log_prefix + not_found_msg(account_field, name))
    core_api().create_namespaced_service_account(namespace, account)
    logger.info(log_prefix + created_msg(account_field, name))


def _configure_autoscaler_role(namespace, provider_config):
    """Create the autoscaler role unless it already exists.

    Args:
        namespace: Namespace the role must live in.
        provider_config: The ``provider`` section of the cluster config; the
            role manifest is read from ``autoscaler_role``.

    Raises:
        InvalidNamespaceError: If the manifest names a different namespace.
    """
    role_field = "autoscaler_role"
    if role_field not in provider_config:
        logger.info(log_prefix + not_provided_msg(role_field))
        return

    role = provider_config[role_field]
    if "namespace" not in role["metadata"]:
        role["metadata"]["namespace"] = namespace
    elif role["metadata"]["namespace"] != namespace:
        raise InvalidNamespaceError(role_field, namespace)

    name = role["metadata"]["name"]
    field_selector = "metadata.name={}".format(name)
    existing_roles = (
        auth_api().list_namespaced_role(namespace, field_selector=field_selector).items
    )
    if len(existing_roles) > 0:
        assert len(existing_roles) == 1
        logger.info(log_prefix + using_existing_msg(role_field, name))
        return

    logger.info(log_prefix + not_found_msg(role_field, name))
    auth_api().create_namespaced_role(namespace, role)
    logger.info(log_prefix + created_msg(role_field, name))


def _configure_autoscaler_role_binding(namespace, provider_config):
    """Create the autoscaler role binding unless it already exists.

    The namespace is filled into the binding and into each of its subjects
    when they do not specify one.

    Args:
        namespace: Namespace the binding and its subjects must live in.
        provider_config: The ``provider`` section of the cluster config; the
            binding manifest is read from ``autoscaler_role_binding``.

    Raises:
        InvalidNamespaceError: If the binding or one of its subjects names a
            different namespace.
    """
    binding_field = "autoscaler_role_binding"
    if binding_field not in provider_config:
        logger.info(log_prefix + not_provided_msg(binding_field))
        return

    binding = provider_config[binding_field]
    if "namespace" not in binding["metadata"]:
        binding["metadata"]["namespace"] = namespace
    elif binding["metadata"]["namespace"] != namespace:
        raise InvalidNamespaceError(binding_field, namespace)
    for subject in binding["subjects"]:
        if "namespace" not in subject:
            subject["namespace"] = namespace
        elif subject["namespace"] != namespace:
            raise InvalidNamespaceError(
                binding_field + " subject '{}'".format(subject["name"]), namespace
            )

    name = binding["metadata"]["name"]
    field_selector = "metadata.name={}".format(name)
    existing_bindings = (
        auth_api()
        .list_namespaced_role_binding(namespace, field_selector=field_selector)
        .items
    )
    if len(existing_bindings) > 0:
        assert len(existing_bindings) == 1
        logger.info(log_prefix + using_existing_msg(binding_field, name))
        return

    logger.info(log_prefix + not_found_msg(binding_field, name))
    auth_api().create_namespaced_role_binding(namespace, binding)
    logger.info(log_prefix + created_msg(binding_field, name))


def _configure_services(namespace, provider_config):
    """Create or patch every service listed in the provider config.

    Each entry of ``provider_config["services"]`` is created when no service
    of that name exists in ``namespace`` and patched otherwise. Every entry
    is processed; an up-to-date service no longer ends the whole function
    and leaves the remaining entries unconfigured.

    Args:
        namespace: Namespace the services must live in.
        provider_config: The ``provider`` section of the cluster config.

    Raises:
        InvalidNamespaceError: If a service manifest names a different
            namespace.
    """
    service_field = "services"
    if service_field not in provider_config:
        logger.info(log_prefix + not_provided_msg(service_field))
        return

    for service in provider_config[service_field]:
        if "namespace" not in service["metadata"]:
            service["metadata"]["namespace"] = namespace
        elif service["metadata"]["namespace"] != namespace:
            raise InvalidNamespaceError(service_field, namespace)

        name = service["metadata"]["name"]
        field_selector = "metadata.name={}".format(name)
        # Kept in its own name so the list being iterated is not rebound.
        existing_services = (
            core_api()
            .list_namespaced_service(namespace, field_selector=field_selector)
            .items
        )
        if len(existing_services) > 0:
            assert len(existing_services) == 1
            existing_service = existing_services[0]
            # NOTE(review): ``service`` comes from the config while
            # ``existing_service`` comes from the API client, so this
            # equality may never hold and the patch branch may always run
            # -- confirm.
            if service == existing_service:
                logger.info(log_prefix + using_existing_msg("service", name))
                # Move on to the next service instead of returning.
                continue
            logger.info(log_prefix + updating_existing_msg("service", name))
            core_api().patch_namespaced_service(name, namespace, service)
        else:
            logger.info(log_prefix + not_found_msg("service", name))
            core_api().create_namespaced_service(namespace, service)
            logger.info(log_prefix + created_msg("service", name))
b/sky/skylet/providers/kubernetes/kubectl-rsync.sh @@ -0,0 +1,30 @@ +#!/bin/bash + +# Helper script to use kubectl as a remote shell for rsync to sync files +# to/from pods that have rsync installed. Taken from: +# https://serverfault.com/questions/741670/rsync-files-to-a-kubernetes-pod/746352 + +if [ -z "$KRSYNC_STARTED" ]; then + export KRSYNC_STARTED=true + exec rsync --blocking-io --rsh "$0" "$@" +fi + +# Running as --rsh +namespace='' +pod=$1 +shift + +# If use uses pod@namespace rsync passes as: {us} -l pod namespace ... +if [ "X$pod" = "X-l" ]; then + pod=$1 + shift + # Space before $1 leads to namespace errors + namespace="-n$1" + shift +fi + +if [ -z "$KUBE_API_SERVER" ]; then + exec kubectl "$namespace" exec -i "$pod" -- "$@" +else + exec kubectl --server "$KUBE_API_SERVER" "$namespace" exec -i "$pod" -- "$@" +fi diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py new file mode 100644 index 00000000000..4a5d1b60d85 --- /dev/null +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -0,0 +1,348 @@ +import copy +import json +import logging +import subprocess +import time +from typing import Dict +from uuid import uuid4 + +from kubernetes.client.rest import ApiException + +from sky.skylet.providers.kubernetes import core_api, log_prefix, networking_api +from sky.skylet.providers.kubernetes.config import ( + bootstrap_kubernetes, + fillout_resources_kubernetes, +) +from ray.autoscaler._private.command_runner import KubernetesCommandRunner, \ + SSHCommandRunner +from ray.autoscaler._private.cli_logger import cli_logger +from ray.autoscaler.node_provider import NodeProvider +from ray.autoscaler.tags import NODE_KIND_HEAD, TAG_RAY_CLUSTER_NAME, TAG_RAY_NODE_KIND + +logger = logging.getLogger(__name__) + +MAX_TAG_RETRIES = 3 +DELAY_BEFORE_TAG_RETRY = 0.5 + +RAY_COMPONENT_LABEL = "cluster.ray.io/component" + +# Patch SSHCommandRunner to allow specifying SSH port +def set_port(self, port): + 
# Patch SSHCommandRunner to allow specifying SSH port
def set_port(self, port):
    """Set the SSH ``Port`` option of an ``SSHCommandRunner`` instance."""
    self.ssh_options.arg_dict["Port"] = port


SSHCommandRunner.set_port = set_port


def head_service_selector(cluster_name: str) -> Dict[str, str]:
    """Selector for Operator-configured head service."""
    return {RAY_COMPONENT_LABEL: f"{cluster_name}-ray-head"}


def to_label_selector(tags):
    """Render a ``{label: value}`` dict as a ``k1=v1,k2=v2`` selector string."""
    return ",".join("{}={}".format(k, v) for k, v in tags.items())


class KubernetesNodeProvider(NodeProvider):
    """Node provider that runs each node as a pod and reaches it over SSH.

    Nodes are pods in ``provider_config["namespace"]``; the node id is the
    pod name and node tags are stored as pod labels.
    """

    # TODO(romilb): !!! Service name is hardcoded here !!!
    # Name of the service whose first port's NodePort is used for external SSH.
    _HEAD_SSH_SERVICE_NAME = 'example-cluster-ray-head-ssh'

    def __init__(self, provider_config, cluster_name):
        NodeProvider.__init__(self, provider_config, cluster_name)
        self.cluster_name = cluster_name
        self.namespace = provider_config["namespace"]

    def non_terminated_nodes(self, tag_filters):
        """Return the names of this cluster's pods that match ``tag_filters``.

        Pods that are marked for deletion are excluded. ``tag_filters`` is
        not modified.
        """
        # Match pods that are in the 'Pending' or 'Running' phase.
        # Unfortunately there is no OR operator in field selectors, so we
        # have to match on NOT any of the other phases.
        field_selector = ",".join(
            [
                "status.phase!=Failed",
                "status.phase!=Unknown",
                "status.phase!=Succeeded",
                "status.phase!=Terminating",
            ]
        )

        # Work on a copy: adding the cluster-name label must not leak into
        # the caller's dict.
        label_filters = dict(tag_filters)
        label_filters[TAG_RAY_CLUSTER_NAME] = self.cluster_name
        label_selector = to_label_selector(label_filters)
        pod_list = core_api().list_namespaced_pod(
            self.namespace, field_selector=field_selector, label_selector=label_selector
        )

        # Don't return pods marked for deletion,
        # i.e. pods with non-null metadata.DeletionTimestamp.
        return [
            pod.metadata.name
            for pod in pod_list.items
            if pod.metadata.deletion_timestamp is None
        ]

    def is_running(self, node_id):
        """Return True when the pod's phase is ``Running``."""
        pod = core_api().read_namespaced_pod(node_id, self.namespace)
        return pod.status.phase == "Running"

    def is_terminated(self, node_id):
        """Return True when the pod is neither ``Running`` nor ``Pending``."""
        pod = core_api().read_namespaced_pod(node_id, self.namespace)
        return pod.status.phase not in ["Running", "Pending"]

    def node_tags(self, node_id):
        """Return the pod's labels (an empty dict for an unlabeled pod)."""
        pod = core_api().read_namespaced_pod(node_id, self.namespace)
        return pod.metadata.labels or {}

    @staticmethod
    def get_apiserver_ip() -> str:
        """Return the host of the current kubectl context's API server.

        Raises:
            subprocess.CalledProcessError: If ``kubectl`` exits non-zero.
            ValueError: If the server URL has no host component.
        """
        from urllib.parse import urlsplit

        # --minify limits the output to the current context, so clusters[0]
        # is the cluster in use rather than the first one in the kubeconfig.
        output = subprocess.check_output(
            ["kubectl", "config", "view", "--minify", "-o", "json"])
        server_url = json.loads(output)["clusters"][0]["cluster"]["server"]
        api_server_ip = urlsplit(server_url).hostname
        if api_server_ip is None:
            raise ValueError(
                f"Cannot determine API server host from {server_url!r}")
        return api_server_ip

    def external_ip(self, node_id):
        """Return the API server's address; it is the same for every node."""
        # Extract the IP address of the API server from kubectl
        return self.get_apiserver_ip()

    def external_port(self, node_id):
        """Return the NodePort of the head SSH service's first port.

        ``node_id`` is not used: the port always comes from the service named
        by ``_HEAD_SSH_SERVICE_NAME``.
        """
        # Extract the NodePort of the head node's SSH service
        # TODO(romilb): Implement caching here for performance
        head_service = core_api().read_namespaced_service(
            self._HEAD_SSH_SERVICE_NAME, self.namespace)
        return head_service.spec.ports[0].node_port

    def internal_ip(self, node_id):
        """Return the pod IP."""
        pod = core_api().read_namespaced_pod(node_id, self.namespace)
        return pod.status.pod_ip

    def get_node_id(self, ip_address, use_internal_ip=True) -> str:
        """Return the node id registered for ``ip_address``.

        The IP caches are refreshed from the live pods on a cache miss.
        NOTE(review): ``external_ip`` is identical for every node, so the
        external cache can only ever map that address to one node -- confirm
        whether the cache should be keyed on ip+port.

        Raises:
            ValueError: If no node with that address is known.
        """
        ip_cache = (
            self._internal_ip_cache if use_internal_ip else self._external_ip_cache
        )

        if not ip_cache.get(ip_address):
            ip_func = self.internal_ip if use_internal_ip else self.external_ip
            for node_id in self.non_terminated_nodes({}):
                ip_cache[ip_func(node_id)] = node_id

        if not ip_cache.get(ip_address):
            if use_internal_ip:
                known_msg = f"Worker internal IPs: {list(self._internal_ip_cache)}"
            else:
                known_msg = f"Worker external IP: {list(self._external_ip_cache)}"
            raise ValueError(f"ip {ip_address} not found. " + known_msg)

        return ip_cache.get(ip_address)

    def set_node_tags(self, node_ids, tags):
        """Apply ``tags`` as pod labels, retrying on 409 conflicts.

        ``node_ids`` is passed straight to ``read_namespaced_pod``, i.e. it
        is used as a single pod name despite the plural parameter name.
        """
        for _ in range(MAX_TAG_RETRIES - 1):
            try:
                self._set_node_tags(node_ids, tags)
                return
            except ApiException as e:
                if e.status == 409:
                    logger.info(
                        log_prefix + "Caught a 409 error while setting"
                        " node tags. Retrying..."
                    )
                    time.sleep(DELAY_BEFORE_TAG_RETRY)
                    continue
                else:
                    raise
        # One more try
        self._set_node_tags(node_ids, tags)

    def _set_node_tags(self, node_id, tags):
        """Read the pod, merge ``tags`` into its labels and patch it."""
        pod = core_api().read_namespaced_pod(node_id, self.namespace)
        # A pod without labels reports ``labels`` as None.
        if pod.metadata.labels is None:
            pod.metadata.labels = {}
        pod.metadata.labels.update(tags)
        core_api().patch_namespaced_pod(node_id, self.namespace, pod)

    def create_node(self, node_config, tags, count):
        """Create ``count`` pods and, if configured, a service/ingress each.

        ``node_config`` is either the pod manifest itself or a dict with
        ``pod`` and optional ``service`` / ``ingress`` manifests. ``tags`` is
        updated in place with the cluster name and a ``ray-node-uuid`` label
        shared by all pods of this call.
        """
        conf = copy.deepcopy(node_config)
        pod_spec = conf.get("pod", conf)
        service_spec = conf.get("service")
        ingress_spec = conf.get("ingress")
        node_uuid = str(uuid4())
        tags[TAG_RAY_CLUSTER_NAME] = self.cluster_name
        tags["ray-node-uuid"] = node_uuid
        pod_spec["metadata"]["namespace"] = self.namespace
        if "labels" in pod_spec["metadata"]:
            pod_spec["metadata"]["labels"].update(tags)
        else:
            pod_spec["metadata"]["labels"] = tags

        # Allow Operator-configured service to access the head node.
        if tags[TAG_RAY_NODE_KIND] == NODE_KIND_HEAD:
            head_selector = head_service_selector(self.cluster_name)
            pod_spec["metadata"]["labels"].update(head_selector)

        logger.info(
            log_prefix + "calling create_namespaced_pod (count={}).".format(count)
        )
        new_nodes = []
        for _ in range(count):
            pod = core_api().create_namespaced_pod(self.namespace, pod_spec)
            new_nodes.append(pod)

        new_svcs = []
        if service_spec is not None:
            logger.info(
                log_prefix + "calling create_namespaced_service "
                "(count={}).".format(count)
            )

            for new_node in new_nodes:
                # The service is named after its pod.
                metadata = service_spec.get("metadata", {})
                metadata["name"] = new_node.metadata.name
                service_spec["metadata"] = metadata
                service_spec["spec"]["selector"] = {"ray-node-uuid": node_uuid}
                svc = core_api().create_namespaced_service(self.namespace, service_spec)
                new_svcs.append(svc)

        if ingress_spec is not None:
            logger.info(
                log_prefix + "calling create_namespaced_ingress "
                "(count={}).".format(count)
            )
            for new_svc in new_svcs:
                # Fill in a fresh copy of the template for every service.
                # Reusing the already-substituted spec made the serviceName
                # check fail from the second service onwards.
                svc_ingress = copy.deepcopy(ingress_spec)
                metadata = svc_ingress.get("metadata", {})
                metadata["name"] = new_svc.metadata.name
                svc_ingress["metadata"] = metadata
                svc_ingress = _add_service_name_to_service_port(
                    svc_ingress, new_svc.metadata.name
                )
                networking_api().create_namespaced_ingress(self.namespace, svc_ingress)

    def terminate_node(self, node_id):
        """Delete the pod and any service/ingress that shares its name."""
        logger.info(log_prefix + "calling delete_namespaced_pod")
        try:
            core_api().delete_namespaced_pod(node_id, self.namespace)
        except ApiException as e:
            if e.status == 404:
                logger.warning(
                    log_prefix + f"Tried to delete pod {node_id},"
                    " but the pod was not found (404)."
                )
            else:
                raise
        # The service and ingress are optional, so failures to delete them
        # are deliberately ignored.
        try:
            core_api().delete_namespaced_service(node_id, self.namespace)
        except ApiException:
            pass
        try:
            networking_api().delete_namespaced_ingress(
                node_id,
                self.namespace,
            )
        except ApiException:
            pass

    def terminate_nodes(self, node_ids):
        """Terminate every node in ``node_ids`` one after the other."""
        for node_id in node_ids:
            self.terminate_node(node_id)

    def get_command_runner(
            self,
            log_prefix,
            node_id,
            auth_config,
            cluster_name,
            process_runner,
            use_internal_ip,
            docker_config = None):
        """Returns the CommandRunner class used to perform SSH commands.

        The runner connects to port 22 when ``use_internal_ip`` is set and to
        ``external_port(node_id)`` otherwise.

        Args:
        log_prefix(str): stores "NodeUpdater: {}: ".format(<node_id>). Used
            to print progress in the CommandRunner.
        node_id(str): the node ID.
        auth_config(dict): the authentication configs from the autoscaler
            yaml file.
        cluster_name(str): the name of the cluster.
        process_runner(module): the module to use to run the commands
            in the CommandRunner. E.g., subprocess.
        use_internal_ip(bool): whether the node_id belongs to an internal ip
            or external ip.
        docker_config(dict): If set, the docker information of the docker
            container that commands should be run on. Currently unused.
        """
        common_args = {
            "log_prefix": log_prefix,
            "node_id": node_id,
            "provider": self,
            "auth_config": auth_config,
            "cluster_name": cluster_name,
            "process_runner": process_runner,
            "use_internal_ip": use_internal_ip,
        }
        command_runner = SSHCommandRunner(**common_args)
        port = 22 if use_internal_ip else self.external_port(node_id)
        # Debug-level logging replaces the former unconditional prints of the
        # runner's attributes and the append to /tmp/log.txt.
        logger.debug("%sSSH port for node %s: %s", log_prefix, node_id, port)
        command_runner.set_port(port)
        return command_runner

    @staticmethod
    def bootstrap_config(cluster_config):
        """Create the Kubernetes objects the cluster config depends on."""
        return bootstrap_kubernetes(cluster_config)

    @staticmethod
    def fillout_available_node_types_resources(cluster_config):
        """Fills out missing "resources" field for available_node_types."""
        return fillout_resources_kubernetes(cluster_config)


def _add_service_name_to_service_port(spec, svc_name):
    """Goes recursively through the ingress manifest and adds the
    right serviceName next to every servicePort definition.

    Dicts are updated in place; the (possibly new) object is returned.

    Raises:
        ValueError: If a ``serviceName`` value differs from ``svc_name``
            after substitution.
    """
    if isinstance(spec, dict):
        dict_keys = list(spec.keys())
        for k in dict_keys:
            spec[k] = _add_service_name_to_service_port(spec[k], svc_name)

            if k == "serviceName" and spec[k] != svc_name:
                raise ValueError(
                    "The value of serviceName must be set to "
                    "${RAY_POD_NAME}. It is automatically replaced "
                    "when using the autoscaler."
                )

    elif isinstance(spec, list):
        spec = [_add_service_name_to_service_port(item, svc_name) for item in spec]

    elif isinstance(spec, str):
        # The magic string ${RAY_POD_NAME} is replaced with
        # the true service name, which is equal to the worker pod name.
        if "${RAY_POD_NAME}" in spec:
            spec = spec.replace("${RAY_POD_NAME}", svc_name)
    return spec
+ if "${RAY_POD_NAME}" in spec: + spec = spec.replace("${RAY_POD_NAME}", svc_name) + return spec diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py new file mode 100644 index 00000000000..40de7bbf2a3 --- /dev/null +++ b/sky/skylet/providers/kubernetes/utils.py @@ -0,0 +1,58 @@ +import kubernetes +from kubernetes.config.config_exception import ConfigException + +_configured = False +_core_api = None +_auth_api = None +_networking_api = None +_custom_objects_api = None + + +def _load_config(): + global _configured + if _configured: + return + try: + kubernetes.config.load_incluster_config() + except ConfigException: + kubernetes.config.load_kube_config() + _configured = True + + +def core_api(): + global _core_api + if _core_api is None: + _load_config() + _core_api = kubernetes.client.CoreV1Api() + + return _core_api + + +def auth_api(): + global _auth_api + if _auth_api is None: + _load_config() + _auth_api = kubernetes.client.RbacAuthorizationV1Api() + + return _auth_api + + +def networking_api(): + global _networking_api + if _networking_api is None: + _load_config() + _networking_api = kubernetes.client.NetworkingV1Api() + + return _networking_api + + +def custom_objects_api(): + global _custom_objects_api + if _custom_objects_api is None: + _load_config() + _custom_objects_api = kubernetes.client.CustomObjectsApi() + + return _custom_objects_api + + +log_prefix = "KubernetesNodeProvider: " diff --git a/tests/playground/clean_k8s.sh b/tests/playground/clean_k8s.sh new file mode 100644 index 00000000000..66b14c18b6c --- /dev/null +++ b/tests/playground/clean_k8s.sh @@ -0,0 +1 @@ +kubectl delete all -l parent=skypilot diff --git a/tests/playground/deployment/delete.sh b/tests/playground/deployment/delete.sh new file mode 100644 index 00000000000..08c8205ce77 --- /dev/null +++ b/tests/playground/deployment/delete.sh @@ -0,0 +1 @@ +kubectl delete -f skypilot_ssh_k8s_deployment.yaml diff --git 
a/tests/playground/deployment/run.sh b/tests/playground/deployment/run.sh new file mode 100644 index 00000000000..d19b4f5fc9e --- /dev/null +++ b/tests/playground/deployment/run.sh @@ -0,0 +1,6 @@ +kubectl create secret generic ssh-key-secret --from-file=ssh-publickey=/home/romilb/.ssh/sky-key.pub +kubectl apply -f skypilot_ssh_k8s_deployment.yaml +# Use kubectl describe service skypilot-service to get the port of the service +kubectl describe service skypilot-service | grep NodePort +echo Run the following command to ssh into the container: +echo ssh sky@127.0.0.1 -p port -i ~/.ssh/sky-key diff --git a/tests/playground/deployment/skypilot_ssh_k8s_deployment.yaml b/tests/playground/deployment/skypilot_ssh_k8s_deployment.yaml new file mode 100644 index 00000000000..ed2715d61ab --- /dev/null +++ b/tests/playground/deployment/skypilot_ssh_k8s_deployment.yaml @@ -0,0 +1,54 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: skypilot-deployment + labels: + app: skypilot +spec: + replicas: 1 + selector: + matchLabels: + app: skypilot + template: + metadata: + labels: + app: skypilot + spec: + volumes: + - name: secret-volume + secret: + secretName: ssh-key-secret + containers: + - name: skypilot + image: skypilot:latest + imagePullPolicy: Never + env: + - name: SECRET_THING + valueFrom: + secretKeyRef: + name: ssh-key-secret + key: ssh-publickey + ports: + - containerPort: 22 + command: ["/bin/bash", "-c", "sleep 1000000000"] + volumeMounts: + - name: secret-volume + readOnly: true + mountPath: "/etc/secret-volume" + lifecycle: + postStart: + exec: + command: ["/bin/bash", "-c", "mkdir -p ~/.ssh && cp /etc/secret-volume/ssh-publickey ~/.ssh/authorized_keys && sudo service ssh restart"] +--- +apiVersion: v1 +kind: Service +metadata: + name: skypilot-service +spec: + type: NodePort + selector: + app: skypilot + ports: + - protocol: TCP + port: 22 + targetPort: 22 diff --git a/tests/playground/kind/cluster.yaml b/tests/playground/kind/cluster.yaml new file mode 
100644 index 00000000000..cbeb04829cc --- /dev/null +++ b/tests/playground/kind/cluster.yaml @@ -0,0 +1,11077 @@ +apiVersion: kind.x-k8s.io/v1alpha4 +kind: Cluster +nodes: +- role: control-plane + extraPortMappings: + - containerPort: 30000 + hostPort: 30000 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30001 + hostPort: 30001 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30002 + hostPort: 30002 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30003 + hostPort: 30003 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30004 + hostPort: 30004 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30005 + hostPort: 30005 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30006 + hostPort: 30006 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30007 + hostPort: 30007 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30008 + hostPort: 30008 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30009 + hostPort: 30009 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30010 + hostPort: 30010 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30011 + hostPort: 30011 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30012 + hostPort: 30012 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30013 + hostPort: 30013 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30014 + hostPort: 30014 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30015 + hostPort: 30015 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30016 + hostPort: 30016 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30017 + hostPort: 30017 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30018 + hostPort: 30018 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30019 + hostPort: 30019 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30020 + hostPort: 30020 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 30021 + hostPort: 30021 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30022 + hostPort: 30022 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30023 + hostPort: 30023 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30024 + hostPort: 30024 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30025 + hostPort: 30025 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30026 + hostPort: 30026 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30027 + hostPort: 30027 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30028 + hostPort: 30028 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30029 + hostPort: 30029 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30030 + hostPort: 30030 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30031 + hostPort: 30031 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30032 + hostPort: 30032 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30033 + hostPort: 30033 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30034 + hostPort: 30034 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30035 + hostPort: 30035 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30036 + hostPort: 30036 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30037 + hostPort: 30037 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30038 + hostPort: 30038 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30039 + hostPort: 30039 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30040 + hostPort: 30040 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30041 + hostPort: 30041 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30042 + hostPort: 30042 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30043 + hostPort: 30043 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 30044 + hostPort: 30044 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30045 + hostPort: 30045 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30046 + hostPort: 30046 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30047 + hostPort: 30047 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30048 + hostPort: 30048 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30049 + hostPort: 30049 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30050 + hostPort: 30050 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30051 + hostPort: 30051 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30052 + hostPort: 30052 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30053 + hostPort: 30053 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30054 + hostPort: 30054 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30055 + hostPort: 30055 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30056 + hostPort: 30056 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30057 + hostPort: 30057 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30058 + hostPort: 30058 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30059 + hostPort: 30059 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30060 + hostPort: 30060 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30061 + hostPort: 30061 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30062 + hostPort: 30062 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30063 + hostPort: 30063 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30064 + hostPort: 30064 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30065 + hostPort: 30065 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30066 + hostPort: 30066 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 30067 + hostPort: 30067 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30068 + hostPort: 30068 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30069 + hostPort: 30069 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30070 + hostPort: 30070 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30071 + hostPort: 30071 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30072 + hostPort: 30072 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30073 + hostPort: 30073 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30074 + hostPort: 30074 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30075 + hostPort: 30075 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30076 + hostPort: 30076 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30077 + hostPort: 30077 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30078 + hostPort: 30078 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30079 + hostPort: 30079 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30080 + hostPort: 30080 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30081 + hostPort: 30081 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30082 + hostPort: 30082 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30083 + hostPort: 30083 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30084 + hostPort: 30084 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30085 + hostPort: 30085 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30086 + hostPort: 30086 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30087 + hostPort: 30087 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30088 + hostPort: 30088 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30089 + hostPort: 30089 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30090 
+ hostPort: 30090 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30091 + hostPort: 30091 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30092 + hostPort: 30092 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30093 + hostPort: 30093 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30094 + hostPort: 30094 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30095 + hostPort: 30095 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30096 + hostPort: 30096 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30097 + hostPort: 30097 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30098 + hostPort: 30098 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30099 + hostPort: 30099 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30100 + hostPort: 30100 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30101 + hostPort: 30101 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30102 + hostPort: 30102 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30103 + hostPort: 30103 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30104 + hostPort: 30104 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30105 + hostPort: 30105 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30106 + hostPort: 30106 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30107 + hostPort: 30107 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30108 + hostPort: 30108 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30109 + hostPort: 30109 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30110 + hostPort: 30110 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30111 + hostPort: 30111 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30112 + hostPort: 30112 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30113 + hostPort: 30113 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30114 + hostPort: 30114 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30115 + hostPort: 30115 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30116 + hostPort: 30116 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30117 + hostPort: 30117 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30118 + hostPort: 30118 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30119 + hostPort: 30119 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30120 + hostPort: 30120 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30121 + hostPort: 30121 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30122 + hostPort: 30122 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30123 + hostPort: 30123 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30124 + hostPort: 30124 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30125 + hostPort: 30125 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30126 + hostPort: 30126 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30127 + hostPort: 30127 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30128 + hostPort: 30128 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30129 + hostPort: 30129 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30130 + hostPort: 30130 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30131 + hostPort: 30131 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30132 + hostPort: 30132 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30133 + hostPort: 30133 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30134 + hostPort: 30134 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30135 + hostPort: 30135 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30136 + hostPort: 30136 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 30137 + hostPort: 30137 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30138 + hostPort: 30138 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30139 + hostPort: 30139 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30140 + hostPort: 30140 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30141 + hostPort: 30141 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30142 + hostPort: 30142 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30143 + hostPort: 30143 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30144 + hostPort: 30144 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30145 + hostPort: 30145 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30146 + hostPort: 30146 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30147 + hostPort: 30147 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30148 + hostPort: 30148 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30149 + hostPort: 30149 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30150 + hostPort: 30150 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30151 + hostPort: 30151 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30152 + hostPort: 30152 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30153 + hostPort: 30153 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30154 + hostPort: 30154 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30155 + hostPort: 30155 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30156 + hostPort: 30156 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30157 + hostPort: 30157 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30158 + hostPort: 30158 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30159 + hostPort: 30159 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 30160 + hostPort: 30160 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30161 + hostPort: 30161 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30162 + hostPort: 30162 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30163 + hostPort: 30163 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30164 + hostPort: 30164 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30165 + hostPort: 30165 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30166 + hostPort: 30166 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30167 + hostPort: 30167 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30168 + hostPort: 30168 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30169 + hostPort: 30169 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30170 + hostPort: 30170 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30171 + hostPort: 30171 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30172 + hostPort: 30172 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30173 + hostPort: 30173 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30174 + hostPort: 30174 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30175 + hostPort: 30175 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30176 + hostPort: 30176 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30177 + hostPort: 30177 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30178 + hostPort: 30178 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30179 + hostPort: 30179 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30180 + hostPort: 30180 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30181 + hostPort: 30181 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30182 + hostPort: 30182 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 30183 + hostPort: 30183 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30184 + hostPort: 30184 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30185 + hostPort: 30185 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30186 + hostPort: 30186 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30187 + hostPort: 30187 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30188 + hostPort: 30188 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30189 + hostPort: 30189 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30190 + hostPort: 30190 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30191 + hostPort: 30191 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30192 + hostPort: 30192 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30193 + hostPort: 30193 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30194 + hostPort: 30194 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30195 + hostPort: 30195 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30196 + hostPort: 30196 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30197 + hostPort: 30197 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30198 + hostPort: 30198 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30199 + hostPort: 30199 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30200 + hostPort: 30200 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30201 + hostPort: 30201 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30202 + hostPort: 30202 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30203 + hostPort: 30203 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30204 + hostPort: 30204 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30205 + hostPort: 30205 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30206 
+ hostPort: 30206 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30207 + hostPort: 30207 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30208 + hostPort: 30208 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30209 + hostPort: 30209 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30210 + hostPort: 30210 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30211 + hostPort: 30211 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30212 + hostPort: 30212 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30213 + hostPort: 30213 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30214 + hostPort: 30214 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30215 + hostPort: 30215 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30216 + hostPort: 30216 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30217 + hostPort: 30217 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30218 + hostPort: 30218 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30219 + hostPort: 30219 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30220 + hostPort: 30220 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30221 + hostPort: 30221 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30222 + hostPort: 30222 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30223 + hostPort: 30223 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30224 + hostPort: 30224 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30225 + hostPort: 30225 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30226 + hostPort: 30226 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30227 + hostPort: 30227 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30228 + hostPort: 30228 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30229 + hostPort: 30229 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30230 + hostPort: 30230 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30231 + hostPort: 30231 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30232 + hostPort: 30232 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30233 + hostPort: 30233 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30234 + hostPort: 30234 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30235 + hostPort: 30235 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30236 + hostPort: 30236 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30237 + hostPort: 30237 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30238 + hostPort: 30238 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30239 + hostPort: 30239 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30240 + hostPort: 30240 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30241 + hostPort: 30241 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30242 + hostPort: 30242 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30243 + hostPort: 30243 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30244 + hostPort: 30244 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30245 + hostPort: 30245 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30246 + hostPort: 30246 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30247 + hostPort: 30247 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30248 + hostPort: 30248 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30249 + hostPort: 30249 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30250 + hostPort: 30250 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30251 + hostPort: 30251 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30252 + hostPort: 30252 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 30253 + hostPort: 30253 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30254 + hostPort: 30254 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30255 + hostPort: 30255 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30256 + hostPort: 30256 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30257 + hostPort: 30257 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30258 + hostPort: 30258 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30259 + hostPort: 30259 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30260 + hostPort: 30260 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30261 + hostPort: 30261 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30262 + hostPort: 30262 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30263 + hostPort: 30263 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30264 + hostPort: 30264 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30265 + hostPort: 30265 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30266 + hostPort: 30266 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30267 + hostPort: 30267 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30268 + hostPort: 30268 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30269 + hostPort: 30269 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30270 + hostPort: 30270 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30271 + hostPort: 30271 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30272 + hostPort: 30272 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30273 + hostPort: 30273 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30274 + hostPort: 30274 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30275 + hostPort: 30275 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 30276 + hostPort: 30276 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30277 + hostPort: 30277 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30278 + hostPort: 30278 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30279 + hostPort: 30279 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30280 + hostPort: 30280 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30281 + hostPort: 30281 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30282 + hostPort: 30282 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30283 + hostPort: 30283 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30284 + hostPort: 30284 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30285 + hostPort: 30285 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30286 + hostPort: 30286 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30287 + hostPort: 30287 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30288 + hostPort: 30288 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30289 + hostPort: 30289 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30290 + hostPort: 30290 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30291 + hostPort: 30291 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30292 + hostPort: 30292 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30293 + hostPort: 30293 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30294 + hostPort: 30294 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30295 + hostPort: 30295 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30296 + hostPort: 30296 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30297 + hostPort: 30297 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30298 + hostPort: 30298 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 30299 + hostPort: 30299 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30300 + hostPort: 30300 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30301 + hostPort: 30301 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30302 + hostPort: 30302 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30303 + hostPort: 30303 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30304 + hostPort: 30304 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30305 + hostPort: 30305 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30306 + hostPort: 30306 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30307 + hostPort: 30307 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30308 + hostPort: 30308 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30309 + hostPort: 30309 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30310 + hostPort: 30310 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30311 + hostPort: 30311 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30312 + hostPort: 30312 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30313 + hostPort: 30313 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30314 + hostPort: 30314 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30315 + hostPort: 30315 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30316 + hostPort: 30316 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30317 + hostPort: 30317 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30318 + hostPort: 30318 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30319 + hostPort: 30319 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30320 + hostPort: 30320 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30321 + hostPort: 30321 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30322 
+ hostPort: 30322 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30323 + hostPort: 30323 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30324 + hostPort: 30324 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30325 + hostPort: 30325 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30326 + hostPort: 30326 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30327 + hostPort: 30327 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30328 + hostPort: 30328 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30329 + hostPort: 30329 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30330 + hostPort: 30330 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30331 + hostPort: 30331 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30332 + hostPort: 30332 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30333 + hostPort: 30333 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30334 + hostPort: 30334 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30335 + hostPort: 30335 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30336 + hostPort: 30336 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30337 + hostPort: 30337 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30338 + hostPort: 30338 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30339 + hostPort: 30339 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30340 + hostPort: 30340 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30341 + hostPort: 30341 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30342 + hostPort: 30342 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30343 + hostPort: 30343 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30344 + hostPort: 30344 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30345 + hostPort: 30345 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30346 + hostPort: 30346 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30347 + hostPort: 30347 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30348 + hostPort: 30348 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30349 + hostPort: 30349 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30350 + hostPort: 30350 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30351 + hostPort: 30351 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30352 + hostPort: 30352 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30353 + hostPort: 30353 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30354 + hostPort: 30354 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30355 + hostPort: 30355 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30356 + hostPort: 30356 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30357 + hostPort: 30357 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30358 + hostPort: 30358 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30359 + hostPort: 30359 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30360 + hostPort: 30360 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30361 + hostPort: 30361 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30362 + hostPort: 30362 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30363 + hostPort: 30363 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30364 + hostPort: 30364 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30365 + hostPort: 30365 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30366 + hostPort: 30366 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30367 + hostPort: 30367 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30368 + hostPort: 30368 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 30369 + hostPort: 30369 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30370 + hostPort: 30370 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30371 + hostPort: 30371 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30372 + hostPort: 30372 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30373 + hostPort: 30373 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30374 + hostPort: 30374 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30375 + hostPort: 30375 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30376 + hostPort: 30376 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30377 + hostPort: 30377 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30378 + hostPort: 30378 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30379 + hostPort: 30379 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30380 + hostPort: 30380 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30381 + hostPort: 30381 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30382 + hostPort: 30382 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30383 + hostPort: 30383 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30384 + hostPort: 30384 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30385 + hostPort: 30385 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30386 + hostPort: 30386 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30387 + hostPort: 30387 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30388 + hostPort: 30388 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30389 + hostPort: 30389 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30390 + hostPort: 30390 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30391 + hostPort: 30391 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 30392 + hostPort: 30392 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30393 + hostPort: 30393 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30394 + hostPort: 30394 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30395 + hostPort: 30395 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30396 + hostPort: 30396 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30397 + hostPort: 30397 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30398 + hostPort: 30398 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30399 + hostPort: 30399 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30400 + hostPort: 30400 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30401 + hostPort: 30401 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30402 + hostPort: 30402 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30403 + hostPort: 30403 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30404 + hostPort: 30404 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30405 + hostPort: 30405 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30406 + hostPort: 30406 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30407 + hostPort: 30407 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30408 + hostPort: 30408 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30409 + hostPort: 30409 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30410 + hostPort: 30410 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30411 + hostPort: 30411 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30412 + hostPort: 30412 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30413 + hostPort: 30413 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30414 + hostPort: 30414 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 30415 + hostPort: 30415 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30416 + hostPort: 30416 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30417 + hostPort: 30417 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30418 + hostPort: 30418 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30419 + hostPort: 30419 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30420 + hostPort: 30420 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30421 + hostPort: 30421 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30422 + hostPort: 30422 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30423 + hostPort: 30423 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30424 + hostPort: 30424 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30425 + hostPort: 30425 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30426 + hostPort: 30426 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30427 + hostPort: 30427 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30428 + hostPort: 30428 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30429 + hostPort: 30429 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30430 + hostPort: 30430 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30431 + hostPort: 30431 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30432 + hostPort: 30432 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30433 + hostPort: 30433 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30434 + hostPort: 30434 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30435 + hostPort: 30435 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30436 + hostPort: 30436 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30437 + hostPort: 30437 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30438 
+ hostPort: 30438 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30439 + hostPort: 30439 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30440 + hostPort: 30440 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30441 + hostPort: 30441 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30442 + hostPort: 30442 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30443 + hostPort: 30443 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30444 + hostPort: 30444 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30445 + hostPort: 30445 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30446 + hostPort: 30446 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30447 + hostPort: 30447 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30448 + hostPort: 30448 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30449 + hostPort: 30449 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30450 + hostPort: 30450 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30451 + hostPort: 30451 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30452 + hostPort: 30452 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30453 + hostPort: 30453 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30454 + hostPort: 30454 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30455 + hostPort: 30455 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30456 + hostPort: 30456 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30457 + hostPort: 30457 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30458 + hostPort: 30458 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30459 + hostPort: 30459 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30460 + hostPort: 30460 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30461 + hostPort: 30461 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30462 + hostPort: 30462 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30463 + hostPort: 30463 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30464 + hostPort: 30464 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30465 + hostPort: 30465 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30466 + hostPort: 30466 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30467 + hostPort: 30467 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30468 + hostPort: 30468 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30469 + hostPort: 30469 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30470 + hostPort: 30470 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30471 + hostPort: 30471 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30472 + hostPort: 30472 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30473 + hostPort: 30473 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30474 + hostPort: 30474 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30475 + hostPort: 30475 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30476 + hostPort: 30476 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30477 + hostPort: 30477 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30478 + hostPort: 30478 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30479 + hostPort: 30479 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30480 + hostPort: 30480 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30481 + hostPort: 30481 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30482 + hostPort: 30482 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30483 + hostPort: 30483 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30484 + hostPort: 30484 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 30485 + hostPort: 30485 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30486 + hostPort: 30486 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30487 + hostPort: 30487 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30488 + hostPort: 30488 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30489 + hostPort: 30489 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30490 + hostPort: 30490 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30491 + hostPort: 30491 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30492 + hostPort: 30492 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30493 + hostPort: 30493 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30494 + hostPort: 30494 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30495 + hostPort: 30495 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30496 + hostPort: 30496 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30497 + hostPort: 30497 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30498 + hostPort: 30498 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30499 + hostPort: 30499 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30500 + hostPort: 30500 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30501 + hostPort: 30501 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30502 + hostPort: 30502 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30503 + hostPort: 30503 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30504 + hostPort: 30504 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30505 + hostPort: 30505 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30506 + hostPort: 30506 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30507 + hostPort: 30507 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 30508 + hostPort: 30508 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30509 + hostPort: 30509 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30510 + hostPort: 30510 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30511 + hostPort: 30511 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30512 + hostPort: 30512 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30513 + hostPort: 30513 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30514 + hostPort: 30514 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30515 + hostPort: 30515 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30516 + hostPort: 30516 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30517 + hostPort: 30517 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30518 + hostPort: 30518 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30519 + hostPort: 30519 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30520 + hostPort: 30520 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30521 + hostPort: 30521 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30522 + hostPort: 30522 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30523 + hostPort: 30523 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30524 + hostPort: 30524 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30525 + hostPort: 30525 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30526 + hostPort: 30526 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30527 + hostPort: 30527 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30528 + hostPort: 30528 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30529 + hostPort: 30529 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30530 + hostPort: 30530 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 30531 + hostPort: 30531 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30532 + hostPort: 30532 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30533 + hostPort: 30533 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30534 + hostPort: 30534 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30535 + hostPort: 30535 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30536 + hostPort: 30536 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30537 + hostPort: 30537 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30538 + hostPort: 30538 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30539 + hostPort: 30539 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30540 + hostPort: 30540 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30541 + hostPort: 30541 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30542 + hostPort: 30542 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30543 + hostPort: 30543 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30544 + hostPort: 30544 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30545 + hostPort: 30545 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30546 + hostPort: 30546 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30547 + hostPort: 30547 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30548 + hostPort: 30548 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30549 + hostPort: 30549 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30550 + hostPort: 30550 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30551 + hostPort: 30551 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30552 + hostPort: 30552 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30553 + hostPort: 30553 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30554 
+ hostPort: 30554 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30555 + hostPort: 30555 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30556 + hostPort: 30556 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30557 + hostPort: 30557 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30558 + hostPort: 30558 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30559 + hostPort: 30559 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30560 + hostPort: 30560 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30561 + hostPort: 30561 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30562 + hostPort: 30562 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30563 + hostPort: 30563 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30564 + hostPort: 30564 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30565 + hostPort: 30565 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30566 + hostPort: 30566 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30567 + hostPort: 30567 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30568 + hostPort: 30568 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30569 + hostPort: 30569 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30570 + hostPort: 30570 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30571 + hostPort: 30571 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30572 + hostPort: 30572 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30573 + hostPort: 30573 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30574 + hostPort: 30574 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30575 + hostPort: 30575 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30576 + hostPort: 30576 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30577 + hostPort: 30577 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30578 + hostPort: 30578 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30579 + hostPort: 30579 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30580 + hostPort: 30580 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30581 + hostPort: 30581 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30582 + hostPort: 30582 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30583 + hostPort: 30583 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30584 + hostPort: 30584 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30585 + hostPort: 30585 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30586 + hostPort: 30586 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30587 + hostPort: 30587 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30588 + hostPort: 30588 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30589 + hostPort: 30589 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30590 + hostPort: 30590 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30591 + hostPort: 30591 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30592 + hostPort: 30592 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30593 + hostPort: 30593 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30594 + hostPort: 30594 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30595 + hostPort: 30595 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30596 + hostPort: 30596 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30597 + hostPort: 30597 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30598 + hostPort: 30598 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30599 + hostPort: 30599 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30600 + hostPort: 30600 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 30601 + hostPort: 30601 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30602 + hostPort: 30602 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30603 + hostPort: 30603 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30604 + hostPort: 30604 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30605 + hostPort: 30605 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30606 + hostPort: 30606 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30607 + hostPort: 30607 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30608 + hostPort: 30608 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30609 + hostPort: 30609 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30610 + hostPort: 30610 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30611 + hostPort: 30611 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30612 + hostPort: 30612 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30613 + hostPort: 30613 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30614 + hostPort: 30614 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30615 + hostPort: 30615 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30616 + hostPort: 30616 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30617 + hostPort: 30617 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30618 + hostPort: 30618 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30619 + hostPort: 30619 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30620 + hostPort: 30620 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30621 + hostPort: 30621 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30622 + hostPort: 30622 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30623 + hostPort: 30623 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 30624 + hostPort: 30624 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30625 + hostPort: 30625 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30626 + hostPort: 30626 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30627 + hostPort: 30627 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30628 + hostPort: 30628 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30629 + hostPort: 30629 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30630 + hostPort: 30630 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30631 + hostPort: 30631 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30632 + hostPort: 30632 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30633 + hostPort: 30633 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30634 + hostPort: 30634 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30635 + hostPort: 30635 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30636 + hostPort: 30636 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30637 + hostPort: 30637 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30638 + hostPort: 30638 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30639 + hostPort: 30639 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30640 + hostPort: 30640 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30641 + hostPort: 30641 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30642 + hostPort: 30642 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30643 + hostPort: 30643 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30644 + hostPort: 30644 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30645 + hostPort: 30645 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30646 + hostPort: 30646 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 30647 + hostPort: 30647 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30648 + hostPort: 30648 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30649 + hostPort: 30649 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30650 + hostPort: 30650 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30651 + hostPort: 30651 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30652 + hostPort: 30652 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30653 + hostPort: 30653 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30654 + hostPort: 30654 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30655 + hostPort: 30655 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30656 + hostPort: 30656 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30657 + hostPort: 30657 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30658 + hostPort: 30658 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30659 + hostPort: 30659 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30660 + hostPort: 30660 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30661 + hostPort: 30661 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30662 + hostPort: 30662 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30663 + hostPort: 30663 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30664 + hostPort: 30664 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30665 + hostPort: 30665 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30666 + hostPort: 30666 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30667 + hostPort: 30667 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30668 + hostPort: 30668 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30669 + hostPort: 30669 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30670 
+ hostPort: 30670 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30671 + hostPort: 30671 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30672 + hostPort: 30672 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30673 + hostPort: 30673 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30674 + hostPort: 30674 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30675 + hostPort: 30675 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30676 + hostPort: 30676 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30677 + hostPort: 30677 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30678 + hostPort: 30678 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30679 + hostPort: 30679 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30680 + hostPort: 30680 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30681 + hostPort: 30681 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30682 + hostPort: 30682 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30683 + hostPort: 30683 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30684 + hostPort: 30684 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30685 + hostPort: 30685 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30686 + hostPort: 30686 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30687 + hostPort: 30687 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30688 + hostPort: 30688 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30689 + hostPort: 30689 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30690 + hostPort: 30690 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30691 + hostPort: 30691 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30692 + hostPort: 30692 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30693 + hostPort: 30693 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30694 + hostPort: 30694 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30695 + hostPort: 30695 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30696 + hostPort: 30696 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30697 + hostPort: 30697 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30698 + hostPort: 30698 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30699 + hostPort: 30699 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30700 + hostPort: 30700 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30701 + hostPort: 30701 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30702 + hostPort: 30702 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30703 + hostPort: 30703 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30704 + hostPort: 30704 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30705 + hostPort: 30705 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30706 + hostPort: 30706 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30707 + hostPort: 30707 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30708 + hostPort: 30708 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30709 + hostPort: 30709 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30710 + hostPort: 30710 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30711 + hostPort: 30711 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30712 + hostPort: 30712 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30713 + hostPort: 30713 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30714 + hostPort: 30714 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30715 + hostPort: 30715 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30716 + hostPort: 30716 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 30717 + hostPort: 30717 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30718 + hostPort: 30718 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30719 + hostPort: 30719 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30720 + hostPort: 30720 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30721 + hostPort: 30721 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30722 + hostPort: 30722 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30723 + hostPort: 30723 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30724 + hostPort: 30724 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30725 + hostPort: 30725 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30726 + hostPort: 30726 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30727 + hostPort: 30727 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30728 + hostPort: 30728 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30729 + hostPort: 30729 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30730 + hostPort: 30730 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30731 + hostPort: 30731 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30732 + hostPort: 30732 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30733 + hostPort: 30733 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30734 + hostPort: 30734 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30735 + hostPort: 30735 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30736 + hostPort: 30736 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30737 + hostPort: 30737 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30738 + hostPort: 30738 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30739 + hostPort: 30739 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 30740 + hostPort: 30740 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30741 + hostPort: 30741 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30742 + hostPort: 30742 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30743 + hostPort: 30743 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30744 + hostPort: 30744 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30745 + hostPort: 30745 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30746 + hostPort: 30746 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30747 + hostPort: 30747 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30748 + hostPort: 30748 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30749 + hostPort: 30749 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30750 + hostPort: 30750 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30751 + hostPort: 30751 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30752 + hostPort: 30752 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30753 + hostPort: 30753 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30754 + hostPort: 30754 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30755 + hostPort: 30755 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30756 + hostPort: 30756 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30757 + hostPort: 30757 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30758 + hostPort: 30758 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30759 + hostPort: 30759 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30760 + hostPort: 30760 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30761 + hostPort: 30761 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30762 + hostPort: 30762 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 30763 + hostPort: 30763 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30764 + hostPort: 30764 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30765 + hostPort: 30765 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30766 + hostPort: 30766 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30767 + hostPort: 30767 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30768 + hostPort: 30768 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30769 + hostPort: 30769 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30770 + hostPort: 30770 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30771 + hostPort: 30771 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30772 + hostPort: 30772 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30773 + hostPort: 30773 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30774 + hostPort: 30774 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30775 + hostPort: 30775 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30776 + hostPort: 30776 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30777 + hostPort: 30777 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30778 + hostPort: 30778 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30779 + hostPort: 30779 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30780 + hostPort: 30780 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30781 + hostPort: 30781 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30782 + hostPort: 30782 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30783 + hostPort: 30783 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30784 + hostPort: 30784 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30785 + hostPort: 30785 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30786 
+ hostPort: 30786 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30787 + hostPort: 30787 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30788 + hostPort: 30788 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30789 + hostPort: 30789 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30790 + hostPort: 30790 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30791 + hostPort: 30791 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30792 + hostPort: 30792 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30793 + hostPort: 30793 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30794 + hostPort: 30794 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30795 + hostPort: 30795 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30796 + hostPort: 30796 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30797 + hostPort: 30797 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30798 + hostPort: 30798 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30799 + hostPort: 30799 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30800 + hostPort: 30800 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30801 + hostPort: 30801 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30802 + hostPort: 30802 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30803 + hostPort: 30803 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30804 + hostPort: 30804 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30805 + hostPort: 30805 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30806 + hostPort: 30806 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30807 + hostPort: 30807 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30808 + hostPort: 30808 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30809 + hostPort: 30809 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30810 + hostPort: 30810 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30811 + hostPort: 30811 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30812 + hostPort: 30812 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30813 + hostPort: 30813 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30814 + hostPort: 30814 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30815 + hostPort: 30815 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30816 + hostPort: 30816 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30817 + hostPort: 30817 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30818 + hostPort: 30818 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30819 + hostPort: 30819 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30820 + hostPort: 30820 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30821 + hostPort: 30821 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30822 + hostPort: 30822 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30823 + hostPort: 30823 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30824 + hostPort: 30824 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30825 + hostPort: 30825 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30826 + hostPort: 30826 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30827 + hostPort: 30827 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30828 + hostPort: 30828 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30829 + hostPort: 30829 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30830 + hostPort: 30830 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30831 + hostPort: 30831 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30832 + hostPort: 30832 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 30833 + hostPort: 30833 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30834 + hostPort: 30834 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30835 + hostPort: 30835 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30836 + hostPort: 30836 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30837 + hostPort: 30837 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30838 + hostPort: 30838 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30839 + hostPort: 30839 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30840 + hostPort: 30840 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30841 + hostPort: 30841 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30842 + hostPort: 30842 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30843 + hostPort: 30843 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30844 + hostPort: 30844 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30845 + hostPort: 30845 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30846 + hostPort: 30846 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30847 + hostPort: 30847 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30848 + hostPort: 30848 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30849 + hostPort: 30849 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30850 + hostPort: 30850 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30851 + hostPort: 30851 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30852 + hostPort: 30852 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30853 + hostPort: 30853 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30854 + hostPort: 30854 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30855 + hostPort: 30855 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 30856 + hostPort: 30856 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30857 + hostPort: 30857 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30858 + hostPort: 30858 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30859 + hostPort: 30859 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30860 + hostPort: 30860 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30861 + hostPort: 30861 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30862 + hostPort: 30862 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30863 + hostPort: 30863 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30864 + hostPort: 30864 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30865 + hostPort: 30865 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30866 + hostPort: 30866 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30867 + hostPort: 30867 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30868 + hostPort: 30868 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30869 + hostPort: 30869 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30870 + hostPort: 30870 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30871 + hostPort: 30871 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30872 + hostPort: 30872 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30873 + hostPort: 30873 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30874 + hostPort: 30874 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30875 + hostPort: 30875 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30876 + hostPort: 30876 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30877 + hostPort: 30877 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30878 + hostPort: 30878 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 30879 + hostPort: 30879 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30880 + hostPort: 30880 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30881 + hostPort: 30881 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30882 + hostPort: 30882 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30883 + hostPort: 30883 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30884 + hostPort: 30884 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30885 + hostPort: 30885 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30886 + hostPort: 30886 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30887 + hostPort: 30887 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30888 + hostPort: 30888 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30889 + hostPort: 30889 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30890 + hostPort: 30890 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30891 + hostPort: 30891 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30892 + hostPort: 30892 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30893 + hostPort: 30893 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30894 + hostPort: 30894 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30895 + hostPort: 30895 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30896 + hostPort: 30896 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30897 + hostPort: 30897 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30898 + hostPort: 30898 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30899 + hostPort: 30899 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30900 + hostPort: 30900 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30901 + hostPort: 30901 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30902 
+ hostPort: 30902 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30903 + hostPort: 30903 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30904 + hostPort: 30904 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30905 + hostPort: 30905 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30906 + hostPort: 30906 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30907 + hostPort: 30907 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30908 + hostPort: 30908 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30909 + hostPort: 30909 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30910 + hostPort: 30910 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30911 + hostPort: 30911 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30912 + hostPort: 30912 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30913 + hostPort: 30913 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30914 + hostPort: 30914 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30915 + hostPort: 30915 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30916 + hostPort: 30916 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30917 + hostPort: 30917 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30918 + hostPort: 30918 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30919 + hostPort: 30919 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30920 + hostPort: 30920 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30921 + hostPort: 30921 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30922 + hostPort: 30922 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30923 + hostPort: 30923 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30924 + hostPort: 30924 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30925 + hostPort: 30925 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30926 + hostPort: 30926 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30927 + hostPort: 30927 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30928 + hostPort: 30928 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30929 + hostPort: 30929 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30930 + hostPort: 30930 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30931 + hostPort: 30931 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30932 + hostPort: 30932 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30933 + hostPort: 30933 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30934 + hostPort: 30934 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30935 + hostPort: 30935 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30936 + hostPort: 30936 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30937 + hostPort: 30937 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30938 + hostPort: 30938 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30939 + hostPort: 30939 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30940 + hostPort: 30940 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30941 + hostPort: 30941 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30942 + hostPort: 30942 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30943 + hostPort: 30943 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30944 + hostPort: 30944 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30945 + hostPort: 30945 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30946 + hostPort: 30946 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30947 + hostPort: 30947 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30948 + hostPort: 30948 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 30949 + hostPort: 30949 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30950 + hostPort: 30950 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30951 + hostPort: 30951 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30952 + hostPort: 30952 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30953 + hostPort: 30953 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30954 + hostPort: 30954 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30955 + hostPort: 30955 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30956 + hostPort: 30956 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30957 + hostPort: 30957 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30958 + hostPort: 30958 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30959 + hostPort: 30959 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30960 + hostPort: 30960 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30961 + hostPort: 30961 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30962 + hostPort: 30962 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30963 + hostPort: 30963 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30964 + hostPort: 30964 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30965 + hostPort: 30965 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30966 + hostPort: 30966 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30967 + hostPort: 30967 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30968 + hostPort: 30968 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30969 + hostPort: 30969 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30970 + hostPort: 30970 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30971 + hostPort: 30971 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 30972 + hostPort: 30972 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30973 + hostPort: 30973 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30974 + hostPort: 30974 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30975 + hostPort: 30975 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30976 + hostPort: 30976 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30977 + hostPort: 30977 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30978 + hostPort: 30978 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30979 + hostPort: 30979 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30980 + hostPort: 30980 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30981 + hostPort: 30981 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30982 + hostPort: 30982 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30983 + hostPort: 30983 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30984 + hostPort: 30984 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30985 + hostPort: 30985 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30986 + hostPort: 30986 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30987 + hostPort: 30987 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30988 + hostPort: 30988 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30989 + hostPort: 30989 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30990 + hostPort: 30990 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30991 + hostPort: 30991 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30992 + hostPort: 30992 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30993 + hostPort: 30993 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30994 + hostPort: 30994 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 30995 + hostPort: 30995 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30996 + hostPort: 30996 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30997 + hostPort: 30997 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30998 + hostPort: 30998 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 30999 + hostPort: 30999 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31000 + hostPort: 31000 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31001 + hostPort: 31001 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31002 + hostPort: 31002 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31003 + hostPort: 31003 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31004 + hostPort: 31004 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31005 + hostPort: 31005 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31006 + hostPort: 31006 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31007 + hostPort: 31007 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31008 + hostPort: 31008 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31009 + hostPort: 31009 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31010 + hostPort: 31010 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31011 + hostPort: 31011 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31012 + hostPort: 31012 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31013 + hostPort: 31013 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31014 + hostPort: 31014 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31015 + hostPort: 31015 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31016 + hostPort: 31016 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31017 + hostPort: 31017 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31018 
+ hostPort: 31018 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31019 + hostPort: 31019 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31020 + hostPort: 31020 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31021 + hostPort: 31021 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31022 + hostPort: 31022 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31023 + hostPort: 31023 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31024 + hostPort: 31024 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31025 + hostPort: 31025 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31026 + hostPort: 31026 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31027 + hostPort: 31027 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31028 + hostPort: 31028 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31029 + hostPort: 31029 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31030 + hostPort: 31030 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31031 + hostPort: 31031 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31032 + hostPort: 31032 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31033 + hostPort: 31033 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31034 + hostPort: 31034 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31035 + hostPort: 31035 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31036 + hostPort: 31036 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31037 + hostPort: 31037 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31038 + hostPort: 31038 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31039 + hostPort: 31039 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31040 + hostPort: 31040 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31041 + hostPort: 31041 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31042 + hostPort: 31042 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31043 + hostPort: 31043 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31044 + hostPort: 31044 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31045 + hostPort: 31045 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31046 + hostPort: 31046 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31047 + hostPort: 31047 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31048 + hostPort: 31048 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31049 + hostPort: 31049 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31050 + hostPort: 31050 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31051 + hostPort: 31051 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31052 + hostPort: 31052 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31053 + hostPort: 31053 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31054 + hostPort: 31054 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31055 + hostPort: 31055 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31056 + hostPort: 31056 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31057 + hostPort: 31057 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31058 + hostPort: 31058 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31059 + hostPort: 31059 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31060 + hostPort: 31060 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31061 + hostPort: 31061 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31062 + hostPort: 31062 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31063 + hostPort: 31063 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31064 + hostPort: 31064 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 31065 + hostPort: 31065 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31066 + hostPort: 31066 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31067 + hostPort: 31067 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31068 + hostPort: 31068 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31069 + hostPort: 31069 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31070 + hostPort: 31070 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31071 + hostPort: 31071 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31072 + hostPort: 31072 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31073 + hostPort: 31073 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31074 + hostPort: 31074 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31075 + hostPort: 31075 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31076 + hostPort: 31076 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31077 + hostPort: 31077 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31078 + hostPort: 31078 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31079 + hostPort: 31079 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31080 + hostPort: 31080 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31081 + hostPort: 31081 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31082 + hostPort: 31082 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31083 + hostPort: 31083 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31084 + hostPort: 31084 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31085 + hostPort: 31085 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31086 + hostPort: 31086 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31087 + hostPort: 31087 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 31088 + hostPort: 31088 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31089 + hostPort: 31089 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31090 + hostPort: 31090 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31091 + hostPort: 31091 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31092 + hostPort: 31092 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31093 + hostPort: 31093 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31094 + hostPort: 31094 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31095 + hostPort: 31095 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31096 + hostPort: 31096 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31097 + hostPort: 31097 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31098 + hostPort: 31098 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31099 + hostPort: 31099 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31100 + hostPort: 31100 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31101 + hostPort: 31101 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31102 + hostPort: 31102 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31103 + hostPort: 31103 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31104 + hostPort: 31104 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31105 + hostPort: 31105 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31106 + hostPort: 31106 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31107 + hostPort: 31107 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31108 + hostPort: 31108 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31109 + hostPort: 31109 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31110 + hostPort: 31110 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 31111 + hostPort: 31111 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31112 + hostPort: 31112 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31113 + hostPort: 31113 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31114 + hostPort: 31114 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31115 + hostPort: 31115 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31116 + hostPort: 31116 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31117 + hostPort: 31117 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31118 + hostPort: 31118 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31119 + hostPort: 31119 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31120 + hostPort: 31120 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31121 + hostPort: 31121 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31122 + hostPort: 31122 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31123 + hostPort: 31123 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31124 + hostPort: 31124 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31125 + hostPort: 31125 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31126 + hostPort: 31126 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31127 + hostPort: 31127 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31128 + hostPort: 31128 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31129 + hostPort: 31129 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31130 + hostPort: 31130 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31131 + hostPort: 31131 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31132 + hostPort: 31132 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31133 + hostPort: 31133 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31134 
+ hostPort: 31134 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31135 + hostPort: 31135 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31136 + hostPort: 31136 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31137 + hostPort: 31137 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31138 + hostPort: 31138 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31139 + hostPort: 31139 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31140 + hostPort: 31140 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31141 + hostPort: 31141 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31142 + hostPort: 31142 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31143 + hostPort: 31143 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31144 + hostPort: 31144 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31145 + hostPort: 31145 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31146 + hostPort: 31146 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31147 + hostPort: 31147 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31148 + hostPort: 31148 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31149 + hostPort: 31149 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31150 + hostPort: 31150 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31151 + hostPort: 31151 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31152 + hostPort: 31152 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31153 + hostPort: 31153 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31154 + hostPort: 31154 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31155 + hostPort: 31155 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31156 + hostPort: 31156 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31157 + hostPort: 31157 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31158 + hostPort: 31158 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31159 + hostPort: 31159 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31160 + hostPort: 31160 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31161 + hostPort: 31161 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31162 + hostPort: 31162 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31163 + hostPort: 31163 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31164 + hostPort: 31164 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31165 + hostPort: 31165 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31166 + hostPort: 31166 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31167 + hostPort: 31167 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31168 + hostPort: 31168 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31169 + hostPort: 31169 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31170 + hostPort: 31170 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31171 + hostPort: 31171 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31172 + hostPort: 31172 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31173 + hostPort: 31173 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31174 + hostPort: 31174 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31175 + hostPort: 31175 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31176 + hostPort: 31176 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31177 + hostPort: 31177 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31178 + hostPort: 31178 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31179 + hostPort: 31179 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31180 + hostPort: 31180 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 31181 + hostPort: 31181 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31182 + hostPort: 31182 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31183 + hostPort: 31183 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31184 + hostPort: 31184 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31185 + hostPort: 31185 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31186 + hostPort: 31186 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31187 + hostPort: 31187 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31188 + hostPort: 31188 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31189 + hostPort: 31189 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31190 + hostPort: 31190 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31191 + hostPort: 31191 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31192 + hostPort: 31192 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31193 + hostPort: 31193 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31194 + hostPort: 31194 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31195 + hostPort: 31195 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31196 + hostPort: 31196 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31197 + hostPort: 31197 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31198 + hostPort: 31198 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31199 + hostPort: 31199 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31200 + hostPort: 31200 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31201 + hostPort: 31201 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31202 + hostPort: 31202 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31203 + hostPort: 31203 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 31204 + hostPort: 31204 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31205 + hostPort: 31205 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31206 + hostPort: 31206 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31207 + hostPort: 31207 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31208 + hostPort: 31208 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31209 + hostPort: 31209 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31210 + hostPort: 31210 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31211 + hostPort: 31211 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31212 + hostPort: 31212 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31213 + hostPort: 31213 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31214 + hostPort: 31214 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31215 + hostPort: 31215 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31216 + hostPort: 31216 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31217 + hostPort: 31217 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31218 + hostPort: 31218 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31219 + hostPort: 31219 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31220 + hostPort: 31220 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31221 + hostPort: 31221 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31222 + hostPort: 31222 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31223 + hostPort: 31223 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31224 + hostPort: 31224 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31225 + hostPort: 31225 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31226 + hostPort: 31226 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 31227 + hostPort: 31227 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31228 + hostPort: 31228 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31229 + hostPort: 31229 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31230 + hostPort: 31230 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31231 + hostPort: 31231 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31232 + hostPort: 31232 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31233 + hostPort: 31233 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31234 + hostPort: 31234 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31235 + hostPort: 31235 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31236 + hostPort: 31236 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31237 + hostPort: 31237 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31238 + hostPort: 31238 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31239 + hostPort: 31239 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31240 + hostPort: 31240 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31241 + hostPort: 31241 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31242 + hostPort: 31242 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31243 + hostPort: 31243 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31244 + hostPort: 31244 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31245 + hostPort: 31245 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31246 + hostPort: 31246 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31247 + hostPort: 31247 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31248 + hostPort: 31248 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31249 + hostPort: 31249 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31250 
+ hostPort: 31250 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31251 + hostPort: 31251 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31252 + hostPort: 31252 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31253 + hostPort: 31253 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31254 + hostPort: 31254 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31255 + hostPort: 31255 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31256 + hostPort: 31256 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31257 + hostPort: 31257 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31258 + hostPort: 31258 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31259 + hostPort: 31259 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31260 + hostPort: 31260 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31261 + hostPort: 31261 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31262 + hostPort: 31262 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31263 + hostPort: 31263 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31264 + hostPort: 31264 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31265 + hostPort: 31265 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31266 + hostPort: 31266 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31267 + hostPort: 31267 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31268 + hostPort: 31268 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31269 + hostPort: 31269 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31270 + hostPort: 31270 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31271 + hostPort: 31271 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31272 + hostPort: 31272 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31273 + hostPort: 31273 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31274 + hostPort: 31274 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31275 + hostPort: 31275 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31276 + hostPort: 31276 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31277 + hostPort: 31277 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31278 + hostPort: 31278 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31279 + hostPort: 31279 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31280 + hostPort: 31280 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31281 + hostPort: 31281 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31282 + hostPort: 31282 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31283 + hostPort: 31283 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31284 + hostPort: 31284 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31285 + hostPort: 31285 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31286 + hostPort: 31286 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31287 + hostPort: 31287 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31288 + hostPort: 31288 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31289 + hostPort: 31289 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31290 + hostPort: 31290 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31291 + hostPort: 31291 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31292 + hostPort: 31292 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31293 + hostPort: 31293 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31294 + hostPort: 31294 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31295 + hostPort: 31295 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31296 + hostPort: 31296 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 31297 + hostPort: 31297 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31298 + hostPort: 31298 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31299 + hostPort: 31299 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31300 + hostPort: 31300 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31301 + hostPort: 31301 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31302 + hostPort: 31302 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31303 + hostPort: 31303 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31304 + hostPort: 31304 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31305 + hostPort: 31305 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31306 + hostPort: 31306 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31307 + hostPort: 31307 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31308 + hostPort: 31308 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31309 + hostPort: 31309 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31310 + hostPort: 31310 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31311 + hostPort: 31311 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31312 + hostPort: 31312 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31313 + hostPort: 31313 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31314 + hostPort: 31314 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31315 + hostPort: 31315 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31316 + hostPort: 31316 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31317 + hostPort: 31317 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31318 + hostPort: 31318 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31319 + hostPort: 31319 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 31320 + hostPort: 31320 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31321 + hostPort: 31321 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31322 + hostPort: 31322 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31323 + hostPort: 31323 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31324 + hostPort: 31324 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31325 + hostPort: 31325 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31326 + hostPort: 31326 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31327 + hostPort: 31327 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31328 + hostPort: 31328 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31329 + hostPort: 31329 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31330 + hostPort: 31330 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31331 + hostPort: 31331 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31332 + hostPort: 31332 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31333 + hostPort: 31333 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31334 + hostPort: 31334 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31335 + hostPort: 31335 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31336 + hostPort: 31336 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31337 + hostPort: 31337 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31338 + hostPort: 31338 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31339 + hostPort: 31339 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31340 + hostPort: 31340 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31341 + hostPort: 31341 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31342 + hostPort: 31342 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 31343 + hostPort: 31343 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31344 + hostPort: 31344 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31345 + hostPort: 31345 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31346 + hostPort: 31346 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31347 + hostPort: 31347 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31348 + hostPort: 31348 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31349 + hostPort: 31349 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31350 + hostPort: 31350 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31351 + hostPort: 31351 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31352 + hostPort: 31352 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31353 + hostPort: 31353 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31354 + hostPort: 31354 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31355 + hostPort: 31355 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31356 + hostPort: 31356 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31357 + hostPort: 31357 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31358 + hostPort: 31358 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31359 + hostPort: 31359 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31360 + hostPort: 31360 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31361 + hostPort: 31361 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31362 + hostPort: 31362 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31363 + hostPort: 31363 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31364 + hostPort: 31364 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31365 + hostPort: 31365 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31366 
+ hostPort: 31366 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31367 + hostPort: 31367 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31368 + hostPort: 31368 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31369 + hostPort: 31369 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31370 + hostPort: 31370 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31371 + hostPort: 31371 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31372 + hostPort: 31372 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31373 + hostPort: 31373 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31374 + hostPort: 31374 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31375 + hostPort: 31375 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31376 + hostPort: 31376 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31377 + hostPort: 31377 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31378 + hostPort: 31378 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31379 + hostPort: 31379 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31380 + hostPort: 31380 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31381 + hostPort: 31381 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31382 + hostPort: 31382 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31383 + hostPort: 31383 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31384 + hostPort: 31384 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31385 + hostPort: 31385 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31386 + hostPort: 31386 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31387 + hostPort: 31387 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31388 + hostPort: 31388 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31389 + hostPort: 31389 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31390 + hostPort: 31390 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31391 + hostPort: 31391 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31392 + hostPort: 31392 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31393 + hostPort: 31393 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31394 + hostPort: 31394 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31395 + hostPort: 31395 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31396 + hostPort: 31396 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31397 + hostPort: 31397 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31398 + hostPort: 31398 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31399 + hostPort: 31399 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31400 + hostPort: 31400 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31401 + hostPort: 31401 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31402 + hostPort: 31402 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31403 + hostPort: 31403 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31404 + hostPort: 31404 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31405 + hostPort: 31405 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31406 + hostPort: 31406 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31407 + hostPort: 31407 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31408 + hostPort: 31408 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31409 + hostPort: 31409 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31410 + hostPort: 31410 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31411 + hostPort: 31411 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31412 + hostPort: 31412 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 31413 + hostPort: 31413 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31414 + hostPort: 31414 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31415 + hostPort: 31415 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31416 + hostPort: 31416 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31417 + hostPort: 31417 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31418 + hostPort: 31418 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31419 + hostPort: 31419 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31420 + hostPort: 31420 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31421 + hostPort: 31421 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31422 + hostPort: 31422 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31423 + hostPort: 31423 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31424 + hostPort: 31424 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31425 + hostPort: 31425 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31426 + hostPort: 31426 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31427 + hostPort: 31427 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31428 + hostPort: 31428 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31429 + hostPort: 31429 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31430 + hostPort: 31430 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31431 + hostPort: 31431 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31432 + hostPort: 31432 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31433 + hostPort: 31433 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31434 + hostPort: 31434 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31435 + hostPort: 31435 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 31436 + hostPort: 31436 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31437 + hostPort: 31437 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31438 + hostPort: 31438 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31439 + hostPort: 31439 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31440 + hostPort: 31440 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31441 + hostPort: 31441 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31442 + hostPort: 31442 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31443 + hostPort: 31443 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31444 + hostPort: 31444 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31445 + hostPort: 31445 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31446 + hostPort: 31446 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31447 + hostPort: 31447 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31448 + hostPort: 31448 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31449 + hostPort: 31449 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31450 + hostPort: 31450 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31451 + hostPort: 31451 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31452 + hostPort: 31452 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31453 + hostPort: 31453 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31454 + hostPort: 31454 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31455 + hostPort: 31455 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31456 + hostPort: 31456 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31457 + hostPort: 31457 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31458 + hostPort: 31458 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 31459 + hostPort: 31459 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31460 + hostPort: 31460 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31461 + hostPort: 31461 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31462 + hostPort: 31462 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31463 + hostPort: 31463 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31464 + hostPort: 31464 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31465 + hostPort: 31465 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31466 + hostPort: 31466 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31467 + hostPort: 31467 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31468 + hostPort: 31468 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31469 + hostPort: 31469 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31470 + hostPort: 31470 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31471 + hostPort: 31471 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31472 + hostPort: 31472 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31473 + hostPort: 31473 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31474 + hostPort: 31474 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31475 + hostPort: 31475 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31476 + hostPort: 31476 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31477 + hostPort: 31477 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31478 + hostPort: 31478 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31479 + hostPort: 31479 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31480 + hostPort: 31480 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31481 + hostPort: 31481 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31482 
+ hostPort: 31482 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31483 + hostPort: 31483 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31484 + hostPort: 31484 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31485 + hostPort: 31485 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31486 + hostPort: 31486 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31487 + hostPort: 31487 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31488 + hostPort: 31488 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31489 + hostPort: 31489 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31490 + hostPort: 31490 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31491 + hostPort: 31491 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31492 + hostPort: 31492 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31493 + hostPort: 31493 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31494 + hostPort: 31494 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31495 + hostPort: 31495 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31496 + hostPort: 31496 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31497 + hostPort: 31497 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31498 + hostPort: 31498 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31499 + hostPort: 31499 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31500 + hostPort: 31500 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31501 + hostPort: 31501 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31502 + hostPort: 31502 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31503 + hostPort: 31503 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31504 + hostPort: 31504 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31505 + hostPort: 31505 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31506 + hostPort: 31506 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31507 + hostPort: 31507 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31508 + hostPort: 31508 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31509 + hostPort: 31509 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31510 + hostPort: 31510 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31511 + hostPort: 31511 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31512 + hostPort: 31512 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31513 + hostPort: 31513 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31514 + hostPort: 31514 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31515 + hostPort: 31515 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31516 + hostPort: 31516 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31517 + hostPort: 31517 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31518 + hostPort: 31518 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31519 + hostPort: 31519 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31520 + hostPort: 31520 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31521 + hostPort: 31521 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31522 + hostPort: 31522 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31523 + hostPort: 31523 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31524 + hostPort: 31524 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31525 + hostPort: 31525 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31526 + hostPort: 31526 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31527 + hostPort: 31527 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31528 + hostPort: 31528 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 31529 + hostPort: 31529 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31530 + hostPort: 31530 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31531 + hostPort: 31531 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31532 + hostPort: 31532 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31533 + hostPort: 31533 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31534 + hostPort: 31534 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31535 + hostPort: 31535 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31536 + hostPort: 31536 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31537 + hostPort: 31537 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31538 + hostPort: 31538 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31539 + hostPort: 31539 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31540 + hostPort: 31540 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31541 + hostPort: 31541 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31542 + hostPort: 31542 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31543 + hostPort: 31543 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31544 + hostPort: 31544 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31545 + hostPort: 31545 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31546 + hostPort: 31546 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31547 + hostPort: 31547 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31548 + hostPort: 31548 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31549 + hostPort: 31549 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31550 + hostPort: 31550 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31551 + hostPort: 31551 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 31552 + hostPort: 31552 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31553 + hostPort: 31553 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31554 + hostPort: 31554 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31555 + hostPort: 31555 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31556 + hostPort: 31556 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31557 + hostPort: 31557 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31558 + hostPort: 31558 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31559 + hostPort: 31559 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31560 + hostPort: 31560 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31561 + hostPort: 31561 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31562 + hostPort: 31562 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31563 + hostPort: 31563 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31564 + hostPort: 31564 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31565 + hostPort: 31565 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31566 + hostPort: 31566 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31567 + hostPort: 31567 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31568 + hostPort: 31568 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31569 + hostPort: 31569 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31570 + hostPort: 31570 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31571 + hostPort: 31571 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31572 + hostPort: 31572 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31573 + hostPort: 31573 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31574 + hostPort: 31574 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 31575 + hostPort: 31575 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31576 + hostPort: 31576 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31577 + hostPort: 31577 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31578 + hostPort: 31578 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31579 + hostPort: 31579 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31580 + hostPort: 31580 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31581 + hostPort: 31581 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31582 + hostPort: 31582 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31583 + hostPort: 31583 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31584 + hostPort: 31584 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31585 + hostPort: 31585 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31586 + hostPort: 31586 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31587 + hostPort: 31587 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31588 + hostPort: 31588 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31589 + hostPort: 31589 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31590 + hostPort: 31590 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31591 + hostPort: 31591 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31592 + hostPort: 31592 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31593 + hostPort: 31593 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31594 + hostPort: 31594 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31595 + hostPort: 31595 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31596 + hostPort: 31596 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31597 + hostPort: 31597 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31598 
+ hostPort: 31598 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31599 + hostPort: 31599 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31600 + hostPort: 31600 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31601 + hostPort: 31601 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31602 + hostPort: 31602 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31603 + hostPort: 31603 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31604 + hostPort: 31604 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31605 + hostPort: 31605 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31606 + hostPort: 31606 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31607 + hostPort: 31607 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31608 + hostPort: 31608 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31609 + hostPort: 31609 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31610 + hostPort: 31610 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31611 + hostPort: 31611 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31612 + hostPort: 31612 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31613 + hostPort: 31613 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31614 + hostPort: 31614 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31615 + hostPort: 31615 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31616 + hostPort: 31616 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31617 + hostPort: 31617 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31618 + hostPort: 31618 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31619 + hostPort: 31619 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31620 + hostPort: 31620 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31621 + hostPort: 31621 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31622 + hostPort: 31622 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31623 + hostPort: 31623 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31624 + hostPort: 31624 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31625 + hostPort: 31625 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31626 + hostPort: 31626 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31627 + hostPort: 31627 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31628 + hostPort: 31628 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31629 + hostPort: 31629 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31630 + hostPort: 31630 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31631 + hostPort: 31631 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31632 + hostPort: 31632 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31633 + hostPort: 31633 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31634 + hostPort: 31634 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31635 + hostPort: 31635 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31636 + hostPort: 31636 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31637 + hostPort: 31637 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31638 + hostPort: 31638 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31639 + hostPort: 31639 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31640 + hostPort: 31640 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31641 + hostPort: 31641 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31642 + hostPort: 31642 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31643 + hostPort: 31643 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31644 + hostPort: 31644 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 31645 + hostPort: 31645 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31646 + hostPort: 31646 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31647 + hostPort: 31647 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31648 + hostPort: 31648 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31649 + hostPort: 31649 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31650 + hostPort: 31650 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31651 + hostPort: 31651 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31652 + hostPort: 31652 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31653 + hostPort: 31653 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31654 + hostPort: 31654 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31655 + hostPort: 31655 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31656 + hostPort: 31656 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31657 + hostPort: 31657 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31658 + hostPort: 31658 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31659 + hostPort: 31659 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31660 + hostPort: 31660 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31661 + hostPort: 31661 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31662 + hostPort: 31662 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31663 + hostPort: 31663 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31664 + hostPort: 31664 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31665 + hostPort: 31665 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31666 + hostPort: 31666 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31667 + hostPort: 31667 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 31668 + hostPort: 31668 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31669 + hostPort: 31669 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31670 + hostPort: 31670 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31671 + hostPort: 31671 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31672 + hostPort: 31672 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31673 + hostPort: 31673 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31674 + hostPort: 31674 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31675 + hostPort: 31675 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31676 + hostPort: 31676 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31677 + hostPort: 31677 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31678 + hostPort: 31678 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31679 + hostPort: 31679 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31680 + hostPort: 31680 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31681 + hostPort: 31681 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31682 + hostPort: 31682 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31683 + hostPort: 31683 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31684 + hostPort: 31684 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31685 + hostPort: 31685 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31686 + hostPort: 31686 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31687 + hostPort: 31687 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31688 + hostPort: 31688 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31689 + hostPort: 31689 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31690 + hostPort: 31690 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 31691 + hostPort: 31691 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31692 + hostPort: 31692 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31693 + hostPort: 31693 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31694 + hostPort: 31694 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31695 + hostPort: 31695 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31696 + hostPort: 31696 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31697 + hostPort: 31697 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31698 + hostPort: 31698 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31699 + hostPort: 31699 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31700 + hostPort: 31700 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31701 + hostPort: 31701 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31702 + hostPort: 31702 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31703 + hostPort: 31703 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31704 + hostPort: 31704 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31705 + hostPort: 31705 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31706 + hostPort: 31706 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31707 + hostPort: 31707 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31708 + hostPort: 31708 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31709 + hostPort: 31709 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31710 + hostPort: 31710 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31711 + hostPort: 31711 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31712 + hostPort: 31712 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31713 + hostPort: 31713 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31714 
+ hostPort: 31714 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31715 + hostPort: 31715 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31716 + hostPort: 31716 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31717 + hostPort: 31717 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31718 + hostPort: 31718 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31719 + hostPort: 31719 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31720 + hostPort: 31720 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31721 + hostPort: 31721 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31722 + hostPort: 31722 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31723 + hostPort: 31723 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31724 + hostPort: 31724 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31725 + hostPort: 31725 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31726 + hostPort: 31726 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31727 + hostPort: 31727 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31728 + hostPort: 31728 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31729 + hostPort: 31729 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31730 + hostPort: 31730 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31731 + hostPort: 31731 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31732 + hostPort: 31732 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31733 + hostPort: 31733 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31734 + hostPort: 31734 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31735 + hostPort: 31735 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31736 + hostPort: 31736 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31737 + hostPort: 31737 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31738 + hostPort: 31738 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31739 + hostPort: 31739 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31740 + hostPort: 31740 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31741 + hostPort: 31741 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31742 + hostPort: 31742 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31743 + hostPort: 31743 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31744 + hostPort: 31744 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31745 + hostPort: 31745 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31746 + hostPort: 31746 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31747 + hostPort: 31747 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31748 + hostPort: 31748 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31749 + hostPort: 31749 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31750 + hostPort: 31750 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31751 + hostPort: 31751 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31752 + hostPort: 31752 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31753 + hostPort: 31753 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31754 + hostPort: 31754 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31755 + hostPort: 31755 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31756 + hostPort: 31756 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31757 + hostPort: 31757 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31758 + hostPort: 31758 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31759 + hostPort: 31759 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31760 + hostPort: 31760 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 31761 + hostPort: 31761 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31762 + hostPort: 31762 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31763 + hostPort: 31763 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31764 + hostPort: 31764 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31765 + hostPort: 31765 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31766 + hostPort: 31766 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31767 + hostPort: 31767 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31768 + hostPort: 31768 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31769 + hostPort: 31769 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31770 + hostPort: 31770 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31771 + hostPort: 31771 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31772 + hostPort: 31772 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31773 + hostPort: 31773 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31774 + hostPort: 31774 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31775 + hostPort: 31775 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31776 + hostPort: 31776 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31777 + hostPort: 31777 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31778 + hostPort: 31778 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31779 + hostPort: 31779 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31780 + hostPort: 31780 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31781 + hostPort: 31781 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31782 + hostPort: 31782 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31783 + hostPort: 31783 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 31784 + hostPort: 31784 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31785 + hostPort: 31785 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31786 + hostPort: 31786 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31787 + hostPort: 31787 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31788 + hostPort: 31788 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31789 + hostPort: 31789 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31790 + hostPort: 31790 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31791 + hostPort: 31791 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31792 + hostPort: 31792 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31793 + hostPort: 31793 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31794 + hostPort: 31794 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31795 + hostPort: 31795 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31796 + hostPort: 31796 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31797 + hostPort: 31797 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31798 + hostPort: 31798 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31799 + hostPort: 31799 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31800 + hostPort: 31800 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31801 + hostPort: 31801 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31802 + hostPort: 31802 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31803 + hostPort: 31803 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31804 + hostPort: 31804 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31805 + hostPort: 31805 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31806 + hostPort: 31806 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 31807 + hostPort: 31807 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31808 + hostPort: 31808 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31809 + hostPort: 31809 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31810 + hostPort: 31810 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31811 + hostPort: 31811 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31812 + hostPort: 31812 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31813 + hostPort: 31813 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31814 + hostPort: 31814 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31815 + hostPort: 31815 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31816 + hostPort: 31816 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31817 + hostPort: 31817 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31818 + hostPort: 31818 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31819 + hostPort: 31819 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31820 + hostPort: 31820 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31821 + hostPort: 31821 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31822 + hostPort: 31822 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31823 + hostPort: 31823 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31824 + hostPort: 31824 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31825 + hostPort: 31825 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31826 + hostPort: 31826 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31827 + hostPort: 31827 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31828 + hostPort: 31828 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31829 + hostPort: 31829 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31830 
+ hostPort: 31830 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31831 + hostPort: 31831 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31832 + hostPort: 31832 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31833 + hostPort: 31833 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31834 + hostPort: 31834 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31835 + hostPort: 31835 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31836 + hostPort: 31836 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31837 + hostPort: 31837 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31838 + hostPort: 31838 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31839 + hostPort: 31839 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31840 + hostPort: 31840 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31841 + hostPort: 31841 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31842 + hostPort: 31842 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31843 + hostPort: 31843 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31844 + hostPort: 31844 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31845 + hostPort: 31845 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31846 + hostPort: 31846 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31847 + hostPort: 31847 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31848 + hostPort: 31848 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31849 + hostPort: 31849 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31850 + hostPort: 31850 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31851 + hostPort: 31851 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31852 + hostPort: 31852 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31853 + hostPort: 31853 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31854 + hostPort: 31854 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31855 + hostPort: 31855 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31856 + hostPort: 31856 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31857 + hostPort: 31857 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31858 + hostPort: 31858 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31859 + hostPort: 31859 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31860 + hostPort: 31860 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31861 + hostPort: 31861 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31862 + hostPort: 31862 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31863 + hostPort: 31863 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31864 + hostPort: 31864 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31865 + hostPort: 31865 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31866 + hostPort: 31866 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31867 + hostPort: 31867 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31868 + hostPort: 31868 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31869 + hostPort: 31869 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31870 + hostPort: 31870 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31871 + hostPort: 31871 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31872 + hostPort: 31872 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31873 + hostPort: 31873 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31874 + hostPort: 31874 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31875 + hostPort: 31875 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31876 + hostPort: 31876 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 31877 + hostPort: 31877 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31878 + hostPort: 31878 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31879 + hostPort: 31879 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31880 + hostPort: 31880 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31881 + hostPort: 31881 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31882 + hostPort: 31882 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31883 + hostPort: 31883 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31884 + hostPort: 31884 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31885 + hostPort: 31885 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31886 + hostPort: 31886 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31887 + hostPort: 31887 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31888 + hostPort: 31888 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31889 + hostPort: 31889 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31890 + hostPort: 31890 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31891 + hostPort: 31891 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31892 + hostPort: 31892 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31893 + hostPort: 31893 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31894 + hostPort: 31894 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31895 + hostPort: 31895 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31896 + hostPort: 31896 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31897 + hostPort: 31897 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31898 + hostPort: 31898 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31899 + hostPort: 31899 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 31900 + hostPort: 31900 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31901 + hostPort: 31901 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31902 + hostPort: 31902 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31903 + hostPort: 31903 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31904 + hostPort: 31904 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31905 + hostPort: 31905 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31906 + hostPort: 31906 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31907 + hostPort: 31907 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31908 + hostPort: 31908 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31909 + hostPort: 31909 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31910 + hostPort: 31910 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31911 + hostPort: 31911 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31912 + hostPort: 31912 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31913 + hostPort: 31913 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31914 + hostPort: 31914 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31915 + hostPort: 31915 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31916 + hostPort: 31916 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31917 + hostPort: 31917 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31918 + hostPort: 31918 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31919 + hostPort: 31919 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31920 + hostPort: 31920 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31921 + hostPort: 31921 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31922 + hostPort: 31922 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 31923 + hostPort: 31923 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31924 + hostPort: 31924 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31925 + hostPort: 31925 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31926 + hostPort: 31926 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31927 + hostPort: 31927 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31928 + hostPort: 31928 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31929 + hostPort: 31929 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31930 + hostPort: 31930 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31931 + hostPort: 31931 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31932 + hostPort: 31932 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31933 + hostPort: 31933 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31934 + hostPort: 31934 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31935 + hostPort: 31935 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31936 + hostPort: 31936 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31937 + hostPort: 31937 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31938 + hostPort: 31938 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31939 + hostPort: 31939 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31940 + hostPort: 31940 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31941 + hostPort: 31941 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31942 + hostPort: 31942 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31943 + hostPort: 31943 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31944 + hostPort: 31944 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31945 + hostPort: 31945 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31946 
+ hostPort: 31946 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31947 + hostPort: 31947 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31948 + hostPort: 31948 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31949 + hostPort: 31949 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31950 + hostPort: 31950 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31951 + hostPort: 31951 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31952 + hostPort: 31952 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31953 + hostPort: 31953 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31954 + hostPort: 31954 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31955 + hostPort: 31955 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31956 + hostPort: 31956 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31957 + hostPort: 31957 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31958 + hostPort: 31958 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31959 + hostPort: 31959 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31960 + hostPort: 31960 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31961 + hostPort: 31961 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31962 + hostPort: 31962 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31963 + hostPort: 31963 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31964 + hostPort: 31964 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31965 + hostPort: 31965 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31966 + hostPort: 31966 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31967 + hostPort: 31967 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31968 + hostPort: 31968 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31969 + hostPort: 31969 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31970 + hostPort: 31970 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31971 + hostPort: 31971 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31972 + hostPort: 31972 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31973 + hostPort: 31973 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31974 + hostPort: 31974 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31975 + hostPort: 31975 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31976 + hostPort: 31976 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31977 + hostPort: 31977 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31978 + hostPort: 31978 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31979 + hostPort: 31979 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31980 + hostPort: 31980 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31981 + hostPort: 31981 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31982 + hostPort: 31982 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31983 + hostPort: 31983 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31984 + hostPort: 31984 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31985 + hostPort: 31985 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31986 + hostPort: 31986 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31987 + hostPort: 31987 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31988 + hostPort: 31988 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31989 + hostPort: 31989 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31990 + hostPort: 31990 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31991 + hostPort: 31991 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31992 + hostPort: 31992 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 31993 + hostPort: 31993 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31994 + hostPort: 31994 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31995 + hostPort: 31995 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31996 + hostPort: 31996 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31997 + hostPort: 31997 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31998 + hostPort: 31998 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 31999 + hostPort: 31999 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32000 + hostPort: 32000 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32001 + hostPort: 32001 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32002 + hostPort: 32002 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32003 + hostPort: 32003 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32004 + hostPort: 32004 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32005 + hostPort: 32005 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32006 + hostPort: 32006 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32007 + hostPort: 32007 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32008 + hostPort: 32008 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32009 + hostPort: 32009 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32010 + hostPort: 32010 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32011 + hostPort: 32011 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32012 + hostPort: 32012 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32013 + hostPort: 32013 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32014 + hostPort: 32014 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32015 + hostPort: 32015 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 32016 + hostPort: 32016 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32017 + hostPort: 32017 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32018 + hostPort: 32018 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32019 + hostPort: 32019 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32020 + hostPort: 32020 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32021 + hostPort: 32021 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32022 + hostPort: 32022 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32023 + hostPort: 32023 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32024 + hostPort: 32024 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32025 + hostPort: 32025 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32026 + hostPort: 32026 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32027 + hostPort: 32027 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32028 + hostPort: 32028 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32029 + hostPort: 32029 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32030 + hostPort: 32030 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32031 + hostPort: 32031 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32032 + hostPort: 32032 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32033 + hostPort: 32033 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32034 + hostPort: 32034 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32035 + hostPort: 32035 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32036 + hostPort: 32036 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32037 + hostPort: 32037 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32038 + hostPort: 32038 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 32039 + hostPort: 32039 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32040 + hostPort: 32040 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32041 + hostPort: 32041 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32042 + hostPort: 32042 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32043 + hostPort: 32043 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32044 + hostPort: 32044 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32045 + hostPort: 32045 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32046 + hostPort: 32046 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32047 + hostPort: 32047 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32048 + hostPort: 32048 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32049 + hostPort: 32049 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32050 + hostPort: 32050 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32051 + hostPort: 32051 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32052 + hostPort: 32052 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32053 + hostPort: 32053 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32054 + hostPort: 32054 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32055 + hostPort: 32055 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32056 + hostPort: 32056 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32057 + hostPort: 32057 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32058 + hostPort: 32058 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32059 + hostPort: 32059 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32060 + hostPort: 32060 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32061 + hostPort: 32061 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32062 
+ hostPort: 32062 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32063 + hostPort: 32063 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32064 + hostPort: 32064 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32065 + hostPort: 32065 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32066 + hostPort: 32066 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32067 + hostPort: 32067 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32068 + hostPort: 32068 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32069 + hostPort: 32069 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32070 + hostPort: 32070 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32071 + hostPort: 32071 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32072 + hostPort: 32072 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32073 + hostPort: 32073 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32074 + hostPort: 32074 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32075 + hostPort: 32075 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32076 + hostPort: 32076 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32077 + hostPort: 32077 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32078 + hostPort: 32078 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32079 + hostPort: 32079 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32080 + hostPort: 32080 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32081 + hostPort: 32081 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32082 + hostPort: 32082 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32083 + hostPort: 32083 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32084 + hostPort: 32084 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32085 + hostPort: 32085 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32086 + hostPort: 32086 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32087 + hostPort: 32087 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32088 + hostPort: 32088 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32089 + hostPort: 32089 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32090 + hostPort: 32090 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32091 + hostPort: 32091 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32092 + hostPort: 32092 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32093 + hostPort: 32093 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32094 + hostPort: 32094 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32095 + hostPort: 32095 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32096 + hostPort: 32096 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32097 + hostPort: 32097 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32098 + hostPort: 32098 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32099 + hostPort: 32099 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32100 + hostPort: 32100 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32101 + hostPort: 32101 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32102 + hostPort: 32102 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32103 + hostPort: 32103 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32104 + hostPort: 32104 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32105 + hostPort: 32105 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32106 + hostPort: 32106 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32107 + hostPort: 32107 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32108 + hostPort: 32108 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 32109 + hostPort: 32109 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32110 + hostPort: 32110 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32111 + hostPort: 32111 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32112 + hostPort: 32112 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32113 + hostPort: 32113 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32114 + hostPort: 32114 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32115 + hostPort: 32115 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32116 + hostPort: 32116 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32117 + hostPort: 32117 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32118 + hostPort: 32118 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32119 + hostPort: 32119 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32120 + hostPort: 32120 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32121 + hostPort: 32121 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32122 + hostPort: 32122 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32123 + hostPort: 32123 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32124 + hostPort: 32124 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32125 + hostPort: 32125 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32126 + hostPort: 32126 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32127 + hostPort: 32127 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32128 + hostPort: 32128 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32129 + hostPort: 32129 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32130 + hostPort: 32130 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32131 + hostPort: 32131 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 32132 + hostPort: 32132 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32133 + hostPort: 32133 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32134 + hostPort: 32134 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32135 + hostPort: 32135 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32136 + hostPort: 32136 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32137 + hostPort: 32137 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32138 + hostPort: 32138 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32139 + hostPort: 32139 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32140 + hostPort: 32140 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32141 + hostPort: 32141 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32142 + hostPort: 32142 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32143 + hostPort: 32143 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32144 + hostPort: 32144 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32145 + hostPort: 32145 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32146 + hostPort: 32146 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32147 + hostPort: 32147 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32148 + hostPort: 32148 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32149 + hostPort: 32149 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32150 + hostPort: 32150 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32151 + hostPort: 32151 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32152 + hostPort: 32152 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32153 + hostPort: 32153 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32154 + hostPort: 32154 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 32155 + hostPort: 32155 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32156 + hostPort: 32156 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32157 + hostPort: 32157 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32158 + hostPort: 32158 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32159 + hostPort: 32159 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32160 + hostPort: 32160 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32161 + hostPort: 32161 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32162 + hostPort: 32162 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32163 + hostPort: 32163 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32164 + hostPort: 32164 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32165 + hostPort: 32165 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32166 + hostPort: 32166 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32167 + hostPort: 32167 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32168 + hostPort: 32168 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32169 + hostPort: 32169 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32170 + hostPort: 32170 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32171 + hostPort: 32171 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32172 + hostPort: 32172 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32173 + hostPort: 32173 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32174 + hostPort: 32174 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32175 + hostPort: 32175 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32176 + hostPort: 32176 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32177 + hostPort: 32177 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32178 
+ hostPort: 32178 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32179 + hostPort: 32179 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32180 + hostPort: 32180 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32181 + hostPort: 32181 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32182 + hostPort: 32182 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32183 + hostPort: 32183 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32184 + hostPort: 32184 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32185 + hostPort: 32185 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32186 + hostPort: 32186 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32187 + hostPort: 32187 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32188 + hostPort: 32188 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32189 + hostPort: 32189 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32190 + hostPort: 32190 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32191 + hostPort: 32191 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32192 + hostPort: 32192 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32193 + hostPort: 32193 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32194 + hostPort: 32194 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32195 + hostPort: 32195 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32196 + hostPort: 32196 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32197 + hostPort: 32197 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32198 + hostPort: 32198 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32199 + hostPort: 32199 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32200 + hostPort: 32200 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32201 + hostPort: 32201 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32202 + hostPort: 32202 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32203 + hostPort: 32203 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32204 + hostPort: 32204 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32205 + hostPort: 32205 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32206 + hostPort: 32206 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32207 + hostPort: 32207 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32208 + hostPort: 32208 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32209 + hostPort: 32209 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32210 + hostPort: 32210 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32211 + hostPort: 32211 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32212 + hostPort: 32212 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32213 + hostPort: 32213 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32214 + hostPort: 32214 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32215 + hostPort: 32215 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32216 + hostPort: 32216 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32217 + hostPort: 32217 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32218 + hostPort: 32218 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32219 + hostPort: 32219 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32220 + hostPort: 32220 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32221 + hostPort: 32221 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32222 + hostPort: 32222 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32223 + hostPort: 32223 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32224 + hostPort: 32224 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 32225 + hostPort: 32225 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32226 + hostPort: 32226 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32227 + hostPort: 32227 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32228 + hostPort: 32228 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32229 + hostPort: 32229 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32230 + hostPort: 32230 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32231 + hostPort: 32231 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32232 + hostPort: 32232 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32233 + hostPort: 32233 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32234 + hostPort: 32234 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32235 + hostPort: 32235 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32236 + hostPort: 32236 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32237 + hostPort: 32237 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32238 + hostPort: 32238 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32239 + hostPort: 32239 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32240 + hostPort: 32240 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32241 + hostPort: 32241 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32242 + hostPort: 32242 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32243 + hostPort: 32243 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32244 + hostPort: 32244 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32245 + hostPort: 32245 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32246 + hostPort: 32246 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32247 + hostPort: 32247 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 32248 + hostPort: 32248 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32249 + hostPort: 32249 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32250 + hostPort: 32250 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32251 + hostPort: 32251 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32252 + hostPort: 32252 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32253 + hostPort: 32253 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32254 + hostPort: 32254 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32255 + hostPort: 32255 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32256 + hostPort: 32256 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32257 + hostPort: 32257 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32258 + hostPort: 32258 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32259 + hostPort: 32259 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32260 + hostPort: 32260 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32261 + hostPort: 32261 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32262 + hostPort: 32262 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32263 + hostPort: 32263 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32264 + hostPort: 32264 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32265 + hostPort: 32265 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32266 + hostPort: 32266 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32267 + hostPort: 32267 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32268 + hostPort: 32268 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32269 + hostPort: 32269 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32270 + hostPort: 32270 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 32271 + hostPort: 32271 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32272 + hostPort: 32272 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32273 + hostPort: 32273 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32274 + hostPort: 32274 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32275 + hostPort: 32275 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32276 + hostPort: 32276 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32277 + hostPort: 32277 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32278 + hostPort: 32278 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32279 + hostPort: 32279 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32280 + hostPort: 32280 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32281 + hostPort: 32281 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32282 + hostPort: 32282 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32283 + hostPort: 32283 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32284 + hostPort: 32284 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32285 + hostPort: 32285 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32286 + hostPort: 32286 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32287 + hostPort: 32287 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32288 + hostPort: 32288 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32289 + hostPort: 32289 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32290 + hostPort: 32290 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32291 + hostPort: 32291 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32292 + hostPort: 32292 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32293 + hostPort: 32293 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32294 
+ hostPort: 32294 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32295 + hostPort: 32295 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32296 + hostPort: 32296 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32297 + hostPort: 32297 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32298 + hostPort: 32298 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32299 + hostPort: 32299 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32300 + hostPort: 32300 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32301 + hostPort: 32301 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32302 + hostPort: 32302 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32303 + hostPort: 32303 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32304 + hostPort: 32304 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32305 + hostPort: 32305 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32306 + hostPort: 32306 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32307 + hostPort: 32307 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32308 + hostPort: 32308 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32309 + hostPort: 32309 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32310 + hostPort: 32310 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32311 + hostPort: 32311 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32312 + hostPort: 32312 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32313 + hostPort: 32313 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32314 + hostPort: 32314 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32315 + hostPort: 32315 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32316 + hostPort: 32316 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32317 + hostPort: 32317 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32318 + hostPort: 32318 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32319 + hostPort: 32319 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32320 + hostPort: 32320 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32321 + hostPort: 32321 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32322 + hostPort: 32322 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32323 + hostPort: 32323 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32324 + hostPort: 32324 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32325 + hostPort: 32325 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32326 + hostPort: 32326 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32327 + hostPort: 32327 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32328 + hostPort: 32328 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32329 + hostPort: 32329 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32330 + hostPort: 32330 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32331 + hostPort: 32331 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32332 + hostPort: 32332 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32333 + hostPort: 32333 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32334 + hostPort: 32334 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32335 + hostPort: 32335 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32336 + hostPort: 32336 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32337 + hostPort: 32337 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32338 + hostPort: 32338 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32339 + hostPort: 32339 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32340 + hostPort: 32340 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 32341 + hostPort: 32341 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32342 + hostPort: 32342 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32343 + hostPort: 32343 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32344 + hostPort: 32344 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32345 + hostPort: 32345 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32346 + hostPort: 32346 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32347 + hostPort: 32347 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32348 + hostPort: 32348 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32349 + hostPort: 32349 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32350 + hostPort: 32350 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32351 + hostPort: 32351 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32352 + hostPort: 32352 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32353 + hostPort: 32353 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32354 + hostPort: 32354 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32355 + hostPort: 32355 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32356 + hostPort: 32356 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32357 + hostPort: 32357 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32358 + hostPort: 32358 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32359 + hostPort: 32359 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32360 + hostPort: 32360 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32361 + hostPort: 32361 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32362 + hostPort: 32362 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32363 + hostPort: 32363 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 32364 + hostPort: 32364 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32365 + hostPort: 32365 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32366 + hostPort: 32366 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32367 + hostPort: 32367 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32368 + hostPort: 32368 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32369 + hostPort: 32369 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32370 + hostPort: 32370 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32371 + hostPort: 32371 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32372 + hostPort: 32372 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32373 + hostPort: 32373 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32374 + hostPort: 32374 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32375 + hostPort: 32375 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32376 + hostPort: 32376 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32377 + hostPort: 32377 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32378 + hostPort: 32378 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32379 + hostPort: 32379 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32380 + hostPort: 32380 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32381 + hostPort: 32381 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32382 + hostPort: 32382 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32383 + hostPort: 32383 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32384 + hostPort: 32384 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32385 + hostPort: 32385 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32386 + hostPort: 32386 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 32387 + hostPort: 32387 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32388 + hostPort: 32388 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32389 + hostPort: 32389 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32390 + hostPort: 32390 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32391 + hostPort: 32391 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32392 + hostPort: 32392 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32393 + hostPort: 32393 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32394 + hostPort: 32394 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32395 + hostPort: 32395 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32396 + hostPort: 32396 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32397 + hostPort: 32397 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32398 + hostPort: 32398 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32399 + hostPort: 32399 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32400 + hostPort: 32400 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32401 + hostPort: 32401 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32402 + hostPort: 32402 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32403 + hostPort: 32403 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32404 + hostPort: 32404 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32405 + hostPort: 32405 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32406 + hostPort: 32406 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32407 + hostPort: 32407 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32408 + hostPort: 32408 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32409 + hostPort: 32409 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32410 
+ hostPort: 32410 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32411 + hostPort: 32411 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32412 + hostPort: 32412 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32413 + hostPort: 32413 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32414 + hostPort: 32414 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32415 + hostPort: 32415 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32416 + hostPort: 32416 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32417 + hostPort: 32417 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32418 + hostPort: 32418 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32419 + hostPort: 32419 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32420 + hostPort: 32420 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32421 + hostPort: 32421 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32422 + hostPort: 32422 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32423 + hostPort: 32423 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32424 + hostPort: 32424 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32425 + hostPort: 32425 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32426 + hostPort: 32426 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32427 + hostPort: 32427 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32428 + hostPort: 32428 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32429 + hostPort: 32429 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32430 + hostPort: 32430 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32431 + hostPort: 32431 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32432 + hostPort: 32432 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32433 + hostPort: 32433 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32434 + hostPort: 32434 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32435 + hostPort: 32435 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32436 + hostPort: 32436 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32437 + hostPort: 32437 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32438 + hostPort: 32438 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32439 + hostPort: 32439 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32440 + hostPort: 32440 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32441 + hostPort: 32441 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32442 + hostPort: 32442 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32443 + hostPort: 32443 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32444 + hostPort: 32444 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32445 + hostPort: 32445 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32446 + hostPort: 32446 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32447 + hostPort: 32447 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32448 + hostPort: 32448 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32449 + hostPort: 32449 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32450 + hostPort: 32450 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32451 + hostPort: 32451 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32452 + hostPort: 32452 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32453 + hostPort: 32453 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32454 + hostPort: 32454 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32455 + hostPort: 32455 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32456 + hostPort: 32456 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 32457 + hostPort: 32457 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32458 + hostPort: 32458 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32459 + hostPort: 32459 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32460 + hostPort: 32460 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32461 + hostPort: 32461 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32462 + hostPort: 32462 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32463 + hostPort: 32463 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32464 + hostPort: 32464 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32465 + hostPort: 32465 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32466 + hostPort: 32466 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32467 + hostPort: 32467 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32468 + hostPort: 32468 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32469 + hostPort: 32469 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32470 + hostPort: 32470 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32471 + hostPort: 32471 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32472 + hostPort: 32472 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32473 + hostPort: 32473 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32474 + hostPort: 32474 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32475 + hostPort: 32475 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32476 + hostPort: 32476 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32477 + hostPort: 32477 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32478 + hostPort: 32478 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32479 + hostPort: 32479 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 32480 + hostPort: 32480 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32481 + hostPort: 32481 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32482 + hostPort: 32482 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32483 + hostPort: 32483 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32484 + hostPort: 32484 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32485 + hostPort: 32485 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32486 + hostPort: 32486 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32487 + hostPort: 32487 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32488 + hostPort: 32488 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32489 + hostPort: 32489 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32490 + hostPort: 32490 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32491 + hostPort: 32491 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32492 + hostPort: 32492 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32493 + hostPort: 32493 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32494 + hostPort: 32494 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32495 + hostPort: 32495 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32496 + hostPort: 32496 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32497 + hostPort: 32497 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32498 + hostPort: 32498 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32499 + hostPort: 32499 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32500 + hostPort: 32500 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32501 + hostPort: 32501 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32502 + hostPort: 32502 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 32503 + hostPort: 32503 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32504 + hostPort: 32504 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32505 + hostPort: 32505 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32506 + hostPort: 32506 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32507 + hostPort: 32507 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32508 + hostPort: 32508 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32509 + hostPort: 32509 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32510 + hostPort: 32510 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32511 + hostPort: 32511 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32512 + hostPort: 32512 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32513 + hostPort: 32513 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32514 + hostPort: 32514 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32515 + hostPort: 32515 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32516 + hostPort: 32516 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32517 + hostPort: 32517 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32518 + hostPort: 32518 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32519 + hostPort: 32519 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32520 + hostPort: 32520 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32521 + hostPort: 32521 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32522 + hostPort: 32522 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32523 + hostPort: 32523 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32524 + hostPort: 32524 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32525 + hostPort: 32525 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32526 
+ hostPort: 32526 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32527 + hostPort: 32527 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32528 + hostPort: 32528 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32529 + hostPort: 32529 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32530 + hostPort: 32530 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32531 + hostPort: 32531 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32532 + hostPort: 32532 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32533 + hostPort: 32533 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32534 + hostPort: 32534 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32535 + hostPort: 32535 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32536 + hostPort: 32536 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32537 + hostPort: 32537 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32538 + hostPort: 32538 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32539 + hostPort: 32539 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32540 + hostPort: 32540 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32541 + hostPort: 32541 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32542 + hostPort: 32542 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32543 + hostPort: 32543 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32544 + hostPort: 32544 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32545 + hostPort: 32545 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32546 + hostPort: 32546 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32547 + hostPort: 32547 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32548 + hostPort: 32548 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32549 + hostPort: 32549 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32550 + hostPort: 32550 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32551 + hostPort: 32551 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32552 + hostPort: 32552 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32553 + hostPort: 32553 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32554 + hostPort: 32554 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32555 + hostPort: 32555 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32556 + hostPort: 32556 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32557 + hostPort: 32557 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32558 + hostPort: 32558 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32559 + hostPort: 32559 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32560 + hostPort: 32560 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32561 + hostPort: 32561 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32562 + hostPort: 32562 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32563 + hostPort: 32563 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32564 + hostPort: 32564 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32565 + hostPort: 32565 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32566 + hostPort: 32566 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32567 + hostPort: 32567 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32568 + hostPort: 32568 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32569 + hostPort: 32569 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32570 + hostPort: 32570 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32571 + hostPort: 32571 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32572 + hostPort: 32572 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 32573 + hostPort: 32573 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32574 + hostPort: 32574 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32575 + hostPort: 32575 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32576 + hostPort: 32576 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32577 + hostPort: 32577 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32578 + hostPort: 32578 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32579 + hostPort: 32579 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32580 + hostPort: 32580 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32581 + hostPort: 32581 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32582 + hostPort: 32582 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32583 + hostPort: 32583 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32584 + hostPort: 32584 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32585 + hostPort: 32585 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32586 + hostPort: 32586 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32587 + hostPort: 32587 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32588 + hostPort: 32588 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32589 + hostPort: 32589 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32590 + hostPort: 32590 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32591 + hostPort: 32591 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32592 + hostPort: 32592 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32593 + hostPort: 32593 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32594 + hostPort: 32594 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32595 + hostPort: 32595 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 32596 + hostPort: 32596 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32597 + hostPort: 32597 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32598 + hostPort: 32598 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32599 + hostPort: 32599 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32600 + hostPort: 32600 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32601 + hostPort: 32601 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32602 + hostPort: 32602 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32603 + hostPort: 32603 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32604 + hostPort: 32604 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32605 + hostPort: 32605 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32606 + hostPort: 32606 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32607 + hostPort: 32607 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32608 + hostPort: 32608 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32609 + hostPort: 32609 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32610 + hostPort: 32610 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32611 + hostPort: 32611 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32612 + hostPort: 32612 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32613 + hostPort: 32613 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32614 + hostPort: 32614 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32615 + hostPort: 32615 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32616 + hostPort: 32616 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32617 + hostPort: 32617 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32618 + hostPort: 32618 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 32619 + hostPort: 32619 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32620 + hostPort: 32620 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32621 + hostPort: 32621 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32622 + hostPort: 32622 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32623 + hostPort: 32623 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32624 + hostPort: 32624 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32625 + hostPort: 32625 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32626 + hostPort: 32626 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32627 + hostPort: 32627 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32628 + hostPort: 32628 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32629 + hostPort: 32629 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32630 + hostPort: 32630 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32631 + hostPort: 32631 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32632 + hostPort: 32632 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32633 + hostPort: 32633 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32634 + hostPort: 32634 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32635 + hostPort: 32635 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32636 + hostPort: 32636 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32637 + hostPort: 32637 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32638 + hostPort: 32638 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32639 + hostPort: 32639 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32640 + hostPort: 32640 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32641 + hostPort: 32641 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32642 
+ hostPort: 32642 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32643 + hostPort: 32643 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32644 + hostPort: 32644 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32645 + hostPort: 32645 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32646 + hostPort: 32646 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32647 + hostPort: 32647 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32648 + hostPort: 32648 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32649 + hostPort: 32649 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32650 + hostPort: 32650 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32651 + hostPort: 32651 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32652 + hostPort: 32652 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32653 + hostPort: 32653 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32654 + hostPort: 32654 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32655 + hostPort: 32655 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32656 + hostPort: 32656 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32657 + hostPort: 32657 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32658 + hostPort: 32658 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32659 + hostPort: 32659 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32660 + hostPort: 32660 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32661 + hostPort: 32661 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32662 + hostPort: 32662 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32663 + hostPort: 32663 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32664 + hostPort: 32664 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32665 + hostPort: 32665 + 
listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32666 + hostPort: 32666 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32667 + hostPort: 32667 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32668 + hostPort: 32668 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32669 + hostPort: 32669 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32670 + hostPort: 32670 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32671 + hostPort: 32671 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32672 + hostPort: 32672 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32673 + hostPort: 32673 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32674 + hostPort: 32674 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32675 + hostPort: 32675 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32676 + hostPort: 32676 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32677 + hostPort: 32677 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32678 + hostPort: 32678 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32679 + hostPort: 32679 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32680 + hostPort: 32680 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32681 + hostPort: 32681 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32682 + hostPort: 32682 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32683 + hostPort: 32683 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32684 + hostPort: 32684 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32685 + hostPort: 32685 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32686 + hostPort: 32686 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32687 + hostPort: 32687 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32688 + hostPort: 32688 + listenAddress: 
"0.0.0.0" + protocol: tcp + - containerPort: 32689 + hostPort: 32689 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32690 + hostPort: 32690 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32691 + hostPort: 32691 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32692 + hostPort: 32692 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32693 + hostPort: 32693 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32694 + hostPort: 32694 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32695 + hostPort: 32695 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32696 + hostPort: 32696 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32697 + hostPort: 32697 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32698 + hostPort: 32698 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32699 + hostPort: 32699 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32700 + hostPort: 32700 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32701 + hostPort: 32701 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32702 + hostPort: 32702 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32703 + hostPort: 32703 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32704 + hostPort: 32704 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32705 + hostPort: 32705 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32706 + hostPort: 32706 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32707 + hostPort: 32707 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32708 + hostPort: 32708 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32709 + hostPort: 32709 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32710 + hostPort: 32710 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32711 + hostPort: 32711 + listenAddress: "0.0.0.0" + protocol: 
tcp + - containerPort: 32712 + hostPort: 32712 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32713 + hostPort: 32713 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32714 + hostPort: 32714 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32715 + hostPort: 32715 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32716 + hostPort: 32716 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32717 + hostPort: 32717 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32718 + hostPort: 32718 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32719 + hostPort: 32719 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32720 + hostPort: 32720 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32721 + hostPort: 32721 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32722 + hostPort: 32722 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32723 + hostPort: 32723 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32724 + hostPort: 32724 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32725 + hostPort: 32725 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32726 + hostPort: 32726 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32727 + hostPort: 32727 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32728 + hostPort: 32728 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32729 + hostPort: 32729 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32730 + hostPort: 32730 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32731 + hostPort: 32731 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32732 + hostPort: 32732 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32733 + hostPort: 32733 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32734 + hostPort: 32734 + listenAddress: "0.0.0.0" + protocol: tcp + - 
containerPort: 32735 + hostPort: 32735 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32736 + hostPort: 32736 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32737 + hostPort: 32737 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32738 + hostPort: 32738 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32739 + hostPort: 32739 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32740 + hostPort: 32740 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32741 + hostPort: 32741 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32742 + hostPort: 32742 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32743 + hostPort: 32743 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32744 + hostPort: 32744 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32745 + hostPort: 32745 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32746 + hostPort: 32746 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32747 + hostPort: 32747 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32748 + hostPort: 32748 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32749 + hostPort: 32749 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32750 + hostPort: 32750 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32751 + hostPort: 32751 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32752 + hostPort: 32752 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32753 + hostPort: 32753 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32754 + hostPort: 32754 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32755 + hostPort: 32755 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32756 + hostPort: 32756 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32757 + hostPort: 32757 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32758 
+ hostPort: 32758 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32759 + hostPort: 32759 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32760 + hostPort: 32760 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32761 + hostPort: 32761 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32762 + hostPort: 32762 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32763 + hostPort: 32763 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32764 + hostPort: 32764 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32765 + hostPort: 32765 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32766 + hostPort: 32766 + listenAddress: "0.0.0.0" + protocol: tcp + - containerPort: 32767 + hostPort: 32767 + listenAddress: "0.0.0.0" + protocol: tcp diff --git a/tests/playground/kind/create_cluster.sh b/tests/playground/kind/create_cluster.sh new file mode 100644 index 00000000000..5566a24d99c --- /dev/null +++ b/tests/playground/kind/create_cluster.sh @@ -0,0 +1,4 @@ +kind delete cluster +kind create cluster --config cluster.yaml +# Load local skypilot image +kind load docker-image skypilot:latest diff --git a/tests/playground/kind/portmap_gen.py b/tests/playground/kind/portmap_gen.py new file mode 100644 index 00000000000..1fd526a7f80 --- /dev/null +++ b/tests/playground/kind/portmap_gen.py @@ -0,0 +1,19 @@ +preamble = """ +apiVersion: kind.x-k8s.io/v1alpha4 +kind: Cluster +nodes: +- role: control-plane + extraPortMappings:""" +suffix = """- role: worker""" +with open('cluster.yaml', 'w') as f: + f.write(preamble) + for port in range(30000, 32768): + f.write(f""" + - containerPort: {port} + hostPort: {port} + listenAddress: "0.0.0.0" + protocol: tcp""") + f.write("\n") + f.write(suffix) + + diff --git a/tests/playground/ray_k8s_example_full.yaml b/tests/playground/ray_k8s_example_full.yaml new file mode 100644 index 00000000000..93c42bab9cf --- /dev/null +++ 
b/tests/playground/ray_k8s_example_full.yaml @@ -0,0 +1,213 @@ +# A unique identifier for the head node and workers of this cluster. +cluster_name: example-cluster + +# The maximum number of workers nodes to launch in addition to the head +# node. +max_workers: 0 + +# The autoscaler will scale up the cluster faster with higher upscaling speed. +# E.g., if the task requires adding more nodes then autoscaler will gradually +# scale up the cluster in chunks of upscaling_speed*currently_running_nodes. +# This number should be > 0. +upscaling_speed: 1.0 + +# If a node is idle for this many minutes, it will be removed. +idle_timeout_minutes: 5 + +# Kubernetes resources that need to be configured for the autoscaler to be +# able to manage the Ray cluster. If any of the provided resources don't +# exist, the autoscaler will attempt to create them. If this fails, you may +# not have the required permissions and will have to request them to be +# created by your cluster administrator. +provider: + type: kubernetes + + # Exposing external IP addresses for ray pods isn't currently supported. + use_internal_ips: true + + # Namespace to use for all resources created. + namespace: ray + + # ServiceAccount created by the autoscaler for the head node pod that it + # runs in. If this field isn't provided, the head pod config below must + # contain a user-created service account with the proper permissions. + autoscaler_service_account: + apiVersion: v1 + kind: ServiceAccount + metadata: + name: autoscaler + + # Role created by the autoscaler for the head node pod that it runs in. + # If this field isn't provided, the role referenced in + # autoscaler_role_binding must exist and have at least these permissions. 
+ autoscaler_role: + kind: Role + apiVersion: rbac.authorization.k8s.io/v1 + metadata: + name: autoscaler + rules: + - apiGroups: [""] + resources: ["pods", "pods/status", "pods/exec"] + verbs: ["get", "watch", "list", "create", "delete", "patch"] + + # RoleBinding created by the autoscaler for the head node pod that it runs + # in. If this field isn't provided, the head pod config below must contain + # a user-created service account with the proper permissions. + autoscaler_role_binding: + apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + name: autoscaler + subjects: + - kind: ServiceAccount + name: autoscaler + roleRef: + kind: Role + name: autoscaler + apiGroup: rbac.authorization.k8s.io + + services: + # Service that maps to the head node of the Ray cluster. + - apiVersion: v1 + kind: Service + metadata: + # NOTE: If you're running multiple Ray clusters with services + # on one Kubernetes cluster, they must have unique service + # names. + name: example-cluster-ray-head + spec: + # This selector must match the head node pod's selector below. + selector: + component: example-cluster-ray-head + ports: + - name: client + protocol: TCP + port: 10001 + targetPort: 10001 + - name: dashboard + protocol: TCP + port: 8265 + targetPort: 8265 + - name: ray-serve + protocol: TCP + port: 8000 + targetPort: 8000 + +# Specify the pod type for the ray head node (as configured below). +head_node_type: head_node +# Specify the allowed pod types for this ray cluster and the resources they provide. +available_node_types: + worker_node: + # Minimum number of Ray workers of this Pod type. + min_workers: 0 + # Maximum number of Ray workers of this Pod type. Takes precedence over min_workers. + max_workers: 2 + # User-specified custom resources for use by Ray. Object with string keys and integer values. + # (Ray detects CPU and GPU from pod spec resource requests and limits, so no need to fill those here.) 
+ resources: {"example-resource-a": 1, "example-resource-b": 2} + node_config: + apiVersion: v1 + kind: Pod + metadata: + # Automatically generates a name for the pod with this prefix. + generateName: example-cluster-ray-worker- + spec: + restartPolicy: Never + volumes: + - name: dshm + emptyDir: + medium: Memory + containers: + - name: ray-node + imagePullPolicy: Always + image: rayproject/ray:latest + command: ["/bin/bash", "-c", "--"] + args: ["trap : TERM INT; sleep infinity & wait;"] + # This volume allocates shared memory for Ray to use for its plasma + # object store. If you do not provide this, Ray will fall back to + # /tmp which cause slowdowns if is not a shared memory volume. + volumeMounts: + - mountPath: /dev/shm + name: dshm + resources: + requests: + cpu: 1000m + memory: 1024Mi + limits: + # The maximum memory that this pod is allowed to use. The + # limit will be detected by ray and split to use 10% for + # redis, 30% for the shared memory object store, and the + # rest for application memory. If this limit is not set and + # the object store size is not set manually, ray will + # allocate a very large object store in each pod that may + # cause problems for other pods. + memory: 1024Mi + head_node: + node_config: + apiVersion: v1 + kind: Pod + metadata: + # Automatically generates a name for the pod with this prefix. + generateName: example-cluster-ray-head- + # Must match the head node service selector above if a head node + # service is required. + labels: + component: example-cluster-ray-head + spec: + # Change this if you altered the autoscaler_service_account above + # or want to provide your own. + serviceAccountName: autoscaler + + restartPolicy: Never + + # This volume allocates shared memory for Ray to use for its plasma + # object store. If you do not provide this, Ray will fall back to + # /tmp which cause slowdowns if is not a shared memory volume. 
+ volumes: + - name: dshm + emptyDir: + medium: Memory + containers: + - name: ray-node + imagePullPolicy: Always + image: rayproject/ray:latest + # Do not change this command - it keeps the pod alive until it is + # explicitly killed. + command: ["/bin/bash", "-c", "--"] + args: ['trap : TERM INT; sleep infinity & wait;'] + ports: + - containerPort: 6379 # Redis port + - containerPort: 10001 # Used by Ray Client + - containerPort: 8265 # Used by Ray Dashboard + + # This volume allocates shared memory for Ray to use for its plasma + # object store. If you do not provide this, Ray will fall back to + # /tmp which cause slowdowns if is not a shared memory volume. + volumeMounts: + - mountPath: /dev/shm + name: dshm + resources: + requests: + cpu: 1000m + memory: 1024Mi + limits: + # The maximum memory that this pod is allowed to use. The + # limit will be detected by ray and split to use 10% for + # redis, 30% for the shared memory object store, and the + # rest for application memory. If this limit is not set and + # the object store size is not set manually, ray will + # allocate a very large object store in each pod that may + # cause problems for other pods. + memory: 1024Mi + + +# Command to start ray on the head node. You don't need to change this. +# Note dashboard-host is set to 0.0.0.0 so that kubernetes can port forward. +head_start_ray_commands: + - ray stop + - ulimit -n 65536; ray start --head --port=6379 --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host 0.0.0.0 + +# Command to start ray on worker nodes. You don't need to change this. 
+worker_start_ray_commands: + - ray stop + - ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 diff --git a/tests/playground/ray_k8s_sky.yaml b/tests/playground/ray_k8s_sky.yaml new file mode 100644 index 00000000000..e686e91ff08 --- /dev/null +++ b/tests/playground/ray_k8s_sky.yaml @@ -0,0 +1,265 @@ +# run with ray up ray_k8s_sky.yaml --no-config-cache +# An unique identifier for the head node and workers of this cluster. +cluster_name: example-cluster + +# The maximum number of workers nodes to launch in addition to the head +# node. +min_workers: 0 +max_workers: 0 + +# Kubernetes resources that need to be configured for the autoscaler to be +# able to manage the Ray cluster. If any of the provided resources don't +# exist, the autoscaler will attempt to create them. If this fails, you may +# not have the required permissions and will have to request them to be +# created by your cluster administrator. +provider: + type: external + module: sky.skylet.providers.kubernetes.KubernetesNodeProvider + + # Use False if running from outside of k8s cluster + use_internal_ips: false + + # Namespace to use for all resources created. + namespace: default + + # ServiceAccount created by the autoscaler for the head node pod that it + # runs in. If this field isn't provided, the head pod config below must + # contain a user-created service account with the proper permissions. + autoscaler_service_account: + apiVersion: v1 + kind: ServiceAccount + metadata: + labels: + parent: skypilot + name: autoscaler + + # Role created by the autoscaler for the head node pod that it runs in. + # If this field isn't provided, the role referenced in + # autoscaler_role_binding must exist and have at least these permissions. 
+ autoscaler_role: + kind: Role + apiVersion: rbac.authorization.k8s.io/v1 + metadata: + labels: + parent: skypilot + name: autoscaler + rules: + - apiGroups: [""] + resources: ["pods", "pods/status", "pods/exec"] + verbs: ["get", "watch", "list", "create", "delete", "patch"] + + # RoleBinding created by the autoscaler for the head node pod that it runs + # in. If this field isn't provided, the head pod config below must contain + # a user-created service account with the proper permissions. + autoscaler_role_binding: + apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + labels: + parent: skypilot + name: autoscaler + subjects: + - kind: ServiceAccount + name: autoscaler + roleRef: + kind: Role + name: autoscaler + apiGroup: rbac.authorization.k8s.io + + services: + # Service to expose the head node pod's SSH port. + - apiVersion: v1 + kind: Service + metadata: + labels: + parent: skypilot + name: example-cluster-ray-head-ssh + spec: + type: NodePort + selector: + component: example-cluster-ray-head + ports: + - protocol: TCP + port: 22 + targetPort: 22 + # Service that maps to the head node of the Ray cluster. + - apiVersion: v1 + kind: Service + metadata: + labels: + parent: skypilot + # NOTE: If you're running multiple Ray clusters with services + # on one Kubernetes cluster, they must have unique service + # names. + name: example-cluster-ray-head + spec: + # This selector must match the head node pod's selector below. + selector: + component: example-cluster-ray-head + ports: + - name: client + protocol: TCP + port: 10001 + targetPort: 10001 + - name: dashboard + protocol: TCP + port: 8265 + targetPort: 8265 + +# Specify the pod type for the ray head node (as configured below). +head_node_type: head_node +# Specify the allowed pod types for this ray cluster and the resources they provide. +available_node_types: + worker_node: + # Minimum number of Ray workers of this Pod type. 
+ min_workers: 0 + # Maximum number of Ray workers of this Pod type. Takes precedence over min_workers. + max_workers: 0 + # User-specified custom resources for use by Ray. Object with string keys and integer values. + # (Ray detects CPU and GPU from pod spec resource requests and limits, so no need to fill those here.) + resources: {"example-resource-a": 1, "example-resource-b": 2} + node_config: + apiVersion: v1 + kind: Pod + metadata: + labels: + parent: skypilot + # Automatically generates a name for the pod with this prefix. + generateName: example-cluster-ray-worker- + spec: + restartPolicy: Never + volumes: + - name: secret-volume + secret: + secretName: ssh-key-secret + - name: dshm + emptyDir: + medium: Memory + containers: + - name: ray-node + imagePullPolicy: Never + image: skypilot:latest + command: ["/bin/bash", "-c", "--"] + args: ["trap : TERM INT; sleep infinity & wait;"] + lifecycle: + postStart: + exec: + command: ["/bin/bash", "-c", "mkdir -p ~/.ssh && cp /etc/secret-volume/ssh-publickey ~/.ssh/authorized_keys && sudo service ssh restart"] + ports: + - containerPort: 22 # Used for SSH + # This volume allocates shared memory for Ray to use for its plasma + # object store. If you do not provide this, Ray will fall back to + # /tmp which cause slowdowns if is not a shared memory volume. + volumeMounts: + - name: secret-volume + readOnly: true + mountPath: "/etc/secret-volume" + - mountPath: /dev/shm + name: dshm + resources: + requests: + cpu: 1000m + memory: 1024Mi + limits: + # The maximum memory that this pod is allowed to use. The + # limit will be detected by ray and split to use 10% for + # redis, 30% for the shared memory object store, and the + # rest for application memory. If this limit is not set and + # the object store size is not set manually, ray will + # allocate a very large object store in each pod that may + # cause problems for other pods. 
+ memory: 1024Mi + head_node: + node_config: + apiVersion: v1 + kind: Pod + metadata: + # Automatically generates a name for the pod with this prefix. + generateName: example-cluster-ray-head- + # Must match the head node service selector above if a head node + # service is required. + labels: + parent: skypilot + component: example-cluster-ray-head + spec: + # Change this if you altered the autoscaler_service_account above + # or want to provide your own. + serviceAccountName: autoscaler + + restartPolicy: Never + + # This volume allocates shared memory for Ray to use for its plasma + # object store. If you do not provide this, Ray will fall back to + # /tmp which cause slowdowns if is not a shared memory volume. + volumes: + - name: secret-volume + secret: + secretName: ssh-key-secret + - name: dshm + emptyDir: + medium: Memory + containers: + - name: ray-node + imagePullPolicy: Never + image: skypilot:latest + # Do not change this command - it keeps the pod alive until it is + # explicitly killed. + command: ["/bin/bash", "-c", "--"] + args: ['trap : TERM INT; sleep infinity & wait;'] + ports: + - containerPort: 22 # Used for SSH + - containerPort: 6379 # Redis port + - containerPort: 10001 # Used by Ray Client + - containerPort: 8265 # Used by Ray Dashboard + + # This volume allocates shared memory for Ray to use for its plasma + # object store. If you do not provide this, Ray will fall back to + # /tmp which cause slowdowns if is not a shared memory volume. + volumeMounts: + - name: secret-volume + readOnly: true + mountPath: "/etc/secret-volume" + - mountPath: /dev/shm + name: dshm + lifecycle: + postStart: + exec: + command: ["/bin/bash", "-c", "mkdir -p ~/.ssh && cp /etc/secret-volume/ssh-publickey ~/.ssh/authorized_keys && sudo service ssh restart"] + resources: + requests: + cpu: 1000m + memory: 1024Mi + limits: + # The maximum memory that this pod is allowed to use. 
The + # limit will be detected by ray and split to use 10% for + # redis, 30% for the shared memory object store, and the + # rest for application memory. If this limit is not set and + # the object store size is not set manually, ray will + # allocate a very large object store in each pod that may + # cause problems for other pods. + memory: 1024Mi + +# Command to start ray on the head node. You don't need to change this. +# Note dashboard-host is set to 0.0.0.0 so that kubernetes can port forward. +head_start_ray_commands: + - ray stop + - ulimit -n 65536; ray start --head --port=6379 --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host 0.0.0.0 --object-store-memory 78643201 + +# Command to start ray on worker nodes. You don't need to change this. +worker_start_ray_commands: + - ray stop + - ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 --object-store-memory 78643201 + + +auth: + ssh_user: sky + ssh_private_key: ~/.ssh/sky-key + +# These fields are required for external cloud providers. 
+setup_commands: [] +head_setup_commands: [] +worker_setup_commands: [] +cluster_synced_files: [] +file_mounts_sync_continuously: False +file_mounts: {} +initialization_commands: [] From 197acea53b49546b41487b3ee74c05ca7c9aff1b Mon Sep 17 00:00:00 2001 From: Romil Date: Sat, 4 Feb 2023 19:02:31 -0800 Subject: [PATCH 002/183] wip --- sky/__init__.py | 2 + sky/authentication.py | 20 ++ sky/backends/backend_utils.py | 9 + sky/backends/cloud_vm_ray_backend.py | 34 ++ sky/clouds/__init__.py | 2 + sky/clouds/kubernetes.py | 205 +++++++++++ sky/clouds/service_catalog/__init__.py | 2 +- .../service_catalog/kubernetes_catalog.py | 122 +++++++ sky/registry.py | 1 + sky/setup_files/MANIFEST.in | 1 + sky/templates/kubernetes-ray.yml.j2 | 317 ++++++++++++++++++ 11 files changed, 714 insertions(+), 1 deletion(-) create mode 100644 sky/clouds/kubernetes.py create mode 100644 sky/clouds/service_catalog/kubernetes_catalog.py create mode 100644 sky/templates/kubernetes-ray.yml.j2 diff --git a/sky/__init__.py b/sky/__init__.py index 3462fd67dc5..b57fe7fa670 100644 --- a/sky/__init__.py +++ b/sky/__init__.py @@ -30,6 +30,7 @@ GCP = clouds.GCP Lambda = clouds.Lambda Local = clouds.Local +Kubernetes = clouds.Kubernetes optimize = Optimizer.optimize __all__ = [ @@ -43,6 +44,7 @@ 'OptimizeTarget', 'backends', 'benchmark', + 'Kubernetes', 'list_accelerators', '__root_dir__', 'Storage', diff --git a/sky/authentication.py b/sky/authentication.py index ae1aaa66c8c..26ab56663d4 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -334,3 +334,23 @@ def setup_lambda_authentication(config: Dict[str, Any]) -> Dict[str, Any]: config['file_mounts'] = file_mounts return config + + +def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: + get_or_generate_keys() + + # Run kubectl command to add the public key to the cluster. + public_key_path = os.path.expanduser(PUBLIC_SSH_KEY_PATH) + # TODO(romilb): Change 'ssh-key-secret' to a unique name. 
+ cmd = f"kubectl create secret generic ssh-key-secret --from-file=ssh-publickey={public_key_path}" + subprocess.run(cmd, shell=True, check=True) + + # Need to use ~ relative path because Ray uses the same + # path for finding the public key path on both local and head node. + config['auth']['ssh_public_key'] = PUBLIC_SSH_KEY_PATH + + file_mounts = config['file_mounts'] + file_mounts[PUBLIC_SSH_KEY_PATH] = PUBLIC_SSH_KEY_PATH + config['file_mounts'] = file_mounts + + return config diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index c033bb788b4..e47aeae7d12 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -945,6 +945,8 @@ def _add_auth_to_cluster_config(cloud: clouds.Cloud, cluster_config_file: str): config = auth.setup_azure_authentication(config) elif isinstance(cloud, clouds.Lambda): config = auth.setup_lambda_authentication(config) + elif isinstance(cloud, clouds.Kubernetes): + config = auth.setup_kubernetes_authentication(config) else: assert isinstance(cloud, clouds.Local), cloud # Local cluster case, authentication is already filled by the user @@ -1815,11 +1817,18 @@ def _query_status_lambda( return [] +def _query_status_kubernetes( + cluster: str, + ray_config: Dict[str, Any], # pylint: disable=unused-argument +) -> List[global_user_state.ClusterStatus]: + raise NotImplementedError + _QUERY_STATUS_FUNCS = { 'AWS': _query_status_aws, 'GCP': _query_status_gcp, 'Azure': _query_status_azure, 'Lambda': _query_status_lambda, + 'Kubernetes': _query_status_kubernetes, } diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 787c12f3d39..68a01901408 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -116,6 +116,7 @@ def _get_cluster_config_template(cloud): clouds.GCP: 'gcp-ray.yml.j2', clouds.Lambda: 'lambda-ray.yml.j2', clouds.Local: 'local-ray.yml.j2', + clouds.Kubernetes: 'kubernetes-ray.yml.j2', } return 
cloud_to_template[type(cloud)] @@ -814,6 +815,34 @@ def _update_blocklist_on_lambda_error( self._blocked_resources.add( launchable_resources.copy(region=r.name, zone=None)) + def _update_blocklist_on_kubernetes_error( + self, launchable_resources: 'resources_lib.Resources', region, + zones, stdout, stderr): + del zones # Unused. + style = colorama.Style + stdout_splits = stdout.split('\n') + stderr_splits = stderr.split('\n') + errors = [ + s.strip() + for s in stdout_splits + stderr_splits + if 'KubernetesError:' in s.strip() + ] + if not errors: + logger.info('====== stdout ======') + for s in stdout_splits: + print(s) + logger.info('====== stderr ======') + for s in stderr_splits: + print(s) + with ux_utils.print_exception_no_traceback(): + raise RuntimeError('Errors occurred during provisioning; ' + 'check logs above.') + + logger.warning(f'Got error(s) in {region.name}:') + messages = '\n\t'.join(errors) + logger.warning(f'{style.DIM}\t{messages}{style.RESET_ALL}') + self._blocked_resources.add(launchable_resources.copy(zone=None)) + def _update_blocklist_on_local_error( self, launchable_resources: 'resources_lib.Resources', region, zones, stdout, stderr): @@ -875,6 +904,7 @@ def _update_blocklist_on_error( clouds.GCP: self._update_blocklist_on_gcp_error, clouds.Lambda: self._update_blocklist_on_lambda_error, clouds.Local: self._update_blocklist_on_local_error, + clouds.Kubernetes: self._update_blocklist_on_kubernetes_error, } cloud = launchable_resources.cloud cloud_type = type(cloud) @@ -935,6 +965,10 @@ def _yield_region_zones(self, to_provision: resources_lib.Resources, local_regions = clouds.Local.regions() region = local_regions[0].name zones = None + elif cloud.is_same_cloud(clouds.Kubernetes()): + regions = clouds.Kubernetes.regions() + region = regions[0].name + zones = None else: assert False, cloud assert region == prev_resources.region, ( diff --git a/sky/clouds/__init__.py b/sky/clouds/__init__.py index ca442dcb5b3..02c03c1c399 100644 --- 
a/sky/clouds/__init__.py +++ b/sky/clouds/__init__.py @@ -9,6 +9,7 @@ from sky.clouds.gcp import GCP from sky.clouds.lambda_cloud import Lambda from sky.clouds.local import Local +from sky.clouds.kubernetes import Kubernetes __all__ = [ 'AWS', @@ -17,6 +18,7 @@ 'GCP', 'Lambda', 'Local', + 'Kubernetes', 'CloudImplementationFeatures', 'Region', 'Zone', diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py new file mode 100644 index 00000000000..62f9da7417d --- /dev/null +++ b/sky/clouds/kubernetes.py @@ -0,0 +1,205 @@ +import json +import os +import typing +from typing import Dict, Iterator, List, Optional, Tuple + +from sky import clouds +from sky.clouds import service_catalog + +if typing.TYPE_CHECKING: + # Renaming to avoid shadowing variables. + from sky import resources as resources_lib + +_CREDENTIAL_FILES = [ + 'config', +] + + +@clouds.CLOUD_REGISTRY.register +class Kubernetes(clouds.Cloud): + + _REPR = 'Kubernetes' + _regions: List[clouds.Region] = ['kubernetes'] + + @classmethod + def regions(cls) -> List[clouds.Region]: + return cls._regions + + @classmethod + def regions_with_offering(cls, instance_type: Optional[str], + accelerators: Optional[Dict[str, int]], + use_spot: bool, region: Optional[str], + zone: Optional[str]) -> List[clouds.Region]: + del accelerators, zone # unused + if use_spot: + return [] + if instance_type is None: + # Fall back to default regions + regions = cls.regions() + else: + regions = service_catalog.get_region_zones_for_instance_type( + instance_type, use_spot, 'kubernetes') + + if region is not None: + regions = [r for r in regions if r.name == region] + return regions + + @classmethod + def region_zones_provision_loop( + cls, + *, + instance_type: Optional[str] = None, + accelerators: Optional[Dict[str, int]] = None, + use_spot: bool = False, + ) -> Iterator[Tuple[clouds.Region, List[clouds.Zone]]]: + regions = cls.regions_with_offering(instance_type, + accelerators, + use_spot, + region=None, + zone=None) + for 
region in regions: + yield region, region.zones + + def instance_type_to_hourly_cost(self, + instance_type: str, + use_spot: bool, + region: Optional[str] = None, + zone: Optional[str] = None) -> float: + return service_catalog.get_hourly_cost(instance_type, + use_spot=use_spot, + region=region, + zone=zone, + clouds='kubernetes') + + def accelerators_to_hourly_cost(self, + accelerators: Dict[str, int], + use_spot: bool, + region: Optional[str] = None, + zone: Optional[str] = None) -> float: + del accelerators, use_spot, region, zone # unused + return 0.0 + + def get_egress_cost(self, num_gigabytes: float) -> float: + return 0.0 + + def __repr__(self): + return self._REPR + + def is_same_cloud(self, other: clouds.Cloud) -> bool: + return isinstance(other, Kubernetes) + + @classmethod + def get_default_instance_type(cls) -> str: + return 'cpu1' + + @classmethod + def get_accelerators_from_instance_type( + cls, + instance_type: str, + ) -> Optional[Dict[str, int]]: + return service_catalog.get_accelerators_from_instance_type( + instance_type, clouds='kubernetes') + + @classmethod + def get_vcpus_from_instance_type( + cls, + instance_type: str, + ) -> Optional[float]: + return service_catalog.get_vcpus_from_instance_type(instance_type, + clouds='kubernetes') + + @classmethod + def get_zone_shell_cmd(cls) -> Optional[str]: + return None + + def make_deploy_resources_variables( + self, resources: 'resources_lib.Resources', + region: Optional['clouds.Region'], + zones: Optional[List['clouds.Zone']]) -> Dict[str, Optional[str]]: + del zones + if region is None: + region = self._get_default_region() + + r = resources + acc_dict = self.get_accelerators_from_instance_type(r.instance_type) + if acc_dict is not None: + custom_resources = json.dumps(acc_dict, separators=(',', ':')) + else: + custom_resources = None + + # TODO: return number of CPUs and memory here + return { + 'instance_type': resources.instance_type, + 'custom_resources': custom_resources, + 'region': 
region.name, + } + + def get_feasible_launchable_resources(self, + resources: 'resources_lib.Resources'): + if resources.use_spot: + return ([], []) + fuzzy_candidate_list: List[str] = [] + if resources.instance_type is not None: + assert resources.is_launchable(), resources + resources = resources.copy(accelerators=None) + return ([resources], fuzzy_candidate_list) + + def _make(instance_list): + resource_list = [] + for instance_type in instance_list: + r = resources.copy( + cloud=Kubernetes(), + instance_type=instance_type, + accelerators=None, + ) + resource_list.append(r) + return resource_list + + # Currently, handle a filter on accelerators only. + accelerators = resources.accelerators + if accelerators is None: + # No requirements to filter, so just return a default VM type. + return (_make([Kubernetes.get_default_instance_type()]), + fuzzy_candidate_list) + + assert len(accelerators) == 1, resources + acc, acc_count = list(accelerators.items())[0] + (instance_list, fuzzy_candidate_list + ) = service_catalog.get_instance_type_for_accelerator( + acc, + acc_count, + use_spot=resources.use_spot, + region=resources.region, + zone=resources.zone, + clouds='kubernetes') + if instance_list is None: + return ([], fuzzy_candidate_list) + return (_make(instance_list), fuzzy_candidate_list) + + def check_credentials(self) -> Tuple[bool, Optional[str]]: + # TODO(romilb): Check credential validity using k8s api + return (os.path.exists(os.path.expanduser(f'~/.kube/config')), + "Kubeconfig doesn't exist") + + def get_credential_file_mounts(self) -> Dict[str, str]: + return { + f'~/.kube/{filename}': f'~/.kube/{filename}' + for filename in _CREDENTIAL_FILES + if os.path.exists(os.path.expanduser(f'~/.kube/{filename}')) + } + + def instance_type_exists(self, instance_type: str) -> bool: + return service_catalog.instance_type_exists(instance_type, 'kubernetes') + + def validate_region_zone(self, region: Optional[str], zone: Optional[str]): + return 
service_catalog.validate_region_zone(region, + zone, + clouds='kubernetes') + + def accelerator_in_region_or_zone(self, + accelerator: str, + acc_count: int, + region: Optional[str] = None, + zone: Optional[str] = None) -> bool: + return service_catalog.accelerator_in_region_or_zone( + accelerator, acc_count, region, zone, 'kubernetes') diff --git a/sky/clouds/service_catalog/__init__.py b/sky/clouds/service_catalog/__init__.py index 23d90faf30b..074fdfe5971 100644 --- a/sky/clouds/service_catalog/__init__.py +++ b/sky/clouds/service_catalog/__init__.py @@ -15,7 +15,7 @@ from sky.clouds.service_catalog import common CloudFilter = Optional[Union[List[str], str]] -_ALL_CLOUDS = ('aws', 'azure', 'gcp', 'lambda') +_ALL_CLOUDS = ('aws', 'azure', 'gcp', 'lambda', 'kubernetes') def _map_clouds_catalog(clouds: CloudFilter, method_name: str, *args, **kwargs): diff --git a/sky/clouds/service_catalog/kubernetes_catalog.py b/sky/clouds/service_catalog/kubernetes_catalog.py new file mode 100644 index 00000000000..926b4e0989a --- /dev/null +++ b/sky/clouds/service_catalog/kubernetes_catalog.py @@ -0,0 +1,122 @@ +"""Kubernetes Offerings Catalog. + +This module loads the service catalog file and can be used to query +instance types and pricing information for Kubernetes. + +TODO: This module should dynamically fetch resources from k8s instead of using + a static catalog. 
+""" +import colorama +import os +import typing +from typing import Dict, List, Optional, Tuple + +import pandas as pd + +from sky import sky_logging +from sky.clouds.service_catalog import common +from sky.utils import ux_utils + +if typing.TYPE_CHECKING: + from sky.clouds import cloud + +logger = sky_logging.init_logger(__name__) + +_DEFAULT_NUM_VCPUS = 1 +_DEFAULT_INSTANCE_TYPE = 'cpu1' + +_df = common.read_catalog('kubernetes/vms.csv') + +def instance_type_exists(instance_type: str) -> bool: + return common.instance_type_exists_impl(_df, instance_type) + +def validate_region_zone( + region: Optional[str], + zone: Optional[str]) -> Tuple[Optional[str], Optional[str]]: + if zone is not None: + with ux_utils.print_exception_no_traceback(): + raise ValueError('Kubernetes does not support zones.') + return common.validate_region_zone_impl('kubernetes', _df, region, zone) + + +def accelerator_in_region_or_zone(acc_name: str, + acc_count: int, + region: Optional[str] = None, + zone: Optional[str] = None) -> bool: + if zone is not None: + with ux_utils.print_exception_no_traceback(): + raise ValueError('Kubernetes does not support zones.') + return common.accelerator_in_region_or_zone_impl(_df, acc_name, acc_count, + region, zone) + + +def get_hourly_cost(instance_type: str, + use_spot: bool = False, + region: Optional[str] = None, + zone: Optional[str] = None) -> float: + """Returns the cost, or the cheapest cost among all zones for spot.""" + assert not use_spot, 'Kubernetes does not support spot instances.' 
+ if zone is not None: + with ux_utils.print_exception_no_traceback(): + raise ValueError('Kubernetes does not support zones.') + return common.get_hourly_cost_impl(_df, instance_type, use_spot, region, + zone) + + +def get_vcpus_from_instance_type(instance_type: str) -> Optional[float]: + return common.get_vcpus_from_instance_type_impl(_df, instance_type) + + +def get_default_instance_type(cpus: Optional[str] = None) -> Optional[str]: + if cpus is None: + cpus = str(_DEFAULT_NUM_VCPUS) + df = _df[_df['InstanceType'].eq(_DEFAULT_INSTANCE_TYPE)] + instance = common.get_instance_type_for_cpus_impl(df, cpus) + if not instance: + instance = common.get_instance_type_for_cpus_impl(_df, cpus) + return instance + + +def get_accelerators_from_instance_type( + instance_type: str) -> Optional[Dict[str, int]]: + return common.get_accelerators_from_instance_type_impl(_df, instance_type) + + +def get_instance_type_for_accelerator( + acc_name: str, + acc_count: int, + cpus: Optional[str] = None, + use_spot: bool = False, + region: Optional[str] = None, + zone: Optional[str] = None) -> Tuple[Optional[List[str]], List[str]]: + """ + Returns a list of instance types satisfying the required count of + accelerators with sorted prices and a list of candidates with fuzzy search. 
+ """ + if zone is not None: + with ux_utils.print_exception_no_traceback(): + raise ValueError('Kubernetes does not support zones.') + return common.get_instance_type_for_accelerator_impl(df=_df, + acc_name=acc_name, + acc_count=acc_count, + cpus=cpus, + use_spot=use_spot, + region=region, + zone=zone) + + +def get_region_zones_for_instance_type(instance_type: str, + use_spot: bool) -> List['cloud.Region']: + df = _df[_df['InstanceType'] == instance_type] + return common.get_region_zones(df, use_spot) + + +def list_accelerators( + gpus_only: bool, + name_filter: Optional[str], + region_filter: Optional[str], + case_sensitive: bool = True +) -> Dict[str, List[common.InstanceTypeInfo]]: + """Returns all Kubernetes 'instances' offering GPUs.""" + return common.list_accelerators_impl('Kubernetes', _df, gpus_only, name_filter, + region_filter, case_sensitive) diff --git a/sky/registry.py b/sky/registry.py index 553464f21f2..ad72b402a7f 100644 --- a/sky/registry.py +++ b/sky/registry.py @@ -14,6 +14,7 @@ clouds.Azure(), clouds.GCP(), clouds.Lambda(), + clouds.Kubernetes(), ] diff --git a/sky/setup_files/MANIFEST.in b/sky/setup_files/MANIFEST.in index f6ba9e298de..2f222537a6a 100644 --- a/sky/setup_files/MANIFEST.in +++ b/sky/setup_files/MANIFEST.in @@ -5,6 +5,7 @@ include sky/skylet/providers/aws/cloudwatch/* include sky/skylet/providers/azure/* include sky/skylet/providers/gcp/* include sky/skylet/providers/lambda_cloud/* +include sky/skylet/providers/kubernetes/* include sky/skylet/ray_patches/*.patch include sky/templates/* include sky/setup_files/* diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 new file mode 100644 index 00000000000..c8869a91555 --- /dev/null +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -0,0 +1,317 @@ +cluster_name: {{cluster_name}} + +# The maximum number of workers nodes to launch in addition to the head +# node. 
+max_workers: {{num_nodes - 1}} +upscaling_speed: {{num_nodes - 1}} +idle_timeout_minutes: 60 + + +# Kubernetes resources that need to be configured for the autoscaler to be +# able to manage the Ray cluster. If any of the provided resources don't +# exist, the autoscaler will attempt to create them. If this fails, you may +# not have the required permissions and will have to request them to be +# created by your cluster administrator. +provider: + type: external + module: sky.skylet.providers.kubernetes.KubernetesNodeProvider + + # Use False if running from outside of k8s cluster + use_internal_ips: false + + # Namespace to use for all resources created. + namespace: default + + # ServiceAccount created by the autoscaler for the head node pod that it + # runs in. If this field isn't provided, the head pod config below must + # contain a user-created service account with the proper permissions. + autoscaler_service_account: + apiVersion: v1 + kind: ServiceAccount + metadata: + labels: + parent: skypilot + name: autoscaler + + # Role created by the autoscaler for the head node pod that it runs in. + # If this field isn't provided, the role referenced in + # autoscaler_role_binding must exist and have at least these permissions. + autoscaler_role: + kind: Role + apiVersion: rbac.authorization.k8s.io/v1 + metadata: + labels: + parent: skypilot + name: autoscaler + rules: + - apiGroups: [""] + resources: ["pods", "pods/status", "pods/exec"] + verbs: ["get", "watch", "list", "create", "delete", "patch"] + + # RoleBinding created by the autoscaler for the head node pod that it runs + # in. If this field isn't provided, the head pod config below must contain + # a user-created service account with the proper permissions. 
+ autoscaler_role_binding: + apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + labels: + parent: skypilot + name: autoscaler + subjects: + - kind: ServiceAccount + name: autoscaler + roleRef: + kind: Role + name: autoscaler + apiGroup: rbac.authorization.k8s.io + + services: + # Service to expose the head node pod's SSH port. + - apiVersion: v1 + kind: Service + metadata: + labels: + parent: skypilot + name: example-cluster-ray-head-ssh + spec: + type: NodePort + selector: + component: example-cluster-ray-head + ports: + - protocol: TCP + port: 22 + targetPort: 22 + # Service that maps to the head node of the Ray cluster. + - apiVersion: v1 + kind: Service + metadata: + labels: + parent: skypilot + # NOTE: If you're running multiple Ray clusters with services + # on one Kubernetes cluster, they must have unique service + # names. + name: example-cluster-ray-head + spec: + # This selector must match the head node pod's selector below. + selector: + component: example-cluster-ray-head + ports: + - name: client + protocol: TCP + port: 10001 + targetPort: 10001 + - name: dashboard + protocol: TCP + port: 8265 + targetPort: 8265 + +# Specify the pod type for the ray head node (as configured below). +head_node_type: head_node +# Specify the allowed pod types for this ray cluster and the resources they provide. +available_node_types: + worker_node: + # Minimum number of Ray workers of this Pod type. + min_workers: {{num_nodes - 1}} + # Maximum number of Ray workers of this Pod type. Takes precedence over min_workers. + max_workers: {{num_nodes - 1}} + # User-specified custom resources for use by Ray. Object with string keys and integer values. + # (Ray detects CPU and GPU from pod spec resource requests and limits, so no need to fill those here.) 
+ resources: {"example-resource-a": 1, "example-resource-b": 2} + node_config: + apiVersion: v1 + kind: Pod + metadata: + labels: + parent: skypilot + # Automatically generates a name for the pod with this prefix. + generateName: example-cluster-ray-worker- + spec: + restartPolicy: Never + volumes: + - name: secret-volume + secret: + secretName: ssh-key-secret + - name: dshm + emptyDir: + medium: Memory + containers: + - name: ray-node + imagePullPolicy: Never + image: skypilot:latest + command: ["/bin/bash", "-c", "--"] + args: ["trap : TERM INT; sleep infinity & wait;"] + lifecycle: + postStart: + exec: + command: ["/bin/bash", "-c", "mkdir -p ~/.ssh && cp /etc/secret-volume/ssh-publickey ~/.ssh/authorized_keys && sudo service ssh restart"] + ports: + - containerPort: 22 # Used for SSH + # This volume allocates shared memory for Ray to use for its plasma + # object store. If you do not provide this, Ray will fall back to + # /tmp, which causes slowdowns if it is not a shared memory volume. + volumeMounts: + - name: secret-volume + readOnly: true + mountPath: "/etc/secret-volume" + - mountPath: /dev/shm + name: dshm + resources: + requests: + cpu: 1000m + memory: 1024Mi + limits: + # The maximum memory that this pod is allowed to use. The + # limit will be detected by ray and split to use 10% for + # redis, 30% for the shared memory object store, and the + # rest for application memory. If this limit is not set and + # the object store size is not set manually, ray will + # allocate a very large object store in each pod that may + # cause problems for other pods. + memory: 1024Mi + head_node: + node_config: + apiVersion: v1 + kind: Pod + metadata: + # Automatically generates a name for the pod with this prefix. + generateName: example-cluster-ray-head- + # Must match the head node service selector above if a head node + # service is required. 
+ labels: + parent: skypilot + component: example-cluster-ray-head + spec: + # Change this if you altered the autoscaler_service_account above + # or want to provide your own. + serviceAccountName: autoscaler + + restartPolicy: Never + + # This volume allocates shared memory for Ray to use for its plasma + # object store. If you do not provide this, Ray will fall back to + # /tmp, which causes slowdowns if it is not a shared memory volume. + volumes: + - name: secret-volume + secret: + secretName: ssh-key-secret + - name: dshm + emptyDir: + medium: Memory + containers: + - name: ray-node + imagePullPolicy: Never + image: skypilot:latest + # Do not change this command - it keeps the pod alive until it is + # explicitly killed. + command: ["/bin/bash", "-c", "--"] + args: ['trap : TERM INT; sleep infinity & wait;'] + ports: + - containerPort: 22 # Used for SSH + - containerPort: 6379 # Redis port + - containerPort: 10001 # Used by Ray Client + - containerPort: 8265 # Used by Ray Dashboard + + # This volume allocates shared memory for Ray to use for its plasma + # object store. If you do not provide this, Ray will fall back to + # /tmp, which causes slowdowns if it is not a shared memory volume. + volumeMounts: + - name: secret-volume + readOnly: true + mountPath: "/etc/secret-volume" + - mountPath: /dev/shm + name: dshm + lifecycle: + postStart: + exec: + command: ["/bin/bash", "-c", "mkdir -p ~/.ssh && cp /etc/secret-volume/ssh-publickey ~/.ssh/authorized_keys && sudo service ssh restart"] + resources: + requests: + cpu: 1000m + memory: 1024Mi + limits: + # The maximum memory that this pod is allowed to use. The + # limit will be detected by ray and split to use 10% for + # redis, 30% for the shared memory object store, and the + # rest for application memory. If this limit is not set and + # the object store size is not set manually, ray will + # allocate a very large object store in each pod that may + # cause problems for other pods. 
+ memory: 1024Mi + +setup_commands: + # Disable `unattended-upgrades` to prevent apt-get from hanging. It should be called at the beginning before the process started to avoid being blocked. (This is a temporary fix.) + # Create ~/.ssh/config file in case the file does not exist in the image. + # Line 'rm ..': there is another installation of pip. + # Line 'sudo bash ..': set the ulimit as suggested by ray docs for performance. https://docs.ray.io/en/latest/cluster/vms/user-guides/large-cluster-best-practices.html#system-configuration + # Line 'sudo grep ..': set the number of threads per process to unlimited to avoid ray job submit stucking issue when the number of running ray jobs increase. + # Line 'mkdir -p ..': disable host key check + # Line 'python3 -c ..': patch the buggy ray files and enable `-o allow_other` option for `goofys` + - sudo systemctl stop unattended-upgrades || true; + sudo systemctl disable unattended-upgrades || true; + sudo sed -i 's/Unattended-Upgrade "1"/Unattended-Upgrade "0"/g' /etc/apt/apt.conf.d/20auto-upgrades || true; + sudo kill -9 `sudo lsof /var/lib/dpkg/lock-frontend | awk '{print $2}' | tail -n 1` || true; + sudo pkill -9 apt-get; + sudo pkill -9 dpkg; + sudo dpkg --configure -a; + mkdir -p ~/.ssh; touch ~/.ssh/config; + pip3 --version > /dev/null 2>&1 || (curl -sSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3 get-pip.py && echo "PATH=$HOME/.local/bin:$PATH" >> ~/.bashrc); + (type -a python | grep -q python3) || echo 'alias python=python3' >> ~/.bashrc; + (type -a pip | grep -q pip3) || echo 'alias pip=pip3' >> ~/.bashrc; + which conda > /dev/null 2>&1 || (wget -nc https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && bash Miniconda3-latest-Linux-x86_64.sh -b && eval "$(~/miniconda3/bin/conda shell.bash hook)" && conda init && conda config --set auto_activate_base true); + source ~/.bashrc; + (pip3 list | grep ray | grep {{ray_version}} 2>&1 > /dev/null || pip3 install -U 
ray[default]=={{ray_version}}) && mkdir -p ~/sky_workdir && mkdir -p ~/.sky/sky_app && touch ~/.sudo_as_admin_successful; + (pip3 list | grep skypilot && [ "$(cat {{sky_remote_path}}/current_sky_wheel_hash)" == "{{sky_wheel_hash}}" ]) || (pip3 uninstall skypilot -y; pip3 install "$(echo {{sky_remote_path}}/{{sky_wheel_hash}}/skypilot-{{sky_version}}*.whl)" && echo "{{sky_wheel_hash}}" > {{sky_remote_path}}/current_sky_wheel_hash || exit 1); + sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf'; + sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload; + mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n StrictHostKeyChecking no" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n" >> ~/.ssh/config; + python3 -c "from sky.skylet.ray_patches import patch; patch()" || exit 1; + [ -f /etc/fuse.conf ] && sudo sed -i 's/#user_allow_other/user_allow_other/g' /etc/fuse.conf || (sudo sh -c 'echo "user_allow_other" > /etc/fuse.conf'); + +# Command to start ray on the head node. You don't need to change this. +# NOTE: these are very performance-sensitive. Each new item opens/closes an SSH +# connection, which is expensive. Try your best to co-locate commands into fewer +# items! The same comment applies for worker_start_ray_commands. +# +# Increment the following for catching performance bugs easier: +# current num items (num SSH connections): 2 +# Note dashboard-host is set to 0.0.0.0 so that kubernetes can port forward. +head_start_ray_commands: + # Start skylet daemon. (Should not place it in the head_setup_commands, otherwise it will run before skypilot is installed.) 
+ - ((ps aux | grep -v nohup | grep -v grep | grep -q -- "python3 -m sky.skylet.skylet") || nohup python3 -m sky.skylet.skylet >> ~/.sky/skylet.log 2>&1 &); + ray stop; RAY_SCHEDULER_EVENTS=0 ray start --disable-usage-stats --dashboard-host 0.0.0.0 --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; + which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; + +{%- if num_nodes > 1 %} +worker_start_ray_commands: + - ray stop; RAY_SCHEDULER_EVENTS=0 ray start --disable-usage-stats --address=$RAY_HEAD_IP:6379 --object-manager-port=8076 {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; + which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; +{%- else %} +worker_start_ray_commands: [] +{%- endif %} + +head_node: {} +worker_nodes: {} + +# Format: `REMOTE_PATH : LOCAL_PATH` +file_mounts: { + "{{sky_ray_yaml_remote_path}}": "{{sky_ray_yaml_local_path}}", + "{{sky_remote_path}}/{{sky_wheel_hash}}": "{{sky_local_path}}", +{%- for remote_path, local_path in credentials.items() %} + "{{remote_path}}": "{{local_path}}", +{%- endfor %} +} + +auth: + ssh_user: sky + ssh_private_key: {{ssh_private_key}} + +# These fields are required for external cloud providers. 
+setup_commands: [] +head_setup_commands: [] +worker_setup_commands: [] +cluster_synced_files: [] +file_mounts_sync_continuously: False +initialization_commands: [] +rsync_exclude: [] + From f06b22d29792e8cea33d81bb2b0d5b5a2fffba0c Mon Sep 17 00:00:00 2001 From: Romil Date: Tue, 7 Feb 2023 14:00:56 -0800 Subject: [PATCH 003/183] working provisioning with SkyPilot and ssh config --- sky/authentication.py | 14 ++++- sky/backends/backend_utils.py | 63 +++++++++++++++++-- sky/backends/cloud_vm_ray_backend.py | 41 +++++++++++- sky/clouds/kubernetes.py | 33 +++++++--- .../service_catalog/kubernetes_catalog.py | 7 ++- sky/skylet/providers/kubernetes/__init__.py | 2 +- .../providers/kubernetes/node_provider.py | 15 +++-- sky/skylet/providers/kubernetes/utils.py | 8 +++ sky/templates/kubernetes-ray.yml.j2 | 14 ++--- sky/utils/command_runner.py | 9 ++- sky/utils/ux_utils.py | 6 +- 11 files changed, 171 insertions(+), 41 deletions(-) diff --git a/sky/authentication.py b/sky/authentication.py index 26ab56663d4..02184a7e4c7 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -342,8 +342,18 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: # Run kubectl command to add the public key to the cluster. public_key_path = os.path.expanduser(PUBLIC_SSH_KEY_PATH) # TODO(romilb): Change 'ssh-key-secret' to a unique name. 
- cmd = f"kubectl create secret generic ssh-key-secret --from-file=ssh-publickey={public_key_path}" - subprocess.run(cmd, shell=True, check=True) + key_label = 'ssh-key-secret' + cmd = f"kubectl create secret generic {key_label} --from-file=ssh-publickey={public_key_path}" + try: + subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True) + except subprocess.CalledProcessError as e: + output = e.output.decode('utf-8') + print(output) + if 'already exists' in output: + logger.warning(f'Key {key_label} already exists in Kubernetes cluster, continuing...') + pass + else: + raise e # Need to use ~ relative path because Ray uses the same # path for finding the public key path on both local and head node. diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index e47aeae7d12..4a2c224da37 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -358,7 +358,7 @@ class SSHConfigHelper(object): @classmethod def _get_generated_config(cls, autogen_comment: str, host_name: str, ip: str, username: str, ssh_key_path: str, - proxy_command: Optional[str]): + proxy_command: Optional[str], port: int): if proxy_command is not None: proxy = f'ProxyCommand {proxy_command}' else: @@ -380,7 +380,7 @@ def _get_generated_config(cls, autogen_comment: str, host_name: str, StrictHostKeyChecking no UserKnownHostsFile=/dev/null GlobalKnownHostsFile=/dev/null - Port 22 + Port {port} {proxy} """.rstrip()) return codegen @@ -392,6 +392,7 @@ def add_cluster( cluster_name: str, ips: List[str], auth_config: Dict[str, str], + ports: List[int] ): """Add authentication information for cluster to local SSH config file. 
@@ -448,8 +449,10 @@ def add_cluster( os.chmod(config_path, 0o644) proxy_command = auth_config.get('ssh_proxy_command', None) + head_port = ports[0] codegen = cls._get_generated_config(sky_autogen_comment, host_name, ip, - username, key_path, proxy_command) + username, key_path, proxy_command, + head_port) # Add (or overwrite) the new config. if overwrite: @@ -483,6 +486,7 @@ def _add_multinode_config( external_worker_ips: List[str], auth_config: Dict[str, str], ): + # TODO(romilb): Make this work with multinode! username = auth_config['ssh_user'] key_path = os.path.expanduser(auth_config['ssh_private_key']) host_name = cluster_name @@ -549,7 +553,7 @@ def _add_multinode_config( logger.warning(f'Using {host_name} to identify host instead.') codegens[idx] = cls._get_generated_config( sky_autogen_comment, host_name, external_worker_ips[idx], - username, key_path, proxy_command) + username, key_path, proxy_command, port = 22) # All workers go to SKY_USER_FILE_PATH/ssh/{cluster_name} for i, line in enumerate(extra_config): @@ -562,14 +566,14 @@ def _add_multinode_config( overwrite_begin_idxs[idx] = i - 1 codegens[idx] = cls._get_generated_config( sky_autogen_comment, host_name, external_worker_ips[idx], - username, key_path, proxy_command) + username, key_path, proxy_command, port = 22) # This checks if all codegens have been created. for idx, ip in enumerate(external_worker_ips): if not codegens[idx]: codegens[idx] = cls._get_generated_config( sky_autogen_comment, worker_names[idx], ip, username, - key_path, proxy_command) + key_path, proxy_command, port = 22) for idx in range(len(external_worker_ips)): # Add (or overwrite) the new config. 
@@ -1446,6 +1450,53 @@ def get_head_ip( return head_ip +@timeline.event +def get_head_ssh_port( + handle: backends.Backend.ResourceHandle, + use_cache: bool = True, + max_attempts: int = 1, +) -> str: + """Returns the ip of the head node.""" + # Use port 22 for everything except Kubernetes + if not isinstance(handle.launched_resources.cloud, clouds.Kubernetes): + return 22 + if use_cache: + if handle.head_ssh_port is None: + # This happens for INIT clusters (e.g., exit 1 in setup). + with ux_utils.print_exception_no_traceback(): + raise ValueError( + 'Cluster\'s head SSH oirt not found; is it up? To fix: ' + 'run a successful launch first (`sky launch`) to ensure' + ' the cluster status is UP (`sky status`).') + head_ssh_port = handle.head_ssh_port + else: + # TODO(romilb): Only supports headnode for now! No multinode! + svc_name = f'{handle.get_cluster_name()}-ray-head-ssh' + head_ssh_port = clouds.Kubernetes.get_port(svc_name, 'default') + return head_ssh_port + + +@timeline.event +def get_head_ip( + handle: backends.Backend.ResourceHandle, + use_cached_head_ip: bool = True, + max_attempts: int = 1, +) -> str: + """Returns the ip of the head node.""" + if use_cached_head_ip: + if handle.head_ip is None: + # This happens for INIT clusters (e.g., exit 1 in setup). + with ux_utils.print_exception_no_traceback(): + raise ValueError( + 'Cluster\'s head IP not found; is it up? 
To fix: ' + 'run a successful launch first (`sky launch`) to ensure' + ' the cluster status is UP (`sky status`).') + head_ip = handle.head_ip + else: + head_ip = _query_head_ip_with_retries(handle.cluster_yaml, max_attempts) + return head_ip + + def run_command_and_handle_ssh_failure( runner: command_runner.SSHCommandRunner, command: str, diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 68a01901408..8a180d8a8a0 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -967,7 +967,7 @@ def _yield_region_zones(self, to_provision: resources_lib.Resources, zones = None elif cloud.is_same_cloud(clouds.Kubernetes()): regions = clouds.Kubernetes.regions() - region = regions[0].name + region = regions[0] zones = None else: assert False, cloud @@ -1816,6 +1816,7 @@ def __init__( cluster_yaml: str, stable_internal_external_ips: Optional[List[Tuple[str, str]]] = None, + stable_ssh_ports: Optional[List[int]] = None, launched_nodes: Optional[int] = None, launched_resources: Optional[resources_lib.Resources] = None, tpu_create_script: Optional[str] = None, @@ -1827,6 +1828,7 @@ def __init__( # List of (internal_ip, external_ip) tuples for all the nodes # in the cluster, sorted by the external ips. 
self.stable_internal_external_ips = stable_internal_external_ips + self.stable_ssh_ports = stable_ssh_ports self.launched_nodes = launched_nodes self.launched_resources = launched_resources self.tpu_create_script = tpu_create_script @@ -1839,6 +1841,8 @@ def __repr__(self): f'\n\thead_ip={self.head_ip},' '\n\tstable_internal_external_ips=' f'{self.stable_internal_external_ips},' + '\n\tstable_ssh_ports=' + f'{self.stable_ssh_ports},' '\n\tcluster_yaml=' f'{self.cluster_yaml}, ' f'\n\tlaunched_resources={self.launched_nodes}x ' @@ -1911,6 +1915,17 @@ def _update_cluster_region(self): self.launched_resources = self.launched_resources.copy( region=region) + def _update_stable_ssh_ports(self): + if isinstance(self.launched_resources.cloud, clouds.Kubernetes): + head_port = backend_utils.get_head_ssh_port(self, use_cache=False) + # TODO(romilb): Multinode doesn't work with Kubernetes yet. + worker_ports = [22] * self.launched_nodes + ports = [head_port] + worker_ports + else: + # Use port 22 for other clouds + ports = [22] * len(self.external_ips()) + self.stable_ssh_ports = ports + def _update_stable_cluster_ips(self, max_attempts: int = 1) -> List[str]: cluster_external_ips = backend_utils.get_node_ips( @@ -1979,6 +1994,15 @@ def external_ips(self, return [ips[1] for ips in self.stable_internal_external_ips] return None + def external_ssh_ports(self, + max_attempts: int = _FETCH_IP_MAX_ATTEMPTS, + use_cached_ports: bool = True) -> Optional[List[str]]: + if not use_cached_ports: + self._update_stable_ssh_ports() + if self.stable_ssh_ports is not None: + return self.stable_ssh_ports + return None + def get_hourly_price(self) -> float: hourly_cost = (self.launched_resources.get_cost(3600) * self.launched_nodes) @@ -1995,6 +2019,13 @@ def head_ip(self): return external_ips[0] return None + @property + def head_ssh_port(self): + external_ssh_ports = self.external_ssh_ports() + if external_ssh_ports is not None: + return external_ssh_ports[0] + return None + def 
__setstate__(self, state): self._version = self._VERSION @@ -2227,6 +2258,8 @@ def _provision(self, ip_list = handle.external_ips(max_attempts=_FETCH_IP_MAX_ATTEMPTS, use_cached_ips=False) + ssh_port_list = handle.external_ssh_ports(max_attempts=_FETCH_IP_MAX_ATTEMPTS, + use_cached_ports=False) if 'tpu_name' in config_dict: self._set_tpu_name(handle, config_dict['tpu_name']) @@ -2307,7 +2340,7 @@ def _provision(self, auth_config = common_utils.read_yaml( handle.cluster_yaml)['auth'] backend_utils.SSHConfigHelper.add_cluster( - cluster_name, ip_list, auth_config) + cluster_name, ip_list, auth_config, ssh_port_list) common_utils.remove_file_if_exists(lock_path) return handle @@ -3189,9 +3222,11 @@ def run_on_head( """Runs 'cmd' on the cluster's head node.""" head_ip = backend_utils.get_head_ip(handle, use_cached_head_ip, _FETCH_IP_MAX_ATTEMPTS) + head_ssh_port = backend_utils.get_head_ssh_port(handle, use_cached_head_ip, + _FETCH_IP_MAX_ATTEMPTS) ssh_credentials = backend_utils.ssh_credential_from_yaml( handle.cluster_yaml) - runner = command_runner.SSHCommandRunner(head_ip, **ssh_credentials) + runner = command_runner.SSHCommandRunner(head_ip, port=head_ssh_port, **ssh_credentials) if under_remote_workdir: cmd = f'cd {SKY_REMOTE_WORKDIR} && {cmd}' diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 62f9da7417d..080af787d25 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -20,6 +20,16 @@ class Kubernetes(clouds.Cloud): _REPR = 'Kubernetes' _regions: List[clouds.Region] = ['kubernetes'] + _CLOUD_UNSUPPORTED_FEATURES = { + clouds.CloudImplementationFeatures.STOP: 'Kubernetes does not support stopping VMs.', + clouds.CloudImplementationFeatures.AUTOSTOP: 'Kubernetes does not support stopping VMs.', + clouds.CloudImplementationFeatures.MULTI_NODE: 'Multi-node is not supported by the Kubernetes implementation yet.', + } + + @classmethod + def _cloud_unsupported_features( + cls) -> Dict[clouds.CloudImplementationFeatures, str]: + 
return cls._CLOUD_UNSUPPORTED_FEATURES @classmethod def regions(cls) -> List[clouds.Region]: @@ -88,6 +98,11 @@ def __repr__(self): def is_same_cloud(self, other: clouds.Cloud) -> bool: return isinstance(other, Kubernetes) + @classmethod + def get_port(cls, svc_name, namespace): + from sky.skylet.providers.kubernetes.utils import get_port + return get_port(svc_name, namespace) + @classmethod def get_default_instance_type(cls) -> str: return 'cpu1' @@ -178,15 +193,19 @@ def _make(instance_list): def check_credentials(self) -> Tuple[bool, Optional[str]]: # TODO(romilb): Check credential validity using k8s api - return (os.path.exists(os.path.expanduser(f'~/.kube/config')), - "Kubeconfig doesn't exist") + if os.path.exists(os.path.expanduser(f'~/.kube/config')): + return True, None + else: + return False, "Kubeconfig doesn't exist" def get_credential_file_mounts(self) -> Dict[str, str]: - return { - f'~/.kube/{filename}': f'~/.kube/{filename}' - for filename in _CREDENTIAL_FILES - if os.path.exists(os.path.expanduser(f'~/.kube/{filename}')) - } + return {} + # TODO(romilb): Fix the file mounts optimization ('config' here clashes with azure config file) + # return { + # f'~/.kube/{filename}': f'~/.kube/{filename}' + # for filename in _CREDENTIAL_FILES + # if os.path.exists(os.path.expanduser(f'~/.kube/{filename}')) + # } def instance_type_exists(self, instance_type: str) -> bool: return service_catalog.instance_type_exists(instance_type, 'kubernetes') diff --git a/sky/clouds/service_catalog/kubernetes_catalog.py b/sky/clouds/service_catalog/kubernetes_catalog.py index 926b4e0989a..7861c9b258d 100644 --- a/sky/clouds/service_catalog/kubernetes_catalog.py +++ b/sky/clouds/service_catalog/kubernetes_catalog.py @@ -33,9 +33,10 @@ def instance_type_exists(instance_type: str) -> bool: def validate_region_zone( region: Optional[str], zone: Optional[str]) -> Tuple[Optional[str], Optional[str]]: - if zone is not None: - with ux_utils.print_exception_no_traceback(): - raise 
ValueError('Kubernetes does not support zones.') + # if zone is not None: + # breakpoint() + # with ux_utils.print_exception_no_traceback(): + # raise ValueError('Kubernetes does not support zones.') return common.validate_region_zone_impl('kubernetes', _df, region, zone) diff --git a/sky/skylet/providers/kubernetes/__init__.py b/sky/skylet/providers/kubernetes/__init__.py index 0d1311c16f9..f3113ec817b 100644 --- a/sky/skylet/providers/kubernetes/__init__.py +++ b/sky/skylet/providers/kubernetes/__init__.py @@ -1,2 +1,2 @@ -from sky.skylet.providers.kubernetes.utils import core_api, log_prefix, networking_api, auth_api +from sky.skylet.providers.kubernetes.utils import core_api, log_prefix, networking_api, auth_api, get_head_ssh_port, get_port from sky.skylet.providers.kubernetes.node_provider import KubernetesNodeProvider diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 4a5d1b60d85..2395c8e1590 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -8,13 +8,12 @@ from kubernetes.client.rest import ApiException -from sky.skylet.providers.kubernetes import core_api, log_prefix, networking_api +from sky.skylet.providers.kubernetes import core_api, log_prefix, networking_api, get_head_ssh_port from sky.skylet.providers.kubernetes.config import ( bootstrap_kubernetes, fillout_resources_kubernetes, ) -from ray.autoscaler._private.command_runner import KubernetesCommandRunner, \ - SSHCommandRunner +from ray.autoscaler._private.command_runner import SSHCommandRunner from ray.autoscaler._private.cli_logger import cli_logger from ray.autoscaler.node_provider import NodeProvider from ray.autoscaler.tags import NODE_KIND_HEAD, TAG_RAY_CLUSTER_NAME, TAG_RAY_NODE_KIND @@ -106,11 +105,11 @@ def external_ip(self, node_id): def external_port(self, node_id): # Extract the NodePort of the head node's SSH service # TODO(romilb): Implement caching 
here for performance - # TODO(romilb): !!! Service name is hardcoded here !!! - SVC_NAME = 'example-cluster-ray-head-ssh' - head_service = core_api().read_namespaced_service( - SVC_NAME, self.namespace) - return head_service.spec.ports[0].node_port + # + # Node id is str e.g., example-cluster-ray-head-v89lb + cli_logger.print("GETTING HEAD NODE SSH! MULTINODE WOULD FAIL!") + cluster_name = node_id.split('-ray-head')[0] + return get_head_ssh_port(cluster_name, self.namespace) def internal_ip(self, node_id): pod = core_api().read_namespaced_pod(node_id, self.namespace) diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 40de7bbf2a3..0182777b882 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -54,5 +54,13 @@ def custom_objects_api(): return _custom_objects_api +def get_head_ssh_port(cluster_name, namespace): + svc_name = f'{cluster_name}-ray-head-ssh' + return get_port(svc_name, namespace) + +def get_port(svc_name, namespace): + head_service = core_api().read_namespaced_service( + svc_name, namespace) + return head_service.spec.ports[0].node_port log_prefix = "KubernetesNodeProvider: " diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index c8869a91555..50cc938ba56 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -73,11 +73,11 @@ provider: metadata: labels: parent: skypilot - name: example-cluster-ray-head-ssh + name: {{cluster_name}}-ray-head-ssh spec: type: NodePort selector: - component: example-cluster-ray-head + component: {{cluster_name}}-ray-head ports: - protocol: TCP port: 22 @@ -91,11 +91,11 @@ provider: # NOTE: If you're running multiple Ray clusters with services # on one Kubernetes cluster, they must have unique service # names. - name: example-cluster-ray-head + name: {{cluster_name}}-ray-head spec: # This selector must match the head node pod's selector below. 
selector: - component: example-cluster-ray-head + component: {{cluster_name}}-ray-head ports: - name: client protocol: TCP @@ -125,7 +125,7 @@ available_node_types: labels: parent: skypilot # Automatically generates a name for the pod with this prefix. - generateName: example-cluster-ray-worker- + generateName: {{cluster_name}}-ray-worker- spec: restartPolicy: Never volumes: @@ -175,12 +175,12 @@ available_node_types: kind: Pod metadata: # Automatically generates a name for the pod with this prefix. - generateName: example-cluster-ray-head- + generateName: {{cluster_name}}-ray-head- # Must match the head node service selector above if a head node # service is required. labels: parent: skypilot - component: example-cluster-ray-head + component: {{cluster_name}}-ray-head spec: # Change this if you altered the autoscaler_service_account above # or want to provide your own. diff --git a/sky/utils/command_runner.py b/sky/utils/command_runner.py index 5fd37edf592..2972050554d 100644 --- a/sky/utils/command_runner.py +++ b/sky/utils/command_runner.py @@ -43,11 +43,14 @@ def ssh_options_list(ssh_private_key: Optional[str], ssh_control_name: Optional[str], *, ssh_proxy_command: Optional[str] = None, - timeout: int = 30) -> List[str]: + timeout: int = 30, + port: int = 22) -> List[str]: """Returns a list of sane options for 'ssh'.""" # Forked from Ray SSHOptions: # https://github.com/ray-project/ray/blob/master/python/ray/autoscaler/_private/command_runner.py arg_dict = { + # SSH port + 'Port': port, # Supresses initial fingerprint verification. 'StrictHostKeyChecking': 'no', # SSH IP and fingerprint pairs no longer added to known_hosts. @@ -118,6 +121,7 @@ def __init__( ssh_private_key: str, ssh_control_name: Optional[str] = '__default__', ssh_proxy_command: Optional[str] = None, + port: int = 22, ): """Initialize SSHCommandRunner. 
@@ -145,6 +149,7 @@ def __init__( None if ssh_control_name is None else hashlib.md5( ssh_control_name.encode()).hexdigest()[:_HASH_MAX_LENGTH]) self._ssh_proxy_command = ssh_proxy_command + self.port = port @staticmethod def make_runner_list( @@ -180,6 +185,7 @@ def _ssh_base_command(self, *, ssh_mode: SshMode, self.ssh_private_key, self.ssh_control_name, ssh_proxy_command=self._ssh_proxy_command, + port = self.port, ) + [f'{self.ssh_user}@{self.ip}'] def run( @@ -331,6 +337,7 @@ def rsync( self.ssh_private_key, self.ssh_control_name, ssh_proxy_command=self._ssh_proxy_command, + port = self.port, )) rsync_command.append(f'-e "ssh {ssh_options}"') # To support spaces in the path, we need to quote source and target. diff --git a/sky/utils/ux_utils.py b/sky/utils/ux_utils.py index 550d8c1f122..8d3ced7cc5a 100644 --- a/sky/utils/ux_utils.py +++ b/sky/utils/ux_utils.py @@ -29,10 +29,10 @@ def print_exception_no_traceback(): if error(): raise ValueError('...') """ - original_tracelimit = getattr(sys, 'tracebacklimit', 1000) - sys.tracebacklimit = 0 + # original_tracelimit = getattr(sys, 'tracebacklimit', 1000) + # sys.tracebacklimit = 0 yield - sys.tracebacklimit = original_tracelimit + # sys.tracebacklimit = original_tracelimit @contextlib.contextmanager From cf1ddecca8ed74edcf4c5e8ab69f6dedccf9ab88 Mon Sep 17 00:00:00 2001 From: Romil Date: Tue, 7 Feb 2023 18:05:12 -0800 Subject: [PATCH 004/183] working provisioning with SkyPilot and ssh config --- sky/backends/cloud_vm_ray_backend.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 8a180d8a8a0..dd0cfd0f730 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2523,7 +2523,9 @@ def _exec_code_on_head( fore = colorama.Fore ssh_credentials = backend_utils.ssh_credential_from_yaml( handle.cluster_yaml) + head_ssh_port = backend_utils.get_head_ssh_port(handle) runner = 
command_runner.SSHCommandRunner(handle.head_ip, + port=head_ssh_port, **ssh_credentials) with tempfile.NamedTemporaryFile('w', prefix='sky_app_') as fp: fp.write(codegen) From 40aad6dd63cbbf3d1c8bdf0243d73ad8913784ce Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 15 Mar 2023 21:47:57 -0700 Subject: [PATCH 005/183] Updates to master --- Dockerfile_k8s | 9 +++---- sky/backends/cloud_vm_ray_backend.py | 4 +-- sky/clouds/kubernetes.py | 26 +++++++++++++++++++ .../service_catalog/kubernetes_catalog.py | 5 ++++ tests/playground/deployment/run.sh | 4 +-- tests/playground/kind/create_cluster.sh | 1 + 6 files changed, 40 insertions(+), 9 deletions(-) diff --git a/Dockerfile_k8s b/Dockerfile_k8s index 4aa78ec9d3c..bf21d877509 100644 --- a/Dockerfile_k8s +++ b/Dockerfile_k8s @@ -3,7 +3,7 @@ FROM continuumio/miniconda3:4.11.0 # Initialize conda for root user, install ssh and other local dependencies RUN apt update -y && \ - apt install rsync sudo patch openssh-server pciutils nano -y && \ + apt install gcc rsync sudo patch openssh-server pciutils nano -y && \ rm -rf /var/lib/apt/lists/* && \ apt remove -y python3 && \ conda init @@ -24,17 +24,16 @@ RUN useradd -m -s /bin/bash sky && \ USER sky # Install SkyPilot pip dependencies -# Hack: we only install SkyPilot[aws] to optimize build time RUN pip install wheel Click colorama cryptography jinja2 jsonschema && \ pip install networkx oauth2client pandas pendulum PrettyTable && \ - pip install ray[default]==2.01 rich tabulate filelock 'grpcio<=1.43.0' && \ - pip install packaging 'protobuf<4.0.0' psutil pulp && \ + pip install ray==2.3.0 rich tabulate filelock 'grpcio<=1.43.0' && \ + pip install packaging 'protobuf<4.0.0' pulp && \ pip install awscli boto3 pycryptodome==3.12.0 && \ pip install docker # Install SkyPilot. This is purposely separate from installing SkyPilot # dependencies to optimize rebuild time -COPY . /skypilot/sky/ +COPY --chown=sky . /skypilot/sky/ RUN cd /skypilot/ && \ sudo mv -v sky/setup_files/* . 
&& \ diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 38069b5acac..4b72fce7943 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -1854,7 +1854,7 @@ def __init__(self, launched_resources: resources_lib.Resources, stable_internal_external_ips: Optional[List[Tuple[ str, str]]] = None, - stable_ssh_ports: Optional[List[int]] = None, + stable_ssh_ports: Optional[List[int]] = None, tpu_create_script: Optional[str] = None, tpu_delete_script: Optional[str] = None) -> None: self._version = self._VERSION @@ -2374,7 +2374,7 @@ def _get_zone(runner): # to None. self._update_after_cluster_provisioned(handle, task, prev_cluster_status, ip_list, - ssh_port_list, +ssh_port_list, lock_path) return handle diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 080af787d25..8e4b4211e23 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -115,6 +115,32 @@ def get_accelerators_from_instance_type( return service_catalog.get_accelerators_from_instance_type( instance_type, clouds='kubernetes') + @classmethod + def get_vcpus_mem_from_instance_type( + cls, instance_type: str) -> Tuple[Optional[float], Optional[float]]: + """Returns the #vCPUs and memory that the instance type offers.""" + return service_catalog.get_vcpus_mem_from_instance_type(instance_type, + clouds='kubernetes') + + @classmethod + def zones_provision_loop( + cls, + *, + region: str, + num_nodes: int, + instance_type: str, + accelerators: Optional[Dict[str, int]] = None, + use_spot: bool = False, + ) -> Iterator[None]: + del num_nodes # Unused. 
+ regions = cls.regions_with_offering(instance_type, + accelerators, + use_spot=use_spot, + region=region, + zone=None) + for r in regions: + yield r.zones + @classmethod def get_vcpus_from_instance_type( cls, diff --git a/sky/clouds/service_catalog/kubernetes_catalog.py b/sky/clouds/service_catalog/kubernetes_catalog.py index 7861c9b258d..a666a932c80 100644 --- a/sky/clouds/service_catalog/kubernetes_catalog.py +++ b/sky/clouds/service_catalog/kubernetes_catalog.py @@ -68,6 +68,11 @@ def get_vcpus_from_instance_type(instance_type: str) -> Optional[float]: return common.get_vcpus_from_instance_type_impl(_df, instance_type) +def get_vcpus_mem_from_instance_type( + instance_type: str) -> Tuple[Optional[float], Optional[float]]: + return common.get_vcpus_mem_from_instance_type_impl(_df, + instance_type) + def get_default_instance_type(cpus: Optional[str] = None) -> Optional[str]: if cpus is None: cpus = str(_DEFAULT_NUM_VCPUS) diff --git a/tests/playground/deployment/run.sh b/tests/playground/deployment/run.sh index d19b4f5fc9e..d61b2442274 100644 --- a/tests/playground/deployment/run.sh +++ b/tests/playground/deployment/run.sh @@ -1,6 +1,6 @@ -kubectl create secret generic ssh-key-secret --from-file=ssh-publickey=/home/romilb/.ssh/sky-key.pub +kubectl create secret generic ssh-key-secret --from-file=ssh-publickey=/Users/romilb/.ssh/sky-key.pub kubectl apply -f skypilot_ssh_k8s_deployment.yaml # Use kubectl describe service skypilot-service to get the port of the service kubectl describe service skypilot-service | grep NodePort echo Run the following command to ssh into the container: -echo ssh sky@127.0.0.1 -p port -i ~/.ssh/sky-key +echo ssh sky@127.0.0.1 -p port -i ~/.ssh/sky-key \ No newline at end of file diff --git a/tests/playground/kind/create_cluster.sh b/tests/playground/kind/create_cluster.sh index 5566a24d99c..149b2f80e58 100644 --- a/tests/playground/kind/create_cluster.sh +++ b/tests/playground/kind/create_cluster.sh @@ -1,3 +1,4 @@ +set -e kind delete 
cluster kind create cluster --config cluster.yaml # Load local skypilot image From 47d09538beb79084c0a0da654f476bd492023e74 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 21 Mar 2023 10:30:13 -0700 Subject: [PATCH 006/183] ray2.3 --- Dockerfile_k8s | 4 ++-- sky/skylet/ray_patches/__init__.py | 4 ++-- sky/templates/kubernetes-ray.yml.j2 | 17 +++++++++-------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/Dockerfile_k8s b/Dockerfile_k8s index bf21d877509..75a373e68ca 100644 --- a/Dockerfile_k8s +++ b/Dockerfile_k8s @@ -1,5 +1,5 @@ # docker build -t skypilot:latest -f Dockerfile_k8s ./sky -FROM continuumio/miniconda3:4.11.0 +FROM continuumio/miniconda3:22.11.1 # Initialize conda for root user, install ssh and other local dependencies RUN apt update -y && \ @@ -29,7 +29,7 @@ RUN pip install wheel Click colorama cryptography jinja2 jsonschema && \ pip install ray==2.3.0 rich tabulate filelock 'grpcio<=1.43.0' && \ pip install packaging 'protobuf<4.0.0' pulp && \ pip install awscli boto3 pycryptodome==3.12.0 && \ - pip install docker + pip install docker kubernetes # Install SkyPilot. 
This is purposely separate from installing SkyPilot # dependencies to optimize rebuild time diff --git a/sky/skylet/ray_patches/__init__.py b/sky/skylet/ray_patches/__init__.py index 4f745e126c7..2cc16833b31 100644 --- a/sky/skylet/ray_patches/__init__.py +++ b/sky/skylet/ray_patches/__init__.py @@ -63,8 +63,8 @@ def patch() -> None: from ray._private import log_monitor _run_patch(log_monitor.__file__, _to_absolute('log_monitor.py.patch')) - from ray._private import worker - _run_patch(worker.__file__, _to_absolute('worker.py.patch')) + # from ray._private import worker + # _run_patch(worker.__file__, _to_absolute('worker.py.patch')) from ray.dashboard.modules.job import cli _run_patch(cli.__file__, _to_absolute('cli.py.patch')) diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 50cc938ba56..5a94e7cbb16 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -159,7 +159,7 @@ available_node_types: resources: requests: cpu: 1000m - memory: 1024Mi + memory: 2048Mi limits: # The maximum memory that this pod is allowed to use. The # limit will be detected by ray and split to use 10% for @@ -168,7 +168,7 @@ available_node_types: # the object store size is not set manually, ray will # allocate a very large object store in each pod that may # cause problems for other pods. - memory: 1024Mi + memory: 2048Mi head_node: node_config: apiVersion: v1 @@ -228,7 +228,7 @@ available_node_types: resources: requests: cpu: 1000m - memory: 1024Mi + memory: 2048Mi limits: # The maximum memory that this pod is allowed to use. The # limit will be detected by ray and split to use 10% for @@ -237,7 +237,7 @@ available_node_types: # the object store size is not set manually, ray will # allocate a very large object store in each pod that may # cause problems for other pods. - memory: 1024Mi + memory: 2048Mi setup_commands: # Disable `unattended-upgrades` to prevent apt-get from hanging. 
It should be called at the beginning before the process started to avoid being blocked. (This is a temporary fix.) @@ -260,7 +260,7 @@ setup_commands: (type -a pip | grep -q pip3) || echo 'alias pip=pip3' >> ~/.bashrc; which conda > /dev/null 2>&1 || (wget -nc https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && bash Miniconda3-latest-Linux-x86_64.sh -b && eval "$(~/miniconda3/bin/conda shell.bash hook)" && conda init && conda config --set auto_activate_base true); source ~/.bashrc; - (pip3 list | grep ray | grep {{ray_version}} 2>&1 > /dev/null || pip3 install -U ray[default]=={{ray_version}}) && mkdir -p ~/sky_workdir && mkdir -p ~/.sky/sky_app && touch ~/.sudo_as_admin_successful; + mkdir -p ~/sky_workdir && mkdir -p ~/.sky/sky_app && touch ~/.sudo_as_admin_successful; (pip3 list | grep skypilot && [ "$(cat {{sky_remote_path}}/current_sky_wheel_hash)" == "{{sky_wheel_hash}}" ]) || (pip3 uninstall skypilot -y; pip3 install "$(echo {{sky_remote_path}}/{{sky_wheel_hash}}/skypilot-{{sky_version}}*.whl)" && echo "{{sky_wheel_hash}}" > {{sky_remote_path}}/current_sky_wheel_hash || exit 1); sudo bash -c 'rm -rf /etc/security/limits.d; echo "* soft nofile 1048576" >> /etc/security/limits.conf; echo "* hard nofile 1048576" >> /etc/security/limits.conf'; sudo grep -e '^DefaultTasksMax' /etc/systemd/system.conf || (sudo bash -c 'echo "DefaultTasksMax=infinity" >> /etc/systemd/system.conf'); sudo systemctl set-property user-$(id -u $(whoami)).slice TasksMax=infinity; sudo systemctl daemon-reload; @@ -278,13 +278,15 @@ setup_commands: # Note dashboard-host is set to 0.0.0.0 so that kubernetes can port forward. head_start_ray_commands: # Start skylet daemon. (Should not place it in the head_setup_commands, otherwise it will run before skypilot is installed.) + # WARNING - Ray object store memory is capped at 100 MB. This is to avoid errors during initialization. 
--object-store-memory=100000000 - ((ps aux | grep -v nohup | grep -v grep | grep -q -- "python3 -m sky.skylet.skylet") || nohup python3 -m sky.skylet.skylet >> ~/.sky/skylet.log 2>&1 &); - ray stop; RAY_SCHEDULER_EVENTS=0 ray start --disable-usage-stats --dashboard-host 0.0.0.0 --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; + ray stop; RAY_SCHEDULER_EVENTS=0 ray start --num-cpus $(nproc) --disable-usage-stats --dashboard-host 0.0.0.0 --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; {%- if num_nodes > 1 %} worker_start_ray_commands: - - ray stop; RAY_SCHEDULER_EVENTS=0 ray start --disable-usage-stats --address=$RAY_HEAD_IP:6379 --object-manager-port=8076 {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; + # WARNING - Ray object store memory is capped at 100 MB. This is to avoid errors during initialization. + - ray stop; RAY_SCHEDULER_EVENTS=0 ray start --num-cpus $(nproc) --disable-usage-stats --address=$RAY_HEAD_IP:6379 --object-manager-port=8076 {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; {%- else %} worker_start_ray_commands: [] @@ -307,7 +309,6 @@ auth: ssh_private_key: {{ssh_private_key}} # These fields are required for external cloud providers. 
-setup_commands: [] head_setup_commands: [] worker_setup_commands: [] cluster_synced_files: [] From 9f59467e8b545dbb95cf0fd996149c12eb8e66c5 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 28 Mar 2023 22:25:16 -0700 Subject: [PATCH 007/183] Clean up docs --- Dockerfile_k8s | 4 +++- sky/backends/backend_utils.py | 7 +++++++ sky/clouds/kubernetes.py | 2 ++ .../providers/kubernetes/node_provider.py | 17 ++++++++++++++++- sky/templates/kubernetes-ray.yml.j2 | 8 ++++---- .../deployment/skypilot_ssh_k8s_deployment.yaml | 2 +- 6 files changed, 33 insertions(+), 7 deletions(-) diff --git a/Dockerfile_k8s b/Dockerfile_k8s index 75a373e68ca..4df97dacfde 100644 --- a/Dockerfile_k8s +++ b/Dockerfile_k8s @@ -1,4 +1,6 @@ -# docker build -t skypilot:latest -f Dockerfile_k8s ./sky +# docker build --platform=linux/amd64 -t skypilot:latest -f Dockerfile_k8s ./sky +# docker build --platform=linux/amd64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky +# docker push us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest FROM continuumio/miniconda3:22.11.1 # Initialize conda for root user, install ssh and other local dependencies diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 2d9d1a6eee2..4466c4fef4d 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -811,6 +811,10 @@ def write_cluster_config( assert cluster_name is not None credentials = sky_check.get_cloud_credential_file_mounts() + k8s_image = None + if isinstance(cloud, clouds.Kubernetes): + k8s_image = cloud.IMAGE + ip_list = None auth_config = {'ssh_private_key': auth.PRIVATE_SSH_KEY_PATH} if isinstance(cloud, clouds.Local): @@ -878,6 +882,9 @@ def write_cluster_config( # GCP only: 'gcp_project_id': gcp_project_id, + # Kubernetes only: + 'skypilot_k8s_image': k8s_image, + # Ray version. 'ray_version': constants.SKY_REMOTE_RAY_VERSION, # Cloud credentials for cloud storage. 
diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 8e4b4211e23..46ee778d27b 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -26,6 +26,8 @@ class Kubernetes(clouds.Cloud): clouds.CloudImplementationFeatures.MULTI_NODE: 'Multi-node is not supported by the Kubernetes implementation yet.', } + IMAGE = 'us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest' + @classmethod def _cloud_unsupported_features( cls) -> Dict[clouds.CloudImplementationFeatures, str]: diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 2395c8e1590..1c3d88b9d5c 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -4,6 +4,7 @@ import subprocess import time from typing import Dict +from urllib.parse import urlparse from uuid import uuid4 from kubernetes.client.rest import ApiException @@ -98,9 +99,23 @@ def get_apiserver_ip() -> str: "server"].split("//")[1].split(":")[0] return api_server_ip + @staticmethod + def get_external_ip_for_nodeport() -> str: + # Return the IP address of the first node with an external IP + nodes = core_api().list_node().items + for node in nodes: + if node.status.addresses: + for address in node.status.addresses: + if address.type == "ExternalIP": + return address.address + # If no external IP is found, use the API server IP + api_host = core_api().api_client.configuration.host + parsed_url = urlparse(api_host) + return parsed_url.hostname + def external_ip(self, node_id): # Extract the IP address of the API server from kubectl - return self.get_apiserver_ip() + return self.get_external_ip_for_nodeport() def external_port(self, node_id): # Extract the NodePort of the head node's SSH service diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 5a94e7cbb16..d843134e044 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ 
b/sky/templates/kubernetes-ray.yml.j2 @@ -137,8 +137,8 @@ available_node_types: medium: Memory containers: - name: ray-node - imagePullPolicy: Never - image: skypilot:latest + imagePullPolicy: Always + image: {{skypilot_k8s_image}} command: ["/bin/bash", "-c", "--"] args: ["trap : TERM INT; sleep infinity & wait;"] lifecycle: @@ -200,8 +200,8 @@ available_node_types: medium: Memory containers: - name: ray-node - imagePullPolicy: Never - image: skypilot:latest + imagePullPolicy: Always + image: {{skypilot_k8s_image}} # Do not change this command - it keeps the pod alive until it is # explicitly killed. command: ["/bin/bash", "-c", "--"] diff --git a/tests/playground/deployment/skypilot_ssh_k8s_deployment.yaml b/tests/playground/deployment/skypilot_ssh_k8s_deployment.yaml index ed2715d61ab..8929a916d2a 100644 --- a/tests/playground/deployment/skypilot_ssh_k8s_deployment.yaml +++ b/tests/playground/deployment/skypilot_ssh_k8s_deployment.yaml @@ -20,7 +20,7 @@ spec: secretName: ssh-key-secret containers: - name: skypilot - image: skypilot:latest + image: us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest imagePullPolicy: Never env: - name: SECRET_THING From 07f9bcb57af011526cf807213368846dd412af38 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 31 Mar 2023 14:45:27 -0700 Subject: [PATCH 008/183] multiarch build --- Dockerfile_k8s | 18 ++++++++++++++++-- .../playground/deployment/install_dashboard.sh | 5 +++++ 2 files changed, 21 insertions(+), 2 deletions(-) create mode 100644 tests/playground/deployment/install_dashboard.sh diff --git a/Dockerfile_k8s b/Dockerfile_k8s index 4df97dacfde..9b0b093172f 100644 --- a/Dockerfile_k8s +++ b/Dockerfile_k8s @@ -1,6 +1,17 @@ -# docker build --platform=linux/amd64 -t skypilot:latest -f Dockerfile_k8s ./sky +# On M1 Macs, use the following command to build the image: +# docker build --platform=linux/arm64 -t skypilot:latest -f Dockerfile_k8s ./sky +# For amd64, use the following command: # docker build 
--platform=linux/amd64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky # docker push us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest +# Multiplatform build +# docker buildx build --platform=linux/arm64,linux/amd64 -t skypilot:latest -f Dockerfile_k8s ./sky + +# build both images +# docker buildx build --platform=linux/arm64,linux/amd64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky +# load M1 mac +# docker buildx build --load --platform linux/arm64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky && docker tag us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest skypilot:latest +# push both platforms as one image manifest list +# docker buildx build --push --platform linux/amd64,linux/arm64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky FROM continuumio/miniconda3:22.11.1 # Initialize conda for root user, install ssh and other local dependencies @@ -28,11 +39,14 @@ USER sky # Install SkyPilot pip dependencies RUN pip install wheel Click colorama cryptography jinja2 jsonschema && \ pip install networkx oauth2client pandas pendulum PrettyTable && \ - pip install ray==2.3.0 rich tabulate filelock 'grpcio<=1.43.0' && \ + pip install ray==2.3.0 rich tabulate filelock && \ pip install packaging 'protobuf<4.0.0' pulp && \ pip install awscli boto3 pycryptodome==3.12.0 && \ pip install docker kubernetes +# Add /home/sky/.local/bin/ to PATH +RUN echo 'export PATH="$PATH:$HOME/.local/bin"' >> ~/.bashrc + # Install SkyPilot. This is purposely separate from installing SkyPilot # dependencies to optimize rebuild time COPY --chown=sky . 
/skypilot/sky/ diff --git a/tests/playground/deployment/install_dashboard.sh b/tests/playground/deployment/install_dashboard.sh new file mode 100644 index 00000000000..0fc8f5b89da --- /dev/null +++ b/tests/playground/deployment/install_dashboard.sh @@ -0,0 +1,5 @@ +kubectl apply -f dashboard.yaml +echo "Dashboard installed, please run 'kubectl proxy' and visit http://localhost:8001/api/v1/namespaces/kubernetes-dashboard/services/https:kubernetes-dashboard:/proxy/#/node?namespace=default" +kubectl proxy + +# kubectl get ns kubernetes-dashboard -o json | jq '.spec.finalizers = []' | kubectl replace --raw "/api/v1/namespaces/kubernetes-dashboard/finalize" -f - \ No newline at end of file From bd12014d48ba361b58d28ebb36ca3c925cf0d12d Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 31 Mar 2023 16:22:56 -0700 Subject: [PATCH 009/183] hacking around ray start --- sky/templates/kubernetes-ray.yml.j2 | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index d843134e044..755b6262816 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -158,7 +158,7 @@ available_node_types: name: dshm resources: requests: - cpu: 1000m + cpu: 1 memory: 2048Mi limits: # The maximum memory that this pod is allowed to use. 
The @@ -267,6 +267,10 @@ setup_commands: mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n StrictHostKeyChecking no" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n" >> ~/.ssh/config; python3 -c "from sky.skylet.ray_patches import patch; patch()" || exit 1; [ -f /etc/fuse.conf ] && sudo sed -i 's/#user_allow_other/user_allow_other/g' /etc/fuse.conf || (sudo sh -c 'echo "user_allow_other" > /etc/fuse.conf'); + echo Starting ray in setup commands; + ((ps aux | grep -v nohup | grep -v grep | grep -q -- "python3 -m sky.skylet.skylet") || nohup python3 -m sky.skylet.skylet >> ~/.sky/skylet.log 2>&1 &); + ray stop; RAY_SCHEDULER_EVENTS=0 ray start --num-cpus $(nproc) --disable-usage-stats --dashboard-host 0.0.0.0 --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; + which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; # Command to start ray on the head node. You don't need to change this. # NOTE: these are very performance-sensitive. Each new item opens/closes an SSH @@ -279,14 +283,15 @@ setup_commands: head_start_ray_commands: # Start skylet daemon. (Should not place it in the head_setup_commands, otherwise it will run before skypilot is installed.) # WARNING - Ray object store memory is capped at 100 MB. This is to avoid errors during initialization. 
--object-store-memory=100000000 - - ((ps aux | grep -v nohup | grep -v grep | grep -q -- "python3 -m sky.skylet.skylet") || nohup python3 -m sky.skylet.skylet >> ~/.sky/skylet.log 2>&1 &); - ray stop; RAY_SCHEDULER_EVENTS=0 ray start --num-cpus $(nproc) --disable-usage-stats --dashboard-host 0.0.0.0 --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; - which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; + #- ((ps aux | grep -v nohup | grep -v grep | grep -q -- "python3 -m sky.skylet.skylet") || nohup python3 -m sky.skylet.skylet >> ~/.sky/skylet.log 2>&1 &); + # ray stop; echo hiiiiii $(nproc); RAY_SCHEDULER_EVENTS=0 ray start --num-cpus $(nproc) --disable-usage-stats --dashboard-host 0.0.0.0 --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; + # which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; + - echo "For some reason ray does not honor numcpus when specified here. It is empty in ray.available_resources. So we do it in the setup."; {%- if num_nodes > 1 %} worker_start_ray_commands: # WARNING - Ray object store memory is capped at 100 MB. This is to avoid errors during initialization. 
- - ray stop; RAY_SCHEDULER_EVENTS=0 ray start --num-cpus $(nproc) --disable-usage-stats --address=$RAY_HEAD_IP:6379 --object-manager-port=8076 {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; + - ray stop; echo hi $(nproc); RAY_SCHEDULER_EVENTS=0 ray start --num-cpus $(nproc) --disable-usage-stats --address=$RAY_HEAD_IP:6379 --object-manager-port=8076 {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; {%- else %} worker_start_ray_commands: [] From 4baf0b685e9fbeba320b0686c7ece231a3a9d637 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 3 Apr 2023 09:06:32 -0700 Subject: [PATCH 010/183] more port fixes --- sky/backends/cloud_vm_ray_backend.py | 9 ++++++--- sky/utils/command_runner.py | 6 +++++- sky/utils/ux_utils.py | 6 +++--- 3 files changed, 14 insertions(+), 7 deletions(-) diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 4b72fce7943..16a3c45071a 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2441,6 +2441,7 @@ def _sync_workdir(self, handle: CloudVmRayResourceHandle, fore = colorama.Fore style = colorama.Style ip_list = handle.external_ips() + port_list = handle.external_ssh_ports() assert ip_list is not None, 'external_ips is not cached in handle' full_workdir = os.path.abspath(os.path.expanduser(workdir)) @@ -2470,7 +2471,7 @@ def _sync_workdir(self, handle: CloudVmRayResourceHandle, # TODO(zhwu): refactor this with backend_utils.parallel_cmd_with_rsync runners = command_runner.SSHCommandRunner.make_runner_list( - ip_list, **ssh_credentials) + ip_list, port_list=port_list, **ssh_credentials) def _sync_workdir_node(runner: command_runner.SSHCommandRunner) -> None: runner.rsync( @@ -2522,6 +2523,7 @@ def _setup(self, handle: CloudVmRayResourceHandle, task: task_lib.Task, setup_file = 
os.path.basename(setup_sh_path) # Sync the setup script up and run it. ip_list = handle.external_ips() + port_list = handle.external_ssh_ports() assert ip_list is not None, 'external_ips is not cached in handle' ssh_credentials = backend_utils.ssh_credential_from_yaml( handle.cluster_yaml) @@ -2530,7 +2532,7 @@ def _setup(self, handle: CloudVmRayResourceHandle, task: task_lib.Task, # forwarding. ssh_credentials.pop('ssh_control_name') runners = command_runner.SSHCommandRunner.make_runner_list( - ip_list, **ssh_credentials) + ip_list, port_list=port_list, **ssh_credentials) # Need this `-i` option to make sure `source ~/.bashrc` work setup_cmd = f'/bin/bash -i /tmp/{setup_file} 2>&1' @@ -3435,11 +3437,12 @@ def _execute_file_mounts(self, handle: CloudVmRayResourceHandle, logger.info(f'{fore.CYAN}Processing file mounts.{style.RESET_ALL}') start = time.time() ip_list = handle.external_ips() + port_list = handle.external_ssh_ports() assert ip_list is not None, 'external_ips is not cached in handle' ssh_credentials = backend_utils.ssh_credential_from_yaml( handle.cluster_yaml) runners = command_runner.SSHCommandRunner.make_runner_list( - ip_list, **ssh_credentials) + ip_list, port_list=port_list, **ssh_credentials) log_path = os.path.join(self.log_dir, 'file_mounts.log') # Check the files and warn diff --git a/sky/utils/command_runner.py b/sky/utils/command_runner.py index f17085628bb..a8c3948ee88 100644 --- a/sky/utils/command_runner.py +++ b/sky/utils/command_runner.py @@ -158,11 +158,15 @@ def make_runner_list( ssh_private_key: str, ssh_control_name: Optional[str] = None, ssh_proxy_command: Optional[str] = None, + port_list: Optional[List[int]] = None, ) -> List['SSHCommandRunner']: """Helper function for creating runners with the same ssh credentials""" + if not port_list: + port_list = [22] * len(ip_list) return [ SSHCommandRunner(ip, ssh_user, ssh_private_key, ssh_control_name, - ssh_proxy_command) for ip in ip_list + ssh_proxy_command, port) for ip, port in zip( 
+ ip_list, port_list) ] def _ssh_base_command(self, *, ssh_mode: SshMode, diff --git a/sky/utils/ux_utils.py b/sky/utils/ux_utils.py index 47ea03920cc..c25afebcd6a 100644 --- a/sky/utils/ux_utils.py +++ b/sky/utils/ux_utils.py @@ -28,7 +28,7 @@ def print_exception_no_traceback(): if error(): raise ValueError('...') """ - original_tracelimit = getattr(sys, 'tracebacklimit', 1000) - sys.tracebacklimit = 0 + # original_tracelimit = getattr(sys, 'tracebacklimit', 1000) + # sys.tracebacklimit = 0 yield - sys.tracebacklimit = original_tracelimit + # sys.tracebacklimit = original_tracelimit From 7ed02ebba82f76e19042c079229a76a33d066af4 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 8 Jun 2023 15:12:52 -0700 Subject: [PATCH 011/183] fix up default instance selection --- sky/backends/backend_utils.py | 2 +- sky/backends/cloud_vm_ray_backend.py | 51 ++----------------- sky/clouds/kubernetes.py | 23 +++++++-- .../service_catalog/kubernetes_catalog.py | 22 +++++--- tests/playground/kind/create_cluster.sh | 2 + 5 files changed, 40 insertions(+), 60 deletions(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 4012958ea56..451d264f7e3 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -1608,7 +1608,7 @@ def get_head_ssh_port( # This happens for INIT clusters (e.g., exit 1 in setup). with ux_utils.print_exception_no_traceback(): raise ValueError( - 'Cluster\'s head SSH oirt not found; is it up? To fix: ' + 'Cluster\'s head SSH port not found; is it up? 
To fix: ' 'run a successful launch first (`sky launch`) to ensure' ' the cluster status is UP (`sky status`).') head_ssh_port = handle.head_ssh_port diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 758a1a11145..21a297f9460 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2839,30 +2839,8 @@ def _exec_code_on_head( mkdir_code = (f'{cd} && mkdir -p {remote_log_dir} && ' f'touch {remote_log_path}') code = job_lib.JobLibCodeGen.queue_job(job_id, job_submit_cmd) - job_submit_cmd = mkdir_code + ' && ' + code - if spot_task is not None: - # Add the spot job to spot queue table. - resources_str = backend_utils.get_task_resources_str(spot_task) - spot_codegen = spot_lib.SpotCodeGen() - spot_name = spot_task.name - assert spot_name is not None, spot_task - spot_code = spot_codegen.set_pending(job_id, spot_name, - resources_str) - # Set the spot job to PENDING state to make sure that this spot - # job appears in the `sky spot queue`, when there are already 16 - # controller process jobs running on the controller VM with 8 - # CPU cores. - # The spot job should be set to PENDING state *after* the - # controller process job has been queued, as our skylet on spot - # controller will set the spot job in FAILED state if the - # controller process job does not exist. - # We cannot set the spot job to PENDING state in the codegen for - # the controller process job, as it will stay in the job pending - # table and not be executed until there is an empty slot. - job_submit_cmd = job_submit_cmd + ' && ' + spot_code - returncode, stdout, stderr = self.run_on_head(handle, job_submit_cmd, stream_logs=False, @@ -3644,38 +3622,19 @@ def run_on_head( *, port_forward: Optional[List[int]] = None, log_path: str = '/dev/null', + process_stream: bool = True, stream_logs: bool = False, + use_cached_head_ip: bool = True, ssh_mode: command_runner.SshMode = command_runner.SshMode. 
NON_INTERACTIVE, under_remote_workdir: bool = False, require_outputs: bool = False, separate_stderr: bool = False, - process_stream: bool = True, **kwargs, ) -> Union[int, Tuple[int, str, str]]: - """Runs 'cmd' on the cluster's head node. - - Args: - handle: The ResourceHandle to the cluster. - cmd: The command to run. - - Advanced options: - - port_forward: A list of ports to forward. - log_path: The path to the log file. - stream_logs: Whether to stream the logs to stdout/stderr. - ssh_mode: The mode to use for ssh. - See command_runner.SSHCommandRunner.SSHMode for more details. - under_remote_workdir: Whether to run the command under the remote - workdir ~/sky_workdir. - require_outputs: Whether to return the stdout and stderr of the - command. - separate_stderr: Whether to separate stderr from stdout. - process_stream: Whether to post-process the stdout/stderr of the - command, such as replacing or skipping lines on the fly. If - enabled, lines are printed only when '\r' or '\n' is found. 
- """ - head_ip = backend_utils.get_head_ip(handle, _FETCH_IP_MAX_ATTEMPTS) + """Runs 'cmd' on the cluster's head node.""" + head_ip = backend_utils.get_head_ip(handle, + _FETCH_IP_MAX_ATTEMPTS) head_ssh_port = backend_utils.get_head_ssh_port(handle, use_cached_head_ip, _FETCH_IP_MAX_ATTEMPTS) ssh_credentials = backend_utils.ssh_credential_from_yaml( diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 46ee778d27b..7e75243b96b 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -106,8 +106,15 @@ def get_port(cls, svc_name, namespace): return get_port(svc_name, namespace) @classmethod - def get_default_instance_type(cls) -> str: - return 'cpu1' + def get_default_instance_type( + cls, + cpus: Optional[str] = None, + memory: Optional[str] = None, + disk_tier: Optional[str] = None) -> Optional[str]: + return service_catalog.get_default_instance_type(cpus=cpus, + memory=memory, + disk_tier=disk_tier, + clouds='kubernetes') @classmethod def get_accelerators_from_instance_type( @@ -201,9 +208,15 @@ def _make(instance_list): # Currently, handle a filter on accelerators only. accelerators = resources.accelerators if accelerators is None: - # No requirements to filter, so just return a default VM type. - return (_make([Kubernetes.get_default_instance_type()]), - fuzzy_candidate_list) + # Return a default instance type with the given number of vCPUs. 
+ default_instance_type = Kubernetes.get_default_instance_type( + cpus=resources.cpus, + memory=resources.memory, + disk_tier=resources.disk_tier) + if default_instance_type is None: + return ([], []) + else: + return (_make([default_instance_type]), []) assert len(accelerators) == 1, resources acc, acc_count = list(accelerators.items())[0] diff --git a/sky/clouds/service_catalog/kubernetes_catalog.py b/sky/clouds/service_catalog/kubernetes_catalog.py index a666a932c80..948512e2bb6 100644 --- a/sky/clouds/service_catalog/kubernetes_catalog.py +++ b/sky/clouds/service_catalog/kubernetes_catalog.py @@ -23,6 +23,7 @@ logger = sky_logging.init_logger(__name__) _DEFAULT_NUM_VCPUS = 1 +_DEFAULT_MEMORY_CPU_RATIO = 1 _DEFAULT_INSTANCE_TYPE = 'cpu1' _df = common.read_catalog('kubernetes/vms.csv') @@ -73,15 +74,20 @@ def get_vcpus_mem_from_instance_type( return common.get_vcpus_mem_from_instance_type_impl(_df, instance_type) -def get_default_instance_type(cpus: Optional[str] = None) -> Optional[str]: - if cpus is None: - cpus = str(_DEFAULT_NUM_VCPUS) - df = _df[_df['InstanceType'].eq(_DEFAULT_INSTANCE_TYPE)] - instance = common.get_instance_type_for_cpus_impl(df, cpus) - if not instance: - instance = common.get_instance_type_for_cpus_impl(_df, cpus) - return instance +def get_default_instance_type(cpus: Optional[str] = None, + memory: Optional[str] = None, + disk_tier: Optional[str] = None) -> Optional[str]: + del disk_tier # unused + if cpus is None and memory is None: + cpus = f'{_DEFAULT_NUM_VCPUS}+' + + if memory is None: + memory_gb_or_ratio = f'{_DEFAULT_MEMORY_CPU_RATIO}x' + else: + memory_gb_or_ratio = memory + return common.get_instance_type_for_cpus_mem_impl(_df, cpus, + memory_gb_or_ratio) def get_accelerators_from_instance_type( instance_type: str) -> Optional[Dict[str, int]]: diff --git a/tests/playground/kind/create_cluster.sh b/tests/playground/kind/create_cluster.sh index 149b2f80e58..54a4adad583 100644 --- a/tests/playground/kind/create_cluster.sh +++ 
b/tests/playground/kind/create_cluster.sh @@ -1,3 +1,5 @@ +# Be sure to have built the latest image before running this script +# docker buildx build --load --platform linux/arm64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky && docker tag us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest skypilot:latest && docker push us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest set -e kind delete cluster kind create cluster --config cluster.yaml From 898a85149d49231c494d314598c492661e293e90 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 8 Jun 2023 16:09:54 -0700 Subject: [PATCH 012/183] fix resource selection --- sky/clouds/kubernetes.py | 10 +++++++++- sky/templates/kubernetes-ray.yml.j2 | 14 ++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 7e75243b96b..c358fdff3b0 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -1,5 +1,6 @@ import json import os +import sys import typing from typing import Dict, Iterator, List, Optional, Tuple @@ -177,11 +178,18 @@ def make_deploy_resources_variables( else: custom_resources = None - # TODO: return number of CPUs and memory here + # TODO: Resources.memory and resources.cpus are None if they are not explicitly set. + # We fetch the default values for the instance type in that case. 
+ cpus, mem = service_catalog.get_vcpus_mem_from_instance_type(resources.instance_type, clouds='kubernetes') + # Convert to int + cpus = int(cpus) + mem = int(mem) return { 'instance_type': resources.instance_type, 'custom_resources': custom_resources, 'region': region.name, + 'cpus': cpus, + 'memory': mem } def get_feasible_launchable_resources(self, diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 755b6262816..3fa1ba17ba6 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -158,8 +158,8 @@ available_node_types: name: dshm resources: requests: - cpu: 1 - memory: 2048Mi + cpu: {{cpus}} + memory: {{memory}}G limits: # The maximum memory that this pod is allowed to use. The # limit will be detected by ray and split to use 10% for @@ -168,7 +168,8 @@ available_node_types: # the object store size is not set manually, ray will # allocate a very large object store in each pod that may # cause problems for other pods. - memory: 2048Mi + cpu: {{cpus}} + memory: {{memory}}G head_node: node_config: apiVersion: v1 @@ -227,8 +228,8 @@ available_node_types: command: ["/bin/bash", "-c", "mkdir -p ~/.ssh && cp /etc/secret-volume/ssh-publickey ~/.ssh/authorized_keys && sudo service ssh restart"] resources: requests: - cpu: 1000m - memory: 2048Mi + cpu: {{cpus}} + memory: {{memory}}G limits: # The maximum memory that this pod is allowed to use. The # limit will be detected by ray and split to use 10% for @@ -237,7 +238,8 @@ available_node_types: # the object store size is not set manually, ray will # allocate a very large object store in each pod that may # cause problems for other pods. - memory: 2048Mi + cpu: {{cpus}} + memory: {{memory}}G setup_commands: # Disable `unattended-upgrades` to prevent apt-get from hanging. It should be called at the beginning before the process started to avoid being blocked. (This is a temporary fix.) 
From fcb51d119bd4fcf1b83237d49def82a1bc13bf2d Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 8 Jun 2023 18:06:37 -0700 Subject: [PATCH 013/183] Add provisioning timeout by checking if pods are ready --- sky/skylet/providers/kubernetes/__init__.py | 2 +- .../providers/kubernetes/node_provider.py | 27 ++++++++++++++++++- sky/skylet/providers/kubernetes/utils.py | 4 +++ 3 files changed, 31 insertions(+), 2 deletions(-) diff --git a/sky/skylet/providers/kubernetes/__init__.py b/sky/skylet/providers/kubernetes/__init__.py index f3113ec817b..984f6ddc816 100644 --- a/sky/skylet/providers/kubernetes/__init__.py +++ b/sky/skylet/providers/kubernetes/__init__.py @@ -1,2 +1,2 @@ -from sky.skylet.providers.kubernetes.utils import core_api, log_prefix, networking_api, auth_api, get_head_ssh_port, get_port +from sky.skylet.providers.kubernetes.utils import core_api, log_prefix, networking_api, auth_api, get_head_ssh_port, get_port, KubernetesError from sky.skylet.providers.kubernetes.node_provider import KubernetesNodeProvider diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 1c3d88b9d5c..7d65308bb88 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -9,7 +9,7 @@ from kubernetes.client.rest import ApiException -from sky.skylet.providers.kubernetes import core_api, log_prefix, networking_api, get_head_ssh_port +from sky.skylet.providers.kubernetes import core_api, log_prefix, networking_api, get_head_ssh_port, KubernetesError from sky.skylet.providers.kubernetes.config import ( bootstrap_kubernetes, fillout_resources_kubernetes, @@ -238,6 +238,31 @@ def create_node(self, node_config, tags, count): ) networking_api().create_namespaced_ingress(self.namespace, ingress_spec) + # Wait for all pods to be ready, and if it exceeds the timeout, raise an + # exception. + + # TODO(romilb): Figure out a way to make this timeout configurable. 
+ TIMEOUT = 30 + start = time.time() + while True: + if time.time() - start > TIMEOUT: + raise KubernetesError( + "Timed out while waiting for nodes to start. Cluster may be out of resources or may be too slow to autoscale." + ) + all_ready = True + for pod in new_nodes: + pod = core_api().read_namespaced_pod(pod.metadata.name, self.namespace) + if pod.status.phase != "Running": + all_ready = False + break + if all_ready: + break + else: + time.sleep(1) + + + + def terminate_node(self, node_id): logger.info(log_prefix + "calling delete_namespaced_pod") try: diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 0182777b882..dbd2b6a8a4c 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -64,3 +64,7 @@ def get_port(svc_name, namespace): return head_service.spec.ports[0].node_port log_prefix = "KubernetesNodeProvider: " + + +class KubernetesError(Exception): + pass \ No newline at end of file From 13eb198d4556d924e58b17936c400a2681fd730d Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 9 Jun 2023 11:44:26 -0700 Subject: [PATCH 014/183] Working mounting --- Dockerfile_k8s | 2 +- sky/backends/cloud_vm_ray_backend.py | 3 ++- sky/clouds/service_catalog/kubernetes_catalog.py | 4 ++-- sky/skylet/providers/kubernetes/node_provider.py | 2 +- sky/templates/kubernetes-ray.yml.j2 | 16 ++++++++++++++-- tests/playground/kind/create_cluster.sh | 4 ++-- 6 files changed, 22 insertions(+), 9 deletions(-) diff --git a/Dockerfile_k8s b/Dockerfile_k8s index 9b0b093172f..0151c447d82 100644 --- a/Dockerfile_k8s +++ b/Dockerfile_k8s @@ -16,7 +16,7 @@ FROM continuumio/miniconda3:22.11.1 # Initialize conda for root user, install ssh and other local dependencies RUN apt update -y && \ - apt install gcc rsync sudo patch openssh-server pciutils nano -y && \ + apt install gcc rsync sudo patch openssh-server pciutils nano fuse -y && \ rm -rf /var/lib/apt/lists/* && \ apt remove -y python3 
&& \ conda init diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 21a297f9460..ea953abaef1 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -3902,11 +3902,12 @@ def _execute_storage_mounts(self, handle: CloudVmRayResourceHandle, f'storage mount{plural}.{style.RESET_ALL}') start = time.time() ip_list = handle.external_ips() + port_list = handle.external_ssh_ports() assert ip_list is not None, 'external_ips is not cached in handle' ssh_credentials = backend_utils.ssh_credential_from_yaml( handle.cluster_yaml) runners = command_runner.SSHCommandRunner.make_runner_list( - ip_list, **ssh_credentials) + ip_list, port_list=port_list, **ssh_credentials) log_path = os.path.join(self.log_dir, 'storage_mounts.log') for dst, storage_obj in storage_mounts.items(): diff --git a/sky/clouds/service_catalog/kubernetes_catalog.py b/sky/clouds/service_catalog/kubernetes_catalog.py index 948512e2bb6..b7da3934857 100644 --- a/sky/clouds/service_catalog/kubernetes_catalog.py +++ b/sky/clouds/service_catalog/kubernetes_catalog.py @@ -22,9 +22,9 @@ logger = sky_logging.init_logger(__name__) -_DEFAULT_NUM_VCPUS = 1 +_DEFAULT_NUM_VCPUS = 4 _DEFAULT_MEMORY_CPU_RATIO = 1 -_DEFAULT_INSTANCE_TYPE = 'cpu1' +_DEFAULT_INSTANCE_TYPE = 'cpu4' _df = common.read_catalog('kubernetes/vms.csv') diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 7d65308bb88..e6c382f249a 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -242,7 +242,7 @@ def create_node(self, node_config, tags, count): # exception. # TODO(romilb): Figure out a way to make this timeout configurable. 
- TIMEOUT = 30 + TIMEOUT = 60 start = time.time() while True: if time.time() - start > TIMEOUT: diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 3fa1ba17ba6..27773320909 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -135,6 +135,9 @@ available_node_types: - name: dshm emptyDir: medium: Memory + - name: dev-fuse # Required for fuse mounting + hostPath: + path: /dev/fuse containers: - name: ray-node imagePullPolicy: Always @@ -156,6 +159,10 @@ available_node_types: mountPath: "/etc/secret-volume" - mountPath: /dev/shm name: dshm + - mountPath: /dev/fuse # Required for fuse mounting + name: dev-fuse + securityContext: # Required for FUSE mounting, but may be a security risk + privileged: true resources: requests: cpu: {{cpus}} @@ -199,6 +206,9 @@ available_node_types: - name: dshm emptyDir: medium: Memory + - name: dev-fuse # Required for fuse mounting + hostPath: + path: /dev/fuse containers: - name: ray-node imagePullPolicy: Always @@ -222,6 +232,10 @@ available_node_types: mountPath: "/etc/secret-volume" - mountPath: /dev/shm name: dshm + - mountPath: /dev/fuse # Required for fuse mounting + name: dev-fuse + securityContext: # Required for FUSE mounting, but may be a security risk + privileged: true lifecycle: postStart: exec: @@ -251,8 +265,6 @@ setup_commands: # Line 'python3 -c ..': patch the buggy ray files and enable `-o allow_other` option for `goofys` - sudo systemctl stop unattended-upgrades || true; sudo systemctl disable unattended-upgrades || true; - sudo sed -i 's/Unattended-Upgrade "1"/Unattended-Upgrade "0"/g' /etc/apt/apt.conf.d/20auto-upgrades || true; - sudo kill -9 `sudo lsof /var/lib/dpkg/lock-frontend | awk '{print $2}' | tail -n 1` || true; sudo pkill -9 apt-get; sudo pkill -9 dpkg; sudo dpkg --configure -a; diff --git a/tests/playground/kind/create_cluster.sh b/tests/playground/kind/create_cluster.sh index 54a4adad583..47c0148fcaa 100644 --- 
a/tests/playground/kind/create_cluster.sh +++ b/tests/playground/kind/create_cluster.sh @@ -1,7 +1,7 @@ # Be sure to have built the latest image before running this script -# docker buildx build --load --platform linux/arm64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky && docker tag us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest skypilot:latest && docker push us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest +# docker buildx build --load --platform linux/amd64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky && docker tag us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest skypilot:latest && docker push us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest set -e kind delete cluster kind create cluster --config cluster.yaml # Load local skypilot image -kind load docker-image skypilot:latest +kind load docker-image us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest From 428f143af7425eb94d88edfb6798605d23a3d4db Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 13 Jun 2023 14:50:25 -0700 Subject: [PATCH 015/183] Remove catalog --- sky/clouds/kubernetes.py | 123 +++++++++--------- .../service_catalog/kubernetes_catalog.py | 5 - 2 files changed, 60 insertions(+), 68 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index c358fdff3b0..8837ef001d3 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -19,8 +19,10 @@ @clouds.CLOUD_REGISTRY.register class Kubernetes(clouds.Cloud): + _DEFAULT_NUM_VCPUS = 4 + _DEFAULT_MEMORY_CPU_RATIO = 1 _REPR = 'Kubernetes' - _regions: List[clouds.Region] = ['kubernetes'] + _regions: List[clouds.Region] = [clouds.Region('kubernetes')] _CLOUD_UNSUPPORTED_FEATURES = { clouds.CloudImplementationFeatures.STOP: 'Kubernetes does not support stopping VMs.', 
clouds.CloudImplementationFeatures.AUTOSTOP: 'Kubernetes does not support stopping VMs.', @@ -43,19 +45,8 @@ def regions_with_offering(cls, instance_type: Optional[str], accelerators: Optional[Dict[str, int]], use_spot: bool, region: Optional[str], zone: Optional[str]) -> List[clouds.Region]: - del accelerators, zone # unused - if use_spot: - return [] - if instance_type is None: - # Fall back to default regions - regions = cls.regions() - else: - regions = service_catalog.get_region_zones_for_instance_type( - instance_type, use_spot, 'kubernetes') - - if region is not None: - regions = [r for r in regions if r.name == region] - return regions + # No notion of regions in Kubernetes - return a single region. + return cls.regions() @classmethod def region_zones_provision_loop( @@ -65,12 +56,8 @@ def region_zones_provision_loop( accelerators: Optional[Dict[str, int]] = None, use_spot: bool = False, ) -> Iterator[Tuple[clouds.Region, List[clouds.Zone]]]: - regions = cls.regions_with_offering(instance_type, - accelerators, - use_spot, - region=None, - zone=None) - for region in regions: + # No notion of regions in Kubernetes - return a single region. + for region in cls.regions(): yield region, region.zones def instance_type_to_hourly_cost(self, @@ -78,11 +65,8 @@ def instance_type_to_hourly_cost(self, use_spot: bool, region: Optional[str] = None, zone: Optional[str] = None) -> float: - return service_catalog.get_hourly_cost(instance_type, - use_spot=use_spot, - region=region, - zone=zone, - clouds='kubernetes') + # Assume zero cost for Kubernetes clusters + return 0.0 def accelerators_to_hourly_cost(self, accelerators: Dict[str, int], @@ -112,25 +96,37 @@ def get_default_instance_type( cpus: Optional[str] = None, memory: Optional[str] = None, disk_tier: Optional[str] = None) -> Optional[str]: - return service_catalog.get_default_instance_type(cpus=cpus, - memory=memory, - disk_tier=disk_tier, - clouds='kubernetes') + del disk_tier # Unused. 
+ virtual_instance_type = '' + if cpus is not None: + virtual_instance_type += f'{cpus}vCPU-' + else: + virtual_instance_type += f'{cls._DEFAULT_NUM_VCPUS}vCPU' + if memory is not None: + virtual_instance_type += f'{memory}GB' + else: + virtual_instance_type += f'{cls._DEFAULT_NUM_VCPUS * cls._DEFAULT_MEMORY_CPU_RATIO}GB' + return virtual_instance_type + @classmethod def get_accelerators_from_instance_type( cls, instance_type: str, ) -> Optional[Dict[str, int]]: - return service_catalog.get_accelerators_from_instance_type( - instance_type, clouds='kubernetes') + # TODO(romilb): Add GPU support. + return None @classmethod def get_vcpus_mem_from_instance_type( cls, instance_type: str) -> Tuple[Optional[float], Optional[float]]: """Returns the #vCPUs and memory that the instance type offers.""" - return service_catalog.get_vcpus_mem_from_instance_type(instance_type, - clouds='kubernetes') + vcpus = cls.get_vcpus_from_instance_type(instance_type) + mem = cls.get_mem_from_instance_type(instance_type) + return vcpus, mem + + + @classmethod def zones_provision_loop( @@ -143,12 +139,7 @@ def zones_provision_loop( use_spot: bool = False, ) -> Iterator[None]: del num_nodes # Unused. 
- regions = cls.regions_with_offering(instance_type, - accelerators, - use_spot=use_spot, - region=region, - zone=None) - for r in regions: + for r in cls.regions(): yield r.zones @classmethod @@ -156,8 +147,21 @@ def get_vcpus_from_instance_type( cls, instance_type: str, ) -> Optional[float]: - return service_catalog.get_vcpus_from_instance_type(instance_type, - clouds='kubernetes') + """Returns the #vCPUs that the instance type offers.""" + if instance_type is None: + return None + # TODO(romilb): Better parsing + return float(instance_type.split('vCPU')[0]) + + @classmethod + def get_mem_from_instance_type( + cls, + instance_type: str, + ) -> Optional[float]: + """Returns the memory that the instance type offers.""" + if instance_type is None: + return None + return float(instance_type.split('vCPU-')[1].split('GB')[0]) @classmethod def get_zone_shell_cmd(cls) -> Optional[str]: @@ -178,10 +182,10 @@ def make_deploy_resources_variables( else: custom_resources = None - # TODO: Resources.memory and resources.cpus are None if they are not explicitly set. - # We fetch the default values for the instance type in that case. - cpus, mem = service_catalog.get_vcpus_mem_from_instance_type(resources.instance_type, clouds='kubernetes') - # Convert to int + # resources.memory and resources.cpus are None if they are not explicitly set. + # We fetch the default values for the instance type in that case. 
+ cpus, mem = self.get_vcpus_mem_from_instance_type(resources.instance_type) + # TODO(romilb): Allow fractional resources here cpus = int(cpus) mem = int(mem) return { @@ -227,18 +231,8 @@ def _make(instance_list): return (_make([default_instance_type]), []) assert len(accelerators) == 1, resources - acc, acc_count = list(accelerators.items())[0] - (instance_list, fuzzy_candidate_list - ) = service_catalog.get_instance_type_for_accelerator( - acc, - acc_count, - use_spot=resources.use_spot, - region=resources.region, - zone=resources.zone, - clouds='kubernetes') - if instance_list is None: - return ([], fuzzy_candidate_list) - return (_make(instance_list), fuzzy_candidate_list) + # TODO(romilb): Add GPU support. + raise NotImplementedError("GPU support not implemented yet.") def check_credentials(self) -> Tuple[bool, Optional[str]]: # TODO(romilb): Check credential validity using k8s api @@ -257,17 +251,20 @@ def get_credential_file_mounts(self) -> Dict[str, str]: # } def instance_type_exists(self, instance_type: str) -> bool: - return service_catalog.instance_type_exists(instance_type, 'kubernetes') + # TODO(romilb): All instance types are supported for now. In the future + # we should check if the instance type is supported by the cluster. + return True def validate_region_zone(self, region: Optional[str], zone: Optional[str]): - return service_catalog.validate_region_zone(region, - zone, - clouds='kubernetes') + # Kubernetes doesn't have regions or zones, so we don't need to validate + return region, zone def accelerator_in_region_or_zone(self, accelerator: str, acc_count: int, region: Optional[str] = None, zone: Optional[str] = None) -> bool: - return service_catalog.accelerator_in_region_or_zone( - accelerator, acc_count, region, zone, 'kubernetes') + # TODO(romilb): All accelerators are marked as available for now. In the + # future, we should return false for accelerators that we know are not + # supported by the cluster. 
+ return True diff --git a/sky/clouds/service_catalog/kubernetes_catalog.py b/sky/clouds/service_catalog/kubernetes_catalog.py index b7da3934857..9e56fd83735 100644 --- a/sky/clouds/service_catalog/kubernetes_catalog.py +++ b/sky/clouds/service_catalog/kubernetes_catalog.py @@ -22,11 +22,6 @@ logger = sky_logging.init_logger(__name__) -_DEFAULT_NUM_VCPUS = 4 -_DEFAULT_MEMORY_CPU_RATIO = 1 -_DEFAULT_INSTANCE_TYPE = 'cpu4' - -_df = common.read_catalog('kubernetes/vms.csv') def instance_type_exists(instance_type: str) -> bool: return common.instance_type_exists_impl(_df, instance_type) From ebf9d83c67c0f3518384b0f71eed7ca2bd697ef2 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 13 Jun 2023 21:15:49 -0700 Subject: [PATCH 016/183] fixes --- sky/templates/kubernetes-ray.yml.j2 | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 27773320909..c1f6270a5e2 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -281,10 +281,10 @@ setup_commands: mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n StrictHostKeyChecking no" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n" >> ~/.ssh/config; python3 -c "from sky.skylet.ray_patches import patch; patch()" || exit 1; [ -f /etc/fuse.conf ] && sudo sed -i 's/#user_allow_other/user_allow_other/g' /etc/fuse.conf || (sudo sh -c 'echo "user_allow_other" > /etc/fuse.conf'); - echo Starting ray in setup commands; - ((ps aux | grep -v nohup | grep -v grep | grep -q -- "python3 -m sky.skylet.skylet") || nohup python3 -m sky.skylet.skylet >> ~/.sky/skylet.log 2>&1 &); - ray stop; RAY_SCHEDULER_EVENTS=0 ray start --num-cpus $(nproc) --disable-usage-stats --dashboard-host 0.0.0.0 --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; - which prlimit && for id in $(pgrep -f 
raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; + # echo Starting ray in setup commands; + # ((ps aux | grep -v nohup | grep -v grep | grep -q -- "python3 -m sky.skylet.skylet") || nohup python3 -m sky.skylet.skylet >> ~/.sky/skylet.log 2>&1 &); + # ray stop; RAY_SCHEDULER_EVENTS=0 ray start --num-cpus $(nproc) --disable-usage-stats --dashboard-host 0.0.0.0 --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; + # which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; # Command to start ray on the head node. You don't need to change this. # NOTE: these are very performance-sensitive. Each new item opens/closes an SSH @@ -297,9 +297,10 @@ setup_commands: head_start_ray_commands: # Start skylet daemon. (Should not place it in the head_setup_commands, otherwise it will run before skypilot is installed.) # WARNING - Ray object store memory is capped at 100 MB. This is to avoid errors during initialization. 
--object-store-memory=100000000 - #- ((ps aux | grep -v nohup | grep -v grep | grep -q -- "python3 -m sky.skylet.skylet") || nohup python3 -m sky.skylet.skylet >> ~/.sky/skylet.log 2>&1 &); - # ray stop; echo hiiiiii $(nproc); RAY_SCHEDULER_EVENTS=0 ray start --num-cpus $(nproc) --disable-usage-stats --dashboard-host 0.0.0.0 --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; - # which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; + - echo "Running head."; + - ((ps aux | grep -v nohup | grep -v grep | grep -q -- "python3 -m sky.skylet.skylet") || nohup python3 -m sky.skylet.skylet >> ~/.sky/skylet.log 2>&1 &); + ray stop; echo hiiiiii $(nproc); RAY_SCHEDULER_EVENTS=0 ray start --num-cpus $(nproc) --disable-usage-stats --dashboard-host 0.0.0.0 --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; + which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; - echo "For some reason ray does not honor numcpus when specified here. It is empty in ray.available_resources. 
So we do it in the setup."; {%- if num_nodes > 1 %} From da570fc52144be1d5fd0286999144f6afd7960a3 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 14 Jun 2023 18:33:05 -0700 Subject: [PATCH 017/183] fixes --- Dockerfile_k8s | 2 +- sky/clouds/kubernetes.py | 14 ++++---------- sky/templates/kubernetes-ray.yml.j2 | 19 ++++++++----------- tests/playground/kind/create_cluster.sh | 5 +++-- tests/playground/kind/portmap_gen.py | 2 +- 5 files changed, 17 insertions(+), 25 deletions(-) diff --git a/Dockerfile_k8s b/Dockerfile_k8s index 0151c447d82..3fc306f380b 100644 --- a/Dockerfile_k8s +++ b/Dockerfile_k8s @@ -39,7 +39,7 @@ USER sky # Install SkyPilot pip dependencies RUN pip install wheel Click colorama cryptography jinja2 jsonschema && \ pip install networkx oauth2client pandas pendulum PrettyTable && \ - pip install ray==2.3.0 rich tabulate filelock && \ + pip install ray==2.4.0 rich tabulate filelock && \ pip install packaging 'protobuf<4.0.0' pulp && \ pip install awscli boto3 pycryptodome==3.12.0 && \ pip install docker kubernetes diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 8837ef001d3..279b1a471fd 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -98,14 +98,10 @@ def get_default_instance_type( disk_tier: Optional[str] = None) -> Optional[str]: del disk_tier # Unused. 
virtual_instance_type = '' - if cpus is not None: - virtual_instance_type += f'{cpus}vCPU-' - else: - virtual_instance_type += f'{cls._DEFAULT_NUM_VCPUS}vCPU' - if memory is not None: - virtual_instance_type += f'{memory}GB' - else: - virtual_instance_type += f'{cls._DEFAULT_NUM_VCPUS * cls._DEFAULT_MEMORY_CPU_RATIO}GB' + n_cpus = cpus if cpus is not None else cls._DEFAULT_NUM_VCPUS + mem = memory if memory is not None else cls._DEFAULT_NUM_VCPUS * cls._DEFAULT_MEMORY_CPU_RATIO + virtual_instance_type += f'{n_cpus}vCPU-' + virtual_instance_type += f'{mem}GB' return virtual_instance_type @@ -126,8 +122,6 @@ def get_vcpus_mem_from_instance_type( return vcpus, mem - - @classmethod def zones_provision_loop( cls, diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index c1f6270a5e2..04cb698b99e 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -281,10 +281,6 @@ setup_commands: mkdir -p ~/.ssh; (grep -Pzo -q "Host \*\n StrictHostKeyChecking no" ~/.ssh/config) || printf "Host *\n StrictHostKeyChecking no\n" >> ~/.ssh/config; python3 -c "from sky.skylet.ray_patches import patch; patch()" || exit 1; [ -f /etc/fuse.conf ] && sudo sed -i 's/#user_allow_other/user_allow_other/g' /etc/fuse.conf || (sudo sh -c 'echo "user_allow_other" > /etc/fuse.conf'); - # echo Starting ray in setup commands; - # ((ps aux | grep -v nohup | grep -v grep | grep -q -- "python3 -m sky.skylet.skylet") || nohup python3 -m sky.skylet.skylet >> ~/.sky/skylet.log 2>&1 &); - # ray stop; RAY_SCHEDULER_EVENTS=0 ray start --num-cpus $(nproc) --disable-usage-stats --dashboard-host 0.0.0.0 --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; - # which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; # Command to start ray on the head node. 
You don't need to change this. # NOTE: these are very performance-sensitive. Each new item opens/closes an SSH @@ -295,18 +291,19 @@ setup_commands: # current num items (num SSH connections): 2 # Note dashboard-host is set to 0.0.0.0 so that kubernetes can port forward. head_start_ray_commands: - # Start skylet daemon. (Should not place it in the head_setup_commands, otherwise it will run before skypilot is installed.) - # WARNING - Ray object store memory is capped at 100 MB. This is to avoid errors during initialization. --object-store-memory=100000000 - - echo "Running head."; + # Start skylet daemon. (Should not place it in the head_setup_commands, otherwise it will run before sky is installed.) + # NOTE: --disable-usage-stats in `ray start` saves 10 seconds of idle wait. + # Line "which prlimit ..": increase the limit of the number of open files for the raylet process, as the `ulimit` may not take effect at this point, because it requires + # all the sessions to be reloaded. This is a workaround. + - echo "Starting Ray." 
- ((ps aux | grep -v nohup | grep -v grep | grep -q -- "python3 -m sky.skylet.skylet") || nohup python3 -m sky.skylet.skylet >> ~/.sky/skylet.log 2>&1 &); - ray stop; echo hiiiiii $(nproc); RAY_SCHEDULER_EVENTS=0 ray start --num-cpus $(nproc) --disable-usage-stats --dashboard-host 0.0.0.0 --head --port=6379 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; + ray stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 ray start --disable-usage-stats --head --port={{ray_port}} --dashboard-port={{ray_dashboard_port}} --dashboard-host 0.0.0.0 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1; which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; - - echo "For some reason ray does not honor numcpus when specified here. It is empty in ray.available_resources. So we do it in the setup."; + {{dump_port_command}}; {%- if num_nodes > 1 %} worker_start_ray_commands: - # WARNING - Ray object store memory is capped at 100 MB. This is to avoid errors during initialization. 
- - ray stop; echo hi $(nproc); RAY_SCHEDULER_EVENTS=0 ray start --num-cpus $(nproc) --disable-usage-stats --address=$RAY_HEAD_IP:6379 --object-manager-port=8076 {{"--resources='%s'" % custom_resources if custom_resources}} || exit 1; + - ray stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 ray start --disable-usage-stats --address=$RAY_HEAD_IP:{{ray_port}} --object-manager-port=8076 {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1; which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; {%- else %} worker_start_ray_commands: [] diff --git a/tests/playground/kind/create_cluster.sh b/tests/playground/kind/create_cluster.sh index 47c0148fcaa..73990b0b795 100644 --- a/tests/playground/kind/create_cluster.sh +++ b/tests/playground/kind/create_cluster.sh @@ -1,7 +1,8 @@ # Be sure to have built the latest image before running this script -# docker buildx build --load --platform linux/amd64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky && docker tag us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest skypilot:latest && docker push us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest +# If running on apple silicon: +# docker buildx build --load --platform linux/arm64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky && docker tag us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest skypilot:latest && docker push us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest set -e kind delete cluster -kind create cluster --config cluster.yaml +kind create cluster --config kind-cluster.yaml # Load local skypilot image kind load docker-image us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest diff --git a/tests/playground/kind/portmap_gen.py b/tests/playground/kind/portmap_gen.py index 
1fd526a7f80..f95da3a71a5 100644 --- a/tests/playground/kind/portmap_gen.py +++ b/tests/playground/kind/portmap_gen.py @@ -5,7 +5,7 @@ - role: control-plane extraPortMappings:""" suffix = """- role: worker""" -with open('cluster.yaml', 'w') as f: +with open('kind-cluster.yaml', 'w') as f: f.write(preamble) for port in range(30000, 32768): f.write(f""" From 1bea86640a97f394c500f4037b3dd563523c08ef Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 14 Jun 2023 21:10:46 -0700 Subject: [PATCH 018/183] Fix ssh-key auth to create unique secrets --- sky/authentication.py | 3 +-- sky/backends/backend_utils.py | 3 +++ sky/clouds/kubernetes.py | 5 +++-- sky/templates/kubernetes-ray.yml.j2 | 4 ++-- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/sky/authentication.py b/sky/authentication.py index c64d2cb615b..1694e20c552 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -428,8 +428,7 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: # Run kubectl command to add the public key to the cluster. public_key_path = os.path.expanduser(PUBLIC_SSH_KEY_PATH) - # TODO(romilb): Change 'ssh-key-secret' to a unique name. 
- key_label = 'ssh-key-secret' + key_label = clouds.Kubernetes.SKY_SSH_KEY_SECRET_NAME cmd = f"kubectl create secret generic {key_label} --from-file=ssh-publickey={public_key_path}" try: subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 451d264f7e3..e4e71aef765 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -853,8 +853,10 @@ def write_cluster_config( credentials = sky_check.get_cloud_credential_file_mounts() k8s_image = None + ssh_key_secret_name = None if isinstance(cloud, clouds.Kubernetes): k8s_image = cloud.IMAGE + ssh_key_secret_name = cloud.SKY_SSH_KEY_SECRET_NAME ip_list = None auth_config = {'ssh_private_key': auth.PRIVATE_SSH_KEY_PATH} @@ -951,6 +953,7 @@ def write_cluster_config( # Kubernetes only: 'skypilot_k8s_image': k8s_image, + 'ssh_key_secret_name': ssh_key_secret_name, # Port of Ray (GCS server). # Ray's default port 6379 is conflicted with Redis. diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 279b1a471fd..345cbbff20d 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -5,7 +5,7 @@ from typing import Dict, Iterator, List, Optional, Tuple from sky import clouds -from sky.clouds import service_catalog +from sky.utils import common_utils if typing.TYPE_CHECKING: # Renaming to avoid shadowing variables. @@ -19,6 +19,7 @@ @clouds.CLOUD_REGISTRY.register class Kubernetes(clouds.Cloud): + SKY_SSH_KEY_SECRET_NAME = f'sky-ssh-{common_utils.get_user_hash()}' _DEFAULT_NUM_VCPUS = 4 _DEFAULT_MEMORY_CPU_RATIO = 1 _REPR = 'Kubernetes' @@ -99,7 +100,7 @@ def get_default_instance_type( del disk_tier # Unused. 
virtual_instance_type = '' n_cpus = cpus if cpus is not None else cls._DEFAULT_NUM_VCPUS - mem = memory if memory is not None else cls._DEFAULT_NUM_VCPUS * cls._DEFAULT_MEMORY_CPU_RATIO + mem = memory if memory is not None else n_cpus * cls._DEFAULT_MEMORY_CPU_RATIO virtual_instance_type += f'{n_cpus}vCPU-' virtual_instance_type += f'{mem}GB' return virtual_instance_type diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 04cb698b99e..4e070e85423 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -131,7 +131,7 @@ available_node_types: volumes: - name: secret-volume secret: - secretName: ssh-key-secret + secretName: {{ssh_key_secret_name}} - name: dshm emptyDir: medium: Memory @@ -202,7 +202,7 @@ available_node_types: volumes: - name: secret-volume secret: - secretName: ssh-key-secret + secretName: {{ssh_key_secret_name}} - name: dshm emptyDir: medium: Memory From 9def75681d6d16fdaf060b692d0541ebff64c285 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 15 Jun 2023 13:18:37 -0700 Subject: [PATCH 019/183] Fix for ContainerCreating timeout --- .../providers/kubernetes/node_provider.py | 24 ++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index e6c382f249a..3879a4e9cb7 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -239,7 +239,8 @@ def create_node(self, node_config, tags, count): networking_api().create_namespaced_ingress(self.namespace, ingress_spec) # Wait for all pods to be ready, and if it exceeds the timeout, raise an - # exception. + # exception. If pod's container is ContainerCreating, then we can assume + # that resources have been allocated and we can exit. # TODO(romilb): Figure out a way to make this timeout configurable. 
TIMEOUT = 60 @@ -250,15 +251,22 @@ def create_node(self, node_config, tags, count): "Timed out while waiting for nodes to start. Cluster may be out of resources or may be too slow to autoscale." ) all_ready = True - for pod in new_nodes: - pod = core_api().read_namespaced_pod(pod.metadata.name, self.namespace) - if pod.status.phase != "Running": - all_ready = False - break + for node in new_nodes: + pod = core_api().read_namespaced_pod(node.metadata.name, self.namespace) + if pod.status.phase == "Pending": + # Check conditions for more detailed status + for condition in pod.status.conditions: + if condition.reason == 'ContainerCreating': + # Container is creating, so we can assume resources + # have been allocated. Safe to exit. + break + else: + # Pod is pending and not in 'ContainerCreating' state + all_ready = False + break if all_ready: break - else: - time.sleep(1) + time.sleep(1) From 65366eb35dbad388025d5a4b6d27d67ea9e5fb3e Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 15 Jun 2023 14:24:52 -0700 Subject: [PATCH 020/183] Fix head node ssh port caching --- sky/backends/backend_utils.py | 11 +++----- sky/backends/cloud_vm_ray_backend.py | 39 +++++++++++++++++++++------- 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 496d1233630..f45caba20bc 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -1603,17 +1603,12 @@ def get_head_ssh_port( max_attempts: int = 1, ) -> str: """Returns the ip of the head node.""" + del max_attempts # Unused. # Use port 22 for everything except Kubernetes + # TODO(romilb): Add a get port method to the cloud classes. if not isinstance(handle.launched_resources.cloud, clouds.Kubernetes): return 22 - if use_cache: - if handle.head_ssh_port is None: - # This happens for INIT clusters (e.g., exit 1 in setup). 
- with ux_utils.print_exception_no_traceback(): - raise ValueError( - 'Cluster\'s head SSH port not found; is it up? To fix: ' - 'run a successful launch first (`sky launch`) to ensure' - ' the cluster status is UP (`sky status`).') + if use_cache and handle.head_ssh_port is not None: head_ssh_port = handle.head_ssh_port else: # TODO(romilb): Only supports headnode for now! No multinode! diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index f9215578d34..7a7b861e500 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2127,9 +2127,9 @@ def _update_cluster_region(self): self.launched_resources = self.launched_resources.copy(region=region) - def _update_stable_ssh_ports(self): + def _update_stable_ssh_ports(self, max_attempts: int = 1) -> None: if isinstance(self.launched_resources.cloud, clouds.Kubernetes): - head_port = backend_utils.get_head_ssh_port(self, use_cache=False) + head_port = backend_utils.get_head_ssh_port(self, use_cache=False, max_attempts=max_attempts) # TODO(romilb): Multinode doesn't work with Kubernetes yet. worker_ports = [22] * self.launched_nodes ports = [head_port] + worker_ports @@ -2208,7 +2208,7 @@ def external_ssh_ports(self, max_attempts: int = _FETCH_IP_MAX_ATTEMPTS, use_cached_ports: bool = True) -> Optional[List[str]]: if not use_cached_ports: - self._update_stable_ssh_ports() + self._update_stable_ssh_ports(max_attempts=max_attempts) if self.stable_ssh_ports is not None: return self.stable_ssh_ports return None @@ -3656,21 +3656,40 @@ def run_on_head( *, port_forward: Optional[List[int]] = None, log_path: str = '/dev/null', - process_stream: bool = True, stream_logs: bool = False, - use_cached_head_ip: bool = True, ssh_mode: command_runner.SshMode = command_runner.SshMode. 
NON_INTERACTIVE, under_remote_workdir: bool = False, require_outputs: bool = False, separate_stderr: bool = False, + process_stream: bool = True, **kwargs, ) -> Union[int, Tuple[int, str, str]]: - """Runs 'cmd' on the cluster's head node.""" - head_ip = backend_utils.get_head_ip(handle, - _FETCH_IP_MAX_ATTEMPTS) - head_ssh_port = backend_utils.get_head_ssh_port(handle, use_cached_head_ip, - _FETCH_IP_MAX_ATTEMPTS) + """Runs 'cmd' on the cluster's head node. + + Args: + handle: The ResourceHandle to the cluster. + cmd: The command to run. + + Advanced options: + + port_forward: A list of ports to forward. + log_path: The path to the log file. + stream_logs: Whether to stream the logs to stdout/stderr. + ssh_mode: The mode to use for ssh. + See command_runner.SSHCommandRunner.SSHMode for more details. + under_remote_workdir: Whether to run the command under the remote + workdir ~/sky_workdir. + require_outputs: Whether to return the stdout and stderr of the + command. + separate_stderr: Whether to separate stderr from stdout. + process_stream: Whether to post-process the stdout/stderr of the + command, such as replacing or skipping lines on the fly. If + enabled, lines are printed only when '\r' or '\n' is found. 
+ """ + head_ip = backend_utils.get_head_ip(handle, _FETCH_IP_MAX_ATTEMPTS) + head_ssh_port = backend_utils.get_head_ssh_port(handle, + _FETCH_IP_MAX_ATTEMPTS) ssh_credentials = backend_utils.ssh_credential_from_yaml( handle.cluster_yaml) runner = command_runner.SSHCommandRunner(head_ip, port=head_ssh_port, **ssh_credentials) From b984ead500b5f8acba15284b294511f66153190c Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 15 Jun 2023 16:38:07 -0700 Subject: [PATCH 021/183] mypy --- sky/authentication.py | 4 +- sky/backends/backend_utils.py | 125 ++++------------- sky/backends/cloud_vm_ray_backend.py | 34 +++-- sky/clouds/kubernetes.py | 72 +++++----- sky/clouds/service_catalog/__init__.py | 3 +- .../service_catalog/kubernetes_catalog.py | 129 ------------------ sky/skylet/providers/kubernetes/config.py | 75 +++++----- .../providers/kubernetes/kubectl-rsync.sh | 30 ---- .../providers/kubernetes/node_provider.py | 118 +++++++--------- sky/skylet/providers/kubernetes/utils.py | 8 +- sky/utils/command_runner.py | 11 +- sky/utils/command_runner.pyi | 15 +- tests/playground/kind/portmap_gen.py | 2 - 13 files changed, 186 insertions(+), 440 deletions(-) delete mode 100644 sky/clouds/service_catalog/kubernetes_catalog.py delete mode 100644 sky/skylet/providers/kubernetes/kubectl-rsync.sh diff --git a/sky/authentication.py b/sky/authentication.py index b660f9cc740..a00ad60c635 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -461,7 +461,9 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: output = e.output.decode('utf-8') print(output) if 'already exists' in output: - logger.warning(f'Key {key_label} already exists in Kubernetes cluster, continuing...') + logger.warning( + f'Key {key_label} already exists in Kubernetes cluster, continuing...' 
+ ) pass else: raise e diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index f45caba20bc..ae1a27abf28 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -433,13 +433,8 @@ def _get_generated_config(cls, autogen_comment: str, host_name: str, @classmethod @timeline.FileLockEvent(ssh_conf_lock_path) - def add_cluster( - cls, - cluster_name: str, - ips: List[str], - auth_config: Dict[str, str], - ports: List[int] - ): + def add_cluster(cls, cluster_name: str, ips: List[str], + auth_config: Dict[str, str], ports: List[int]): """Add authentication information for cluster to local SSH config file. If a host with `cluster_name` already exists and the configuration was @@ -532,7 +527,6 @@ def _add_multinode_config( external_worker_ips: List[str], auth_config: Dict[str, str], ): - # TODO(romilb): Make this work with multinode! username = auth_config['ssh_user'] key_path = os.path.expanduser(auth_config['ssh_private_key']) host_name = cluster_name @@ -599,8 +593,13 @@ def _add_multinode_config( host_name = external_worker_ips[idx] logger.warning(f'Using {host_name} to identify host instead.') codegens[idx] = cls._get_generated_config( - sky_autogen_comment, host_name, external_worker_ips[idx], - username, key_path, proxy_command, port = 22) + sky_autogen_comment, + host_name, + external_worker_ips[idx], + username, + key_path, + proxy_command, + port=22) # All workers go to SKY_USER_FILE_PATH/ssh/{cluster_name} for i, line in enumerate(extra_config): @@ -612,15 +611,24 @@ def _add_multinode_config( overwrites[idx] = True overwrite_begin_idxs[idx] = i - 1 codegens[idx] = cls._get_generated_config( - sky_autogen_comment, host_name, external_worker_ips[idx], - username, key_path, proxy_command, port = 22) + sky_autogen_comment, + host_name, + external_worker_ips[idx], + username, + key_path, + proxy_command, + port=22) # This checks if all codegens have been created. 
for idx, ip in enumerate(external_worker_ips): if not codegens[idx]: - codegens[idx] = cls._get_generated_config( - sky_autogen_comment, worker_names[idx], ip, username, - key_path, proxy_command, port = 22) + codegens[idx] = cls._get_generated_config(sky_autogen_comment, + worker_names[idx], + ip, + username, + key_path, + proxy_command, + port=22) for idx in range(len(external_worker_ips)): # Add (or overwrite) the new config. @@ -1598,12 +1606,12 @@ def get_head_ip( @timeline.event def get_head_ssh_port( - handle: backends.Backend.ResourceHandle, + handle: 'cloud_vm_ray_backend.CloudVmRayResourceHandle', use_cache: bool = True, max_attempts: int = 1, -) -> str: +) -> int: """Returns the ip of the head node.""" - del max_attempts # Unused. + del max_attempts # Unused. # Use port 22 for everything except Kubernetes # TODO(romilb): Add a get port method to the cloud classes. if not isinstance(handle.launched_resources.cloud, clouds.Kubernetes): @@ -1616,86 +1624,6 @@ def get_head_ssh_port( head_ssh_port = clouds.Kubernetes.get_port(svc_name, 'default') return head_ssh_port -def run_command_and_handle_ssh_failure( - runner: command_runner.SSHCommandRunner, - command: str, - failure_message: Optional[str] = None) -> str: - """Runs command remotely and returns output with proper error handling.""" - rc, stdout, stderr = runner.run(command, - require_outputs=True, - stream_logs=False) - if rc == 255: - # SSH failed - raise RuntimeError( - f'SSH with user {runner.ssh_user} and key {runner.ssh_private_key} ' - f'to {runner.ip} failed. This is most likely due to incorrect ' - 'credentials or incorrect permissions for the key file. 
Check ' - 'your credentials and try again.') - subprocess_utils.handle_returncode(rc, - command, - failure_message, - stderr=stderr) - return stdout - - -def do_filemounts_and_setup_on_local_workers( - cluster_config_file: str, - worker_ips: List[str] = None, - extra_setup_cmds: List[str] = None): - """Completes filemounting and setup on worker nodes. - - Syncs filemounts and runs setup on worker nodes for a local cluster. This - is a workaround for a Ray Autoscaler bug where `ray up` does not perform - filemounting or setup for local cluster worker nodes. - """ - config = common_utils.read_yaml(cluster_config_file) - - ssh_credentials = ssh_credential_from_yaml(cluster_config_file) - if worker_ips is None: - worker_ips = config['provider']['worker_ips'] - file_mounts = config['file_mounts'] - - setup_cmds = config['setup_commands'] - if extra_setup_cmds is not None: - setup_cmds += extra_setup_cmds - setup_script = log_lib.make_task_bash_script('\n'.join(setup_cmds)) - - worker_runners = command_runner.SSHCommandRunner.make_runner_list( - worker_ips, **ssh_credentials) - - # Uploads setup script to the worker node - with tempfile.NamedTemporaryFile('w', prefix='sky_setup_') as f: - f.write(setup_script) - f.flush() - setup_sh_path = f.name - setup_file = os.path.basename(setup_sh_path) - file_mounts[f'/tmp/{setup_file}'] = setup_sh_path - - # Ray Autoscaler Bug: Filemounting + Ray Setup - # does not happen on workers. 
- def _setup_local_worker(runner: command_runner.SSHCommandRunner): - for dst, src in file_mounts.items(): - mkdir_dst = f'mkdir -p {os.path.dirname(dst)}' - run_command_and_handle_ssh_failure( - runner, - mkdir_dst, - failure_message=f'Failed to run {mkdir_dst} on remote.') - if os.path.isdir(src): - src = os.path.join(src, '') - runner.rsync(source=src, target=dst, up=True, stream_logs=False) - - setup_cmd = f'/bin/bash -i /tmp/{setup_file} 2>&1' - rc, stdout, _ = runner.run(setup_cmd, - stream_logs=False, - require_outputs=True) - subprocess_utils.handle_returncode( - rc, - setup_cmd, - 'Failed to setup Ray autoscaler commands on remote.', - stderr=stdout) - - subprocess_utils.run_in_parallel(_setup_local_worker, worker_runners) - def check_network_connection(): # Tolerate 3 retries as it is observed that connections can fail. @@ -2052,6 +1980,7 @@ def _query_status_kubernetes( ) -> List[global_user_state.ClusterStatus]: raise NotImplementedError + _QUERY_STATUS_FUNCS = { 'AWS': _query_status_aws, 'GCP': _query_status_gcp, diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 7a7b861e500..6a0736d14fd 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2032,7 +2032,7 @@ def __init__(self, launched_resources: resources_lib.Resources, stable_internal_external_ips: Optional[List[Tuple[ str, str]]] = None, - stable_ssh_ports: Optional[List[int]] = None, + stable_ssh_ports: Optional[List[int]] = None, tpu_create_script: Optional[str] = None, tpu_delete_script: Optional[str] = None) -> None: self._version = self._VERSION @@ -2129,13 +2129,16 @@ def _update_cluster_region(self): def _update_stable_ssh_ports(self, max_attempts: int = 1) -> None: if isinstance(self.launched_resources.cloud, clouds.Kubernetes): - head_port = backend_utils.get_head_ssh_port(self, use_cache=False, max_attempts=max_attempts) + head_port = backend_utils.get_head_ssh_port( + self, use_cache=False, 
max_attempts=max_attempts) # TODO(romilb): Multinode doesn't work with Kubernetes yet. worker_ports = [22] * self.launched_nodes ports = [head_port] + worker_ports else: # Use port 22 for other clouds - ports = [22] * len(self.external_ips()) + ext_ips = self.external_ips() + assert ext_ips is not None, ext_ips + ports = [22] * len(ext_ips) self.stable_ssh_ports = ports def _update_stable_cluster_ips(self, max_attempts: int = 1) -> None: @@ -2204,9 +2207,10 @@ def external_ips(self, return [ips[1] for ips in self.stable_internal_external_ips] return None - def external_ssh_ports(self, - max_attempts: int = _FETCH_IP_MAX_ATTEMPTS, - use_cached_ports: bool = True) -> Optional[List[str]]: + def external_ssh_ports( + self, + max_attempts: int = _FETCH_IP_MAX_ATTEMPTS, + use_cached_ports: bool = True) -> Optional[List[int]]: if not use_cached_ports: self._update_stable_ssh_ports(max_attempts=max_attempts) if self.stable_ssh_ports is not None: @@ -2506,9 +2510,10 @@ def _provision( ip_list = handle.external_ips(max_attempts=_FETCH_IP_MAX_ATTEMPTS, use_cached_ips=False) - ssh_port_list = handle.external_ssh_ports(max_attempts=_FETCH_IP_MAX_ATTEMPTS, - use_cached_ports=False) + ssh_port_list = handle.external_ssh_ports( + max_attempts=_FETCH_IP_MAX_ATTEMPTS, use_cached_ports=False) assert ip_list is not None, handle + assert ssh_port_list is not None, handle if 'tpu_name' in config_dict: self._set_tpu_name(handle, config_dict['tpu_name']) @@ -2566,14 +2571,14 @@ def _get_zone(runner): # to None. 
self._update_after_cluster_provisioned(handle, task, prev_cluster_status, ip_list, -ssh_port_list, - lock_path) + ssh_port_list, lock_path) return handle def _update_after_cluster_provisioned( self, handle: CloudVmRayResourceHandle, task: task_lib.Task, prev_cluster_status: Optional[global_user_state.ClusterStatus], - ip_list: List[str], ssh_port_list: List[int], lock_path: str) -> None: + ip_list: List[str], ssh_port_list: List[int], + lock_path: str) -> None: usage_lib.messages.usage.update_cluster_resources( handle.launched_nodes, handle.launched_resources) usage_lib.messages.usage.update_final_cluster_status( @@ -2625,7 +2630,8 @@ def _update_after_cluster_provisioned( global_user_state.ClusterStatus.UP) auth_config = common_utils.read_yaml(handle.cluster_yaml)['auth'] backend_utils.SSHConfigHelper.add_cluster(handle.cluster_name, - ip_list, auth_config, ssh_port_list) + ip_list, auth_config, + ssh_port_list) common_utils.remove_file_if_exists(lock_path) @@ -3692,7 +3698,9 @@ def run_on_head( _FETCH_IP_MAX_ATTEMPTS) ssh_credentials = backend_utils.ssh_credential_from_yaml( handle.cluster_yaml) - runner = command_runner.SSHCommandRunner(head_ip, port=head_ssh_port, **ssh_credentials) + runner = command_runner.SSHCommandRunner(head_ip, + port=head_ssh_port, + **ssh_credentials) if under_remote_workdir: cmd = f'cd {SKY_REMOTE_WORKDIR} && {cmd}' diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 345cbbff20d..63cd3f0f851 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -49,18 +49,6 @@ def regions_with_offering(cls, instance_type: Optional[str], # No notion of regions in Kubernetes - return a single region. return cls.regions() - @classmethod - def region_zones_provision_loop( - cls, - *, - instance_type: Optional[str] = None, - accelerators: Optional[Dict[str, int]] = None, - use_spot: bool = False, - ) -> Iterator[Tuple[clouds.Region, List[clouds.Zone]]]: - # No notion of regions in Kubernetes - return a single region. 
- for region in cls.regions(): - yield region, region.zones - def instance_type_to_hourly_cost(self, instance_type: str, use_spot: bool, @@ -87,7 +75,7 @@ def is_same_cloud(self, other: clouds.Cloud) -> bool: return isinstance(other, Kubernetes) @classmethod - def get_port(cls, svc_name, namespace): + def get_port(cls, svc_name, namespace) -> int: from sky.skylet.providers.kubernetes.utils import get_port return get_port(svc_name, namespace) @@ -97,15 +85,18 @@ def get_default_instance_type( cpus: Optional[str] = None, memory: Optional[str] = None, disk_tier: Optional[str] = None) -> Optional[str]: - del disk_tier # Unused. + del disk_tier # Unused. virtual_instance_type = '' - n_cpus = cpus if cpus is not None else cls._DEFAULT_NUM_VCPUS - mem = memory if memory is not None else n_cpus * cls._DEFAULT_MEMORY_CPU_RATIO + # Remove the + from the cpus/memory string + n_cpus = int( + cpus.strip('+')) if cpus is not None else cls._DEFAULT_NUM_VCPUS + mem = int( + memory.strip('+') + ) if memory is not None else n_cpus * cls._DEFAULT_MEMORY_CPU_RATIO virtual_instance_type += f'{n_cpus}vCPU-' virtual_instance_type += f'{mem}GB' return virtual_instance_type - @classmethod def get_accelerators_from_instance_type( cls, @@ -116,46 +107,43 @@ def get_accelerators_from_instance_type( @classmethod def get_vcpus_mem_from_instance_type( - cls, instance_type: str) -> Tuple[Optional[float], Optional[float]]: + cls, instance_type: str) -> Tuple[float, float]: """Returns the #vCPUs and memory that the instance type offers.""" vcpus = cls.get_vcpus_from_instance_type(instance_type) mem = cls.get_mem_from_instance_type(instance_type) return vcpus, mem - @classmethod def zones_provision_loop( - cls, - *, - region: str, - num_nodes: int, - instance_type: str, - accelerators: Optional[Dict[str, int]] = None, - use_spot: bool = False, - ) -> Iterator[None]: + cls, + *, + region: str, + num_nodes: int, + instance_type: str, + accelerators: Optional[Dict[str, int]] = None, + use_spot: bool = 
False, + ) -> Iterator[List[clouds.Zone]]: del num_nodes # Unused. for r in cls.regions(): + assert r.zones is not None, r yield r.zones @classmethod def get_vcpus_from_instance_type( cls, instance_type: str, - ) -> Optional[float]: + ) -> float: """Returns the #vCPUs that the instance type offers.""" - if instance_type is None: - return None - # TODO(romilb): Better parsing + # TODO(romilb): Need more robust parsing return float(instance_type.split('vCPU')[0]) @classmethod def get_mem_from_instance_type( cls, instance_type: str, - ) -> Optional[float]: + ) -> float: """Returns the memory that the instance type offers.""" - if instance_type is None: - return None + # TODO(romilb): Need more robust parsing return float(instance_type.split('vCPU-')[1].split('GB')[0]) @classmethod @@ -168,7 +156,7 @@ def make_deploy_resources_variables( zones: Optional[List['clouds.Zone']]) -> Dict[str, Optional[str]]: del zones if region is None: - region = self._get_default_region() + region = self._regions[0] r = resources acc_dict = self.get_accelerators_from_instance_type(r.instance_type) @@ -179,16 +167,17 @@ def make_deploy_resources_variables( # resources.memory and resources.cpus are None if they are not explicitly set. # We fetch the default values for the instance type in that case. - cpus, mem = self.get_vcpus_mem_from_instance_type(resources.instance_type) + cpus, mem = self.get_vcpus_mem_from_instance_type( + resources.instance_type) # TODO(romilb): Allow fractional resources here - cpus = int(cpus) - mem = int(mem) + # cpus = int(cpus) + # mem = int(mem) return { 'instance_type': resources.instance_type, 'custom_resources': custom_resources, 'region': region.name, - 'cpus': cpus, - 'memory': mem + 'cpus': str(cpus), + 'memory': str(mem) } def get_feasible_launchable_resources(self, @@ -229,7 +218,8 @@ def _make(instance_list): # TODO(romilb): Add GPU support. 
raise NotImplementedError("GPU support not implemented yet.") - def check_credentials(self) -> Tuple[bool, Optional[str]]: + @classmethod + def check_credentials(cls) -> Tuple[bool, Optional[str]]: # TODO(romilb): Check credential validity using k8s api if os.path.exists(os.path.expanduser(f'~/.kube/config')): return True, None diff --git a/sky/clouds/service_catalog/__init__.py b/sky/clouds/service_catalog/__init__.py index 66d78ea1dd5..3116831b8ba 100644 --- a/sky/clouds/service_catalog/__init__.py +++ b/sky/clouds/service_catalog/__init__.py @@ -16,8 +16,7 @@ from sky.clouds.service_catalog import common CloudFilter = Optional[Union[List[str], str]] -_ALL_CLOUDS = ('aws', 'azure', 'gcp', 'ibm', 'lambda', 'scp', 'oci', - 'kubernetes') +_ALL_CLOUDS = ('aws', 'azure', 'gcp', 'ibm', 'lambda', 'scp', 'oci') def _map_clouds_catalog(clouds: CloudFilter, method_name: str, *args, **kwargs): diff --git a/sky/clouds/service_catalog/kubernetes_catalog.py b/sky/clouds/service_catalog/kubernetes_catalog.py deleted file mode 100644 index 9e56fd83735..00000000000 --- a/sky/clouds/service_catalog/kubernetes_catalog.py +++ /dev/null @@ -1,129 +0,0 @@ -"""Kubernetes Offerings Catalog. - -This module loads the service catalog file and can be used to query -instance types and pricing information for Kubernetes. - -TODO: This module should dynamically fetch resources from k8s instead of using - a static catalog. 
-""" -import colorama -import os -import typing -from typing import Dict, List, Optional, Tuple - -import pandas as pd - -from sky import sky_logging -from sky.clouds.service_catalog import common -from sky.utils import ux_utils - -if typing.TYPE_CHECKING: - from sky.clouds import cloud - -logger = sky_logging.init_logger(__name__) - - -def instance_type_exists(instance_type: str) -> bool: - return common.instance_type_exists_impl(_df, instance_type) - -def validate_region_zone( - region: Optional[str], - zone: Optional[str]) -> Tuple[Optional[str], Optional[str]]: - # if zone is not None: - # breakpoint() - # with ux_utils.print_exception_no_traceback(): - # raise ValueError('Kubernetes does not support zones.') - return common.validate_region_zone_impl('kubernetes', _df, region, zone) - - -def accelerator_in_region_or_zone(acc_name: str, - acc_count: int, - region: Optional[str] = None, - zone: Optional[str] = None) -> bool: - if zone is not None: - with ux_utils.print_exception_no_traceback(): - raise ValueError('Kubernetes does not support zones.') - return common.accelerator_in_region_or_zone_impl(_df, acc_name, acc_count, - region, zone) - - -def get_hourly_cost(instance_type: str, - use_spot: bool = False, - region: Optional[str] = None, - zone: Optional[str] = None) -> float: - """Returns the cost, or the cheapest cost among all zones for spot.""" - assert not use_spot, 'Kubernetes does not support spot instances.' 
- if zone is not None: - with ux_utils.print_exception_no_traceback(): - raise ValueError('Kubernetes does not support zones.') - return common.get_hourly_cost_impl(_df, instance_type, use_spot, region, - zone) - - -def get_vcpus_from_instance_type(instance_type: str) -> Optional[float]: - return common.get_vcpus_from_instance_type_impl(_df, instance_type) - - -def get_vcpus_mem_from_instance_type( - instance_type: str) -> Tuple[Optional[float], Optional[float]]: - return common.get_vcpus_mem_from_instance_type_impl(_df, - instance_type) - - -def get_default_instance_type(cpus: Optional[str] = None, - memory: Optional[str] = None, - disk_tier: Optional[str] = None) -> Optional[str]: - del disk_tier # unused - if cpus is None and memory is None: - cpus = f'{_DEFAULT_NUM_VCPUS}+' - - if memory is None: - memory_gb_or_ratio = f'{_DEFAULT_MEMORY_CPU_RATIO}x' - else: - memory_gb_or_ratio = memory - return common.get_instance_type_for_cpus_mem_impl(_df, cpus, - memory_gb_or_ratio) - -def get_accelerators_from_instance_type( - instance_type: str) -> Optional[Dict[str, int]]: - return common.get_accelerators_from_instance_type_impl(_df, instance_type) - - -def get_instance_type_for_accelerator( - acc_name: str, - acc_count: int, - cpus: Optional[str] = None, - use_spot: bool = False, - region: Optional[str] = None, - zone: Optional[str] = None) -> Tuple[Optional[List[str]], List[str]]: - """ - Returns a list of instance types satisfying the required count of - accelerators with sorted prices and a list of candidates with fuzzy search. 
- """ - if zone is not None: - with ux_utils.print_exception_no_traceback(): - raise ValueError('Kubernetes does not support zones.') - return common.get_instance_type_for_accelerator_impl(df=_df, - acc_name=acc_name, - acc_count=acc_count, - cpus=cpus, - use_spot=use_spot, - region=region, - zone=zone) - - -def get_region_zones_for_instance_type(instance_type: str, - use_spot: bool) -> List['cloud.Region']: - df = _df[_df['InstanceType'] == instance_type] - return common.get_region_zones(df, use_spot) - - -def list_accelerators( - gpus_only: bool, - name_filter: Optional[str], - region_filter: Optional[str], - case_sensitive: bool = True -) -> Dict[str, List[common.InstanceTypeInfo]]: - """Returns all Kubernetes 'instances' offering GPUs.""" - return common.list_accelerators_impl('Kubernetes', _df, gpus_only, name_filter, - region_filter, case_sensitive) diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index 7b392614f68..80f618b55cd 100644 --- a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -11,21 +11,20 @@ logger = logging.getLogger(__name__) MEMORY_SIZE_UNITS = { - "K": 2 ** 10, - "M": 2 ** 20, - "G": 2 ** 30, - "T": 2 ** 40, - "P": 2 ** 50, + "K": 2**10, + "M": 2**20, + "G": 2**30, + "T": 2**40, + "P": 2**50, } class InvalidNamespaceError(ValueError): + def __init__(self, field_name, namespace): - self.message = ( - "Namespace of {} config doesn't match provided " - "namespace '{}'. Either set it to {} or remove the " - "field".format(field_name, namespace, namespace) - ) + self.message = ("Namespace of {} config doesn't match provided " + "namespace '{}'. 
Either set it to {} or remove the " + "field".format(field_name, namespace, namespace)) def __str__(self): return self.message @@ -40,7 +39,8 @@ def updating_existing_msg(resource_type, name): def not_found_msg(resource_type, name): - return "{} '{}' not found, attempting to create it".format(resource_type, name) + return "{} '{}' not found, attempting to create it".format( + resource_type, name) def not_checking_msg(resource_type, name): @@ -107,14 +107,12 @@ def fillout_resources_kubernetes(config): if "resources" not in config["available_node_types"][node_type]: config["available_node_types"][node_type]["resources"] = {} autodetected_resources.update( - config["available_node_types"][node_type]["resources"] - ) - config["available_node_types"][node_type]["resources"] = autodetected_resources + config["available_node_types"][node_type]["resources"]) + config["available_node_types"][node_type][ + "resources"] = autodetected_resources logger.debug( "Updating the resources of node type {} to include {}.".format( - node_type, autodetected_resources - ) - ) + node_type, autodetected_resources)) return config @@ -135,7 +133,9 @@ def get_autodetected_resources(container_data): def get_resource(container_resources, resource_name): - limit = _get_resource(container_resources, resource_name, field_name="limits") + limit = _get_resource(container_resources, + resource_name, + field_name="limits") # float("inf") means there's no limit set return 0 if limit == float("inf") else int(limit) @@ -204,9 +204,11 @@ def _configure_namespace(provider_config): namespace = provider_config[namespace_field] field_selector = "metadata.name={}".format(namespace) try: - namespaces = core_api().list_namespace(field_selector=field_selector).items + namespaces = core_api().list_namespace( + field_selector=field_selector).items except ApiException: - logger.warning(log_prefix + not_checking_msg(namespace_field, namespace)) + logger.warning(log_prefix + + not_checking_msg(namespace_field, 
namespace)) return namespace if len(namespaces) > 0: @@ -215,7 +217,8 @@ def _configure_namespace(provider_config): return namespace logger.info(log_prefix + not_found_msg(namespace_field, namespace)) - namespace_config = client.V1Namespace(metadata=client.V1ObjectMeta(name=namespace)) + namespace_config = client.V1Namespace(metadata=client.V1ObjectMeta( + name=namespace)) core_api().create_namespace(namespace_config) logger.info(log_prefix + created_msg(namespace_field, namespace)) return namespace @@ -235,11 +238,8 @@ def _configure_autoscaler_service_account(namespace, provider_config): name = account["metadata"]["name"] field_selector = "metadata.name={}".format(name) - accounts = ( - core_api() - .list_namespaced_service_account(namespace, field_selector=field_selector) - .items - ) + accounts = (core_api().list_namespaced_service_account( + namespace, field_selector=field_selector).items) if len(accounts) > 0: assert len(accounts) == 1 logger.info(log_prefix + using_existing_msg(account_field, name)) @@ -264,9 +264,8 @@ def _configure_autoscaler_role(namespace, provider_config): name = role["metadata"]["name"] field_selector = "metadata.name={}".format(name) - accounts = ( - auth_api().list_namespaced_role(namespace, field_selector=field_selector).items - ) + accounts = (auth_api().list_namespaced_role( + namespace, field_selector=field_selector).items) if len(accounts) > 0: assert len(accounts) == 1 logger.info(log_prefix + using_existing_msg(role_field, name)) @@ -293,16 +292,13 @@ def _configure_autoscaler_role_binding(namespace, provider_config): subject["namespace"] = namespace elif subject["namespace"] != namespace: raise InvalidNamespaceError( - binding_field + " subject '{}'".format(subject["name"]), namespace - ) + binding_field + " subject '{}'".format(subject["name"]), + namespace) name = binding["metadata"]["name"] field_selector = "metadata.name={}".format(name) - accounts = ( - auth_api() - .list_namespaced_role_binding(namespace, 
field_selector=field_selector) - .items - ) + accounts = (auth_api().list_namespaced_role_binding( + namespace, field_selector=field_selector).items) if len(accounts) > 0: assert len(accounts) == 1 logger.info(log_prefix + using_existing_msg(binding_field, name)) @@ -328,11 +324,8 @@ def _configure_services(namespace, provider_config): name = service["metadata"]["name"] field_selector = "metadata.name={}".format(name) - services = ( - core_api() - .list_namespaced_service(namespace, field_selector=field_selector) - .items - ) + services = (core_api().list_namespaced_service( + namespace, field_selector=field_selector).items) if len(services) > 0: assert len(services) == 1 existing_service = services[0] diff --git a/sky/skylet/providers/kubernetes/kubectl-rsync.sh b/sky/skylet/providers/kubernetes/kubectl-rsync.sh deleted file mode 100644 index 361eb6d85c5..00000000000 --- a/sky/skylet/providers/kubernetes/kubectl-rsync.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash - -# Helper script to use kubectl as a remote shell for rsync to sync files -# to/from pods that have rsync installed. Taken from: -# https://serverfault.com/questions/741670/rsync-files-to-a-kubernetes-pod/746352 - -if [ -z "$KRSYNC_STARTED" ]; then - export KRSYNC_STARTED=true - exec rsync --blocking-io --rsh "$0" "$@" -fi - -# Running as --rsh -namespace='' -pod=$1 -shift - -# If use uses pod@namespace rsync passes as: {us} -l pod namespace ... 
-if [ "X$pod" = "X-l" ]; then - pod=$1 - shift - # Space before $1 leads to namespace errors - namespace="-n$1" - shift -fi - -if [ -z "$KUBE_API_SERVER" ]; then - exec kubectl "$namespace" exec -i "$pod" -- "$@" -else - exec kubectl --server "$KUBE_API_SERVER" "$namespace" exec -i "$pod" -- "$@" -fi diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 3879a4e9cb7..3609606ce25 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -1,7 +1,5 @@ import copy -import json import logging -import subprocess import time from typing import Dict from urllib.parse import urlparse @@ -26,12 +24,15 @@ RAY_COMPONENT_LABEL = "cluster.ray.io/component" -# Patch SSHCommandRunner to allow specifying SSH port + +# Monkey patch SSHCommandRunner to allow specifying SSH port def set_port(self, port): self.ssh_options.arg_dict["Port"] = port + SSHCommandRunner.set_port = set_port + def head_service_selector(cluster_name: str) -> Dict[str, str]: """Selector for Operator-configured head service.""" return {RAY_COMPONENT_LABEL: f"{cluster_name}-ray-head"} @@ -47,6 +48,7 @@ def to_label_selector(tags): class KubernetesNodeProvider(NodeProvider): + def __init__(self, provider_config, cluster_name): NodeProvider.__init__(self, provider_config, cluster_name) self.cluster_name = cluster_name @@ -56,20 +58,18 @@ def non_terminated_nodes(self, tag_filters): # Match pods that are in the 'Pending' or 'Running' phase. # Unfortunately there is no OR operator in field selectors, so we # have to match on NOT any of the other phases. 
- field_selector = ",".join( - [ - "status.phase!=Failed", - "status.phase!=Unknown", - "status.phase!=Succeeded", - "status.phase!=Terminating", - ] - ) + field_selector = ",".join([ + "status.phase!=Failed", + "status.phase!=Unknown", + "status.phase!=Succeeded", + "status.phase!=Terminating", + ]) tag_filters[TAG_RAY_CLUSTER_NAME] = self.cluster_name label_selector = to_label_selector(tag_filters) - pod_list = core_api().list_namespaced_pod( - self.namespace, field_selector=field_selector, label_selector=label_selector - ) + pod_list = core_api().list_namespaced_pod(self.namespace, + field_selector=field_selector, + label_selector=label_selector) # Don't return pods marked for deletion, # i.e. pods with non-null metadata.DeletionTimestamp. @@ -91,16 +91,8 @@ def node_tags(self, node_id): pod = core_api().read_namespaced_pod(node_id, self.namespace) return pod.metadata.labels - @staticmethod - def get_apiserver_ip() -> str: - output = subprocess.Popen("kubectl config view -o json".split(), - stdout=subprocess.PIPE).communicate()[0] - api_server_ip = json.loads(output)["clusters"][0]["cluster"][ - "server"].split("//")[1].split(":")[0] - return api_server_ip - - @staticmethod - def get_external_ip_for_nodeport() -> str: + def external_ip(self, node_id): + # # Return the IP address of the first node with an external IP nodes = core_api().list_node().items for node in nodes: @@ -113,10 +105,6 @@ def get_external_ip_for_nodeport() -> str: parsed_url = urlparse(api_host) return parsed_url.hostname - def external_ip(self, node_id): - # Extract the IP address of the API server from kubectl - return self.get_external_ip_for_nodeport() - def external_port(self, node_id): # Extract the NodePort of the head node's SSH service # TODO(romilb): Implement caching here for performance @@ -143,9 +131,8 @@ def find_node_id(): if not find_node_id(): all_nodes = self.non_terminated_nodes({}) ip_func = self.internal_ip if use_internal_ip else self.external_ip - ip_cache = ( - 
self._internal_ip_cache if use_internal_ip else self._external_ip_cache - ) + ip_cache = (self._internal_ip_cache + if use_internal_ip else self._external_ip_cache) for node_id in all_nodes: ip_cache[ip_func(node_id)] = node_id @@ -165,10 +152,8 @@ def set_node_tags(self, node_ids, tags): return except ApiException as e: if e.status == 409: - logger.info( - log_prefix + "Caught a 409 error while setting" - " node tags. Retrying..." - ) + logger.info(log_prefix + "Caught a 409 error while setting" + " node tags. Retrying...") time.sleep(DELAY_BEFORE_TAG_RETRY) continue else: @@ -200,9 +185,8 @@ def create_node(self, node_config, tags, count): head_selector = head_service_selector(self.cluster_name) pod_spec["metadata"]["labels"].update(head_selector) - logger.info( - log_prefix + "calling create_namespaced_pod (count={}).".format(count) - ) + logger.info(log_prefix + + "calling create_namespaced_pod (count={}).".format(count)) new_nodes = [] for _ in range(count): pod = core_api().create_namespaced_pod(self.namespace, pod_spec) @@ -210,10 +194,8 @@ def create_node(self, node_config, tags, count): new_svcs = [] if service_spec is not None: - logger.info( - log_prefix + "calling create_namespaced_service " - "(count={}).".format(count) - ) + logger.info(log_prefix + "calling create_namespaced_service " + "(count={}).".format(count)) for new_node in new_nodes: @@ -221,22 +203,21 @@ def create_node(self, node_config, tags, count): metadata["name"] = new_node.metadata.name service_spec["metadata"] = metadata service_spec["spec"]["selector"] = {"ray-node-uuid": node_uuid} - svc = core_api().create_namespaced_service(self.namespace, service_spec) + svc = core_api().create_namespaced_service( + self.namespace, service_spec) new_svcs.append(svc) if ingress_spec is not None: - logger.info( - log_prefix + "calling create_namespaced_ingress " - "(count={}).".format(count) - ) + logger.info(log_prefix + "calling create_namespaced_ingress " + "(count={}).".format(count)) for 
new_svc in new_svcs: metadata = ingress_spec.get("metadata", {}) metadata["name"] = new_svc.metadata.name ingress_spec["metadata"] = metadata ingress_spec = _add_service_name_to_service_port( - ingress_spec, new_svc.metadata.name - ) - networking_api().create_namespaced_ingress(self.namespace, ingress_spec) + ingress_spec, new_svc.metadata.name) + networking_api().create_namespaced_ingress( + self.namespace, ingress_spec) # Wait for all pods to be ready, and if it exceeds the timeout, raise an # exception. If pod's container is ContainerCreating, then we can assume @@ -252,7 +233,8 @@ def create_node(self, node_config, tags, count): ) all_ready = True for node in new_nodes: - pod = core_api().read_namespaced_pod(node.metadata.name, self.namespace) + pod = core_api().read_namespaced_pod(node.metadata.name, + self.namespace) if pod.status.phase == "Pending": # Check conditions for more detailed status for condition in pod.status.conditions: @@ -268,19 +250,14 @@ def create_node(self, node_config, tags, count): break time.sleep(1) - - - def terminate_node(self, node_id): logger.info(log_prefix + "calling delete_namespaced_pod") try: core_api().delete_namespaced_pod(node_id, self.namespace) except ApiException as e: if e.status == 404: - logger.warning( - log_prefix + f"Tried to delete pod {node_id}," - " but the pod was not found (404)." - ) + logger.warning(log_prefix + f"Tried to delete pod {node_id}," + " but the pod was not found (404).") else: raise try: @@ -299,15 +276,14 @@ def terminate_nodes(self, node_ids): for node_id in node_ids: self.terminate_node(node_id) - def get_command_runner( - self, - log_prefix, - node_id, - auth_config, - cluster_name, - process_runner, - use_internal_ip, - docker_config = None): + def get_command_runner(self, + log_prefix, + node_id, + auth_config, + cluster_name, + process_runner, + use_internal_ip, + docker_config=None): """Returns the CommandRunner class used to perform SSH commands. 
Args: @@ -344,6 +320,7 @@ def get_command_runner( f.write(f'{node_id} port: {port}\n') command_runner.set_port(port) return command_runner + # def get_command_runner( # self, # log_prefix, @@ -381,11 +358,12 @@ def _add_service_name_to_service_port(spec, svc_name): raise ValueError( "The value of serviceName must be set to " "${RAY_POD_NAME}. It is automatically replaced " - "when using the autoscaler." - ) + "when using the autoscaler.") elif isinstance(spec, list): - spec = [_add_service_name_to_service_port(item, svc_name) for item in spec] + spec = [ + _add_service_name_to_service_port(item, svc_name) for item in spec + ] elif isinstance(spec, str): # The magic string ${RAY_POD_NAME} is replaced with diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index dbd2b6a8a4c..1c1c5243c7b 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -54,17 +54,19 @@ def custom_objects_api(): return _custom_objects_api + def get_head_ssh_port(cluster_name, namespace): svc_name = f'{cluster_name}-ray-head-ssh' return get_port(svc_name, namespace) + def get_port(svc_name, namespace): - head_service = core_api().read_namespaced_service( - svc_name, namespace) + head_service = core_api().read_namespaced_service(svc_name, namespace) return head_service.spec.ports[0].node_port + log_prefix = "KubernetesNodeProvider: " class KubernetesError(Exception): - pass \ No newline at end of file + pass diff --git a/sky/utils/command_runner.py b/sky/utils/command_runner.py index 27a21ba6464..bff7b854ee2 100644 --- a/sky/utils/command_runner.py +++ b/sky/utils/command_runner.py @@ -142,6 +142,7 @@ def __init__( ssh_proxy_command: Optional, the value to pass to '-o ProxyCommand'. Useful for communicating with clusters without public IPs using a "jump server". + port: The port to use for ssh. 
""" self.ip = ip self.ssh_user = ssh_user @@ -163,11 +164,11 @@ def make_runner_list( ) -> List['SSHCommandRunner']: """Helper function for creating runners with the same ssh credentials""" if not port_list: - port_list = [22] * len(ip_list) + port_list = ["22"] * len(ip_list) return [ SSHCommandRunner(ip, ssh_user, ssh_private_key, ssh_control_name, - ssh_proxy_command, port) for ip, port in zip( - ip_list, port_list) + ssh_proxy_command, port) + for ip, port in zip(ip_list, port_list) ] def _ssh_base_command(self, *, ssh_mode: SshMode, @@ -190,7 +191,7 @@ def _ssh_base_command(self, *, ssh_mode: SshMode, self.ssh_private_key, self.ssh_control_name, ssh_proxy_command=self._ssh_proxy_command, - port = self.port, + port=self.port, ) + [f'{self.ssh_user}@{self.ip}'] def run( @@ -345,7 +346,7 @@ def rsync( self.ssh_private_key, self.ssh_control_name, ssh_proxy_command=self._ssh_proxy_command, - port = self.port, + port=self.port, )) rsync_command.append(f'-e "ssh {ssh_options}"') # To support spaces in the path, we need to quote source and target. diff --git a/sky/utils/command_runner.pyi b/sky/utils/command_runner.pyi index 7120755441f..53e78db15a8 100644 --- a/sky/utils/command_runner.pyi +++ b/sky/utils/command_runner.pyi @@ -36,20 +36,25 @@ class SSHCommandRunner: ssh_user: str ssh_private_key: str ssh_control_name: Optional[str] + port: int def __init__(self, ip: str, ssh_user: str, ssh_private_key: str, - ssh_control_name: Optional[str] = ...) -> None: + ssh_control_name: Optional[str] = ..., + port: str = ...) -> None: ... @staticmethod def make_runner_list( - ip_list: List[str], - ssh_user: str, - ssh_private_key: str, - ssh_control_name: Optional[str] = ...) -> List['SSHCommandRunner']: + ip_list: List[str], + ssh_user: str, + ssh_private_key: str, + ssh_control_name: Optional[str] = ..., + ssh_proxy_command: Optional[str] = ..., + port_list: Optional[List[int]] = ..., + ) -> List['SSHCommandRunner']: ... 
@typing.overload diff --git a/tests/playground/kind/portmap_gen.py b/tests/playground/kind/portmap_gen.py index f95da3a71a5..8e14b792860 100644 --- a/tests/playground/kind/portmap_gen.py +++ b/tests/playground/kind/portmap_gen.py @@ -15,5 +15,3 @@ protocol: tcp""") f.write("\n") f.write(suffix) - - From 3bca8a9f6f722529b90525fd0b3262b2c6f874db Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 15 Jun 2023 17:11:12 -0700 Subject: [PATCH 022/183] lint --- sky/authentication.py | 7 +++--- sky/backends/cloud_vm_ray_backend.py | 6 ++--- sky/backends/onprem_utils.py | 2 +- sky/clouds/kubernetes.py | 34 ++++++++++++++++++---------- sky/utils/command_runner.py | 2 +- 5 files changed, 30 insertions(+), 21 deletions(-) diff --git a/sky/authentication.py b/sky/authentication.py index a00ad60c635..c6381361069 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -454,16 +454,15 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: # Run kubectl command to add the public key to the cluster. public_key_path = os.path.expanduser(PUBLIC_SSH_KEY_PATH) key_label = clouds.Kubernetes.SKY_SSH_KEY_SECRET_NAME - cmd = f"kubectl create secret generic {key_label} --from-file=ssh-publickey={public_key_path}" + cmd = f'kubectl create secret generic {key_label} ' \ + f'--from-file=ssh-publickey={public_key_path}' try: subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True) except subprocess.CalledProcessError as e: output = e.output.decode('utf-8') - print(output) if 'already exists' in output: logger.warning( - f'Key {key_label} already exists in Kubernetes cluster, continuing...' 
- ) + f'Key {key_label} already exists in the cluster, continuing...') pass else: raise e diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 6a0736d14fd..44c0eec814f 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2534,7 +2534,7 @@ def _provision( ssh_credentials = backend_utils.ssh_credential_from_yaml( handle.cluster_yaml) runners = command_runner.SSHCommandRunner.make_runner_list( - ip_list, **ssh_credentials) + ip_list, **ssh_credentials, port_list=None) def _get_zone(runner): retry_count = 0 @@ -3215,7 +3215,7 @@ def sync_down_logs( ssh_credentials = backend_utils.ssh_credential_from_yaml( handle.cluster_yaml) runners = command_runner.SSHCommandRunner.make_runner_list( - ip_list, **ssh_credentials) + ip_list, **ssh_credentials, port_list=None) def _rsync_down(args) -> None: """Rsync down logs from remote nodes. @@ -3784,7 +3784,7 @@ def _set_tpu_name(self, handle: CloudVmRayResourceHandle, handle.cluster_yaml) runners = command_runner.SSHCommandRunner.make_runner_list( - ip_list, **ssh_credentials) + ip_list, **ssh_credentials, port_list=None) def _setup_tpu_name_on_node( runner: command_runner.SSHCommandRunner) -> None: diff --git a/sky/backends/onprem_utils.py b/sky/backends/onprem_utils.py index 0666210e5aa..a0453a92447 100644 --- a/sky/backends/onprem_utils.py +++ b/sky/backends/onprem_utils.py @@ -546,7 +546,7 @@ def do_filemounts_and_setup_on_local_workers( setup_script = log_lib.make_task_bash_script('\n'.join(setup_cmds)) worker_runners = command_runner.SSHCommandRunner.make_runner_list( - worker_ips, **ssh_credentials) + worker_ips, **ssh_credentials, port_list=None) # Uploads setup script to the worker node with tempfile.NamedTemporaryFile('w', prefix='sky_setup_') as f: diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 63cd3f0f851..661bbbf3b92 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -1,11 +1,12 @@ 
+"""Kubernetes.""" import json import os -import sys import typing from typing import Dict, Iterator, List, Optional, Tuple from sky import clouds from sky.utils import common_utils +from sky.skylet.providers.kubernetes.utils import get_port if typing.TYPE_CHECKING: # Renaming to avoid shadowing variables. @@ -18,6 +19,7 @@ @clouds.CLOUD_REGISTRY.register class Kubernetes(clouds.Cloud): + """Kubernetes.""" SKY_SSH_KEY_SECRET_NAME = f'sky-ssh-{common_utils.get_user_hash()}' _DEFAULT_NUM_VCPUS = 4 @@ -25,12 +27,18 @@ class Kubernetes(clouds.Cloud): _REPR = 'Kubernetes' _regions: List[clouds.Region] = [clouds.Region('kubernetes')] _CLOUD_UNSUPPORTED_FEATURES = { - clouds.CloudImplementationFeatures.STOP: 'Kubernetes does not support stopping VMs.', - clouds.CloudImplementationFeatures.AUTOSTOP: 'Kubernetes does not support stopping VMs.', - clouds.CloudImplementationFeatures.MULTI_NODE: 'Multi-node is not supported by the Kubernetes implementation yet.', + clouds.CloudImplementationFeatures.STOP: 'Kubernetes does not ' + 'support stopping VMs.', + clouds.CloudImplementationFeatures.AUTOSTOP: 'Kubernetes does not ' + 'support stopping VMs.', + clouds.CloudImplementationFeatures.MULTI_NODE: 'Multi-node is not ' + 'supported by the ' + 'Kubernetes ' + 'implementation yet.', } - IMAGE = 'us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest' + IMAGE = 'us-central1-docker.pkg.dev/' \ + 'skypilot-375900/skypilotk8s/skypilot:latest' @classmethod def _cloud_unsupported_features( @@ -76,7 +84,6 @@ def is_same_cloud(self, other: clouds.Cloud) -> bool: @classmethod def get_port(cls, svc_name, namespace) -> int: - from sky.skylet.providers.kubernetes.utils import get_port return get_port(svc_name, namespace) @classmethod @@ -123,7 +130,7 @@ def zones_provision_loop( accelerators: Optional[Dict[str, int]] = None, use_spot: bool = False, ) -> Iterator[List[clouds.Zone]]: - del num_nodes # Unused. 
+ del num_nodes, region, instance_type, accelerators, use_spot # Unused. for r in cls.regions(): assert r.zones is not None, r yield r.zones @@ -165,7 +172,7 @@ def make_deploy_resources_variables( else: custom_resources = None - # resources.memory and resources.cpus are None if they are not explicitly set. + # resources.memory and cpus are None if they are not explicitly set. # We fetch the default values for the instance type in that case. cpus, mem = self.get_vcpus_mem_from_instance_type( resources.instance_type) @@ -216,19 +223,22 @@ def _make(instance_list): assert len(accelerators) == 1, resources # TODO(romilb): Add GPU support. - raise NotImplementedError("GPU support not implemented yet.") + raise NotImplementedError('GPUs are not supported for Kubernetes ' + 'clusters yet.') @classmethod def check_credentials(cls) -> Tuple[bool, Optional[str]]: # TODO(romilb): Check credential validity using k8s api - if os.path.exists(os.path.expanduser(f'~/.kube/config')): + if os.path.exists(os.path.expanduser('~/.kube/config')): return True, None else: - return False, "Kubeconfig doesn't exist" + return False, 'Kubeconfig not found - ' \ + 'check if ~/.kube/config exists.' 
def get_credential_file_mounts(self) -> Dict[str, str]: return {} - # TODO(romilb): Fix the file mounts optimization ('config' here clashes with azure config file) + # TODO(romilb): Fix the file mounts optimization + # ('config' here clashes with azure config file) # return { # f'~/.kube/{filename}': f'~/.kube/{filename}' # for filename in _CREDENTIAL_FILES diff --git a/sky/utils/command_runner.py b/sky/utils/command_runner.py index bff7b854ee2..cea171773db 100644 --- a/sky/utils/command_runner.py +++ b/sky/utils/command_runner.py @@ -164,7 +164,7 @@ def make_runner_list( ) -> List['SSHCommandRunner']: """Helper function for creating runners with the same ssh credentials""" if not port_list: - port_list = ["22"] * len(ip_list) + port_list = [22] * len(ip_list) return [ SSHCommandRunner(ip, ssh_user, ssh_private_key, ssh_control_name, ssh_proxy_command, port) From 61df297f02325b7afaf89a6ef7327c86779d3b7d Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 15 Jun 2023 17:25:59 -0700 Subject: [PATCH 023/183] fix ports --- sky/backends/backend_utils.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index ae1a27abf28..3c5d3dbe286 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -450,6 +450,7 @@ def add_cluster(cls, cluster_name: str, ips: List[str], ips: List of public IP addresses in the cluster. First IP is head node. auth_config: read_yaml(handle.cluster_yaml)['auth'] + ports: List of port numbers for SSH corresponding to ips """ username = auth_config['ssh_user'] key_path = os.path.expanduser(auth_config['ssh_private_key']) @@ -1614,14 +1615,15 @@ def get_head_ssh_port( del max_attempts # Unused. # Use port 22 for everything except Kubernetes # TODO(romilb): Add a get port method to the cloud classes. 
+ head_ssh_port = 22 if not isinstance(handle.launched_resources.cloud, clouds.Kubernetes): - return 22 - if use_cache and handle.head_ssh_port is not None: - head_ssh_port = handle.head_ssh_port - else: - # TODO(romilb): Only supports headnode for now! No multinode! - svc_name = f'{handle.get_cluster_name()}-ray-head-ssh' - head_ssh_port = clouds.Kubernetes.get_port(svc_name, 'default') + return head_ssh_port + elif isinstance(handle.launched_resources.cloud, clouds.Kubernetes): + if use_cache and handle.head_ssh_port is not None: + head_ssh_port = handle.head_ssh_port + else: + svc_name = f'{handle.get_cluster_name()}-ray-head-ssh' + head_ssh_port = clouds.Kubernetes.get_port(svc_name, 'default') return head_ssh_port @@ -1716,7 +1718,7 @@ def _process_cli_query( return statuses -def _query_status_aws( +def ( cluster: str, ray_config: Dict[str, Any], ) -> List[global_user_state.ClusterStatus]: From 036eaf9a27e2871f187858d69aa344b1b059f18d Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 15 Jun 2023 17:26:34 -0700 Subject: [PATCH 024/183] typo --- sky/backends/backend_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 3c5d3dbe286..3f685b6f538 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -1718,7 +1718,7 @@ def _process_cli_query( return statuses -def ( +def _query_status_aws( cluster: str, ray_config: Dict[str, Any], ) -> List[global_user_state.ClusterStatus]: From 95e160c9c8301b37a846e15b719d14e58aac184d Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 15 Jun 2023 21:00:25 -0700 Subject: [PATCH 025/183] cleanup --- sky/backends/backend_utils.py | 9 +++++++-- sky/clouds/kubernetes.py | 3 +-- sky/skylet/providers/kubernetes/config.py | 9 +-------- .../providers/kubernetes/node_provider.py | 18 +++--------------- sky/skylet/ray_patches/__init__.py | 4 ++-- 5 files changed, 14 insertions(+), 29 deletions(-) diff --git 
a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 3f685b6f538..4a4b287117b 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -1980,7 +1980,11 @@ def _query_status_kubernetes( cluster: str, ray_config: Dict[str, Any], # pylint: disable=unused-argument ) -> List[global_user_state.ClusterStatus]: - raise NotImplementedError + # TODO(romilb): Implement this. For now, we return UP as the status. + # Assuming single node cluster. + del cluster # Unused. + del ray_config # Unused. + return [global_user_state.ClusterStatus.UP] _QUERY_STATUS_FUNCS = { @@ -2108,7 +2112,8 @@ def run_ray_status_to_check_ray_cluster_healthy() -> bool: # Check if ray cluster status is healthy. ssh_credentials = ssh_credential_from_yaml(handle.cluster_yaml) runner = command_runner.SSHCommandRunner(external_ips[0], - **ssh_credentials) + **ssh_credentials, + port=handle.head_ssh_port) rc, output, _ = runner.run(RAY_STATUS_WITH_SKY_RAY_PORT_COMMAND, stream_logs=False, require_outputs=True, diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 661bbbf3b92..0f0946eb9d9 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -129,10 +129,9 @@ def zones_provision_loop( instance_type: str, accelerators: Optional[Dict[str, int]] = None, use_spot: bool = False, - ) -> Iterator[List[clouds.Zone]]: + ) -> Iterator[Optional[List[clouds.Zone]]]: del num_nodes, region, instance_type, accelerators, use_spot # Unused. 
for r in cls.regions(): - assert r.zones is not None, r yield r.zones @classmethod diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index 80f618b55cd..b1809f4564e 100644 --- a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -56,13 +56,6 @@ def not_provided_msg(resource_type): def bootstrap_kubernetes(config): - # if not config["provider"]["use_internal_ips"]: - # return ValueError( - # "Exposing external IP addresses for ray containers isn't " - # "currently supported. Please set " - # "'use_internal_ips' to false." - # ) - if config["provider"].get("_operator"): namespace = config["provider"]["namespace"] else: @@ -181,7 +174,7 @@ def _parse_cpu_or_gpu_resource(resource): # For example, '500m' rounds up to 1. return math.ceil(int(resource_str[:-1]) / 1000) else: - return int(resource_str) + return float(resource_str) def _parse_memory_resource(resource): diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 3609606ce25..25a864dccee 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -229,7 +229,9 @@ def create_node(self, node_config, tags, count): while True: if time.time() - start > TIMEOUT: raise KubernetesError( - "Timed out while waiting for nodes to start. Cluster may be out of resources or may be too slow to autoscale." + "Timed out while waiting for nodes to start. " + "Cluster may be out of resources or " + "may be too slow to autoscale." 
) all_ready = True for node in new_nodes: @@ -321,20 +323,6 @@ def get_command_runner(self, command_runner.set_port(port) return command_runner - # def get_command_runner( - # self, - # log_prefix, - # node_id, - # auth_config, - # cluster_name, - # process_runner, - # use_internal_ip, - # docker_config=None, - # ): - # return KubernetesCommandRunner( - # log_prefix, self.namespace, node_id, auth_config, process_runner - # ) - @staticmethod def bootstrap_config(cluster_config): return bootstrap_kubernetes(cluster_config) diff --git a/sky/skylet/ray_patches/__init__.py b/sky/skylet/ray_patches/__init__.py index 77cf5b0f000..7178779d0e0 100644 --- a/sky/skylet/ray_patches/__init__.py +++ b/sky/skylet/ray_patches/__init__.py @@ -63,8 +63,8 @@ def patch() -> None: from ray._private import log_monitor _run_patch(log_monitor.__file__, _to_absolute('log_monitor.py.patch')) - # from ray._private import worker - # _run_patch(worker.__file__, _to_absolute('worker.py.patch')) + from ray._private import worker + _run_patch(worker.__file__, _to_absolute('worker.py.patch')) from ray.dashboard.modules.job import cli _run_patch(cli.__file__, _to_absolute('cli.py.patch')) From 301a914b53bb7817cdc76299bf75b33cee1db1bf Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 15 Jun 2023 22:40:02 -0700 Subject: [PATCH 026/183] cleanup --- Dockerfile_k8s | 15 +- sky/authentication.py | 2 +- sky/backends/cloud_vm_ray_backend.py | 3 + sky/registry.py | 0 .../providers/kubernetes/node_provider.py | 11 +- tests/kubernetes/README.md | 31 + tests/kubernetes/build_image.sh | 45 + tests/kubernetes/kind/create_cluster.sh | 11 + .../kind/portmap_gen.py | 4 +- .../scripts}/clean_k8s.sh | 0 tests/kubernetes/scripts/dashboard.yaml | 306 + .../scripts}/delete.sh | 0 .../scripts}/install_dashboard.sh | 0 .../scripts}/ray_k8s_sky.yaml | 0 .../deployment => kubernetes/scripts}/run.sh | 0 .../scripts}/skypilot_ssh_k8s_deployment.yaml | 0 tests/playground/kind/cluster.yaml | 11077 ---------------- 
tests/playground/kind/create_cluster.sh | 8 - tests/playground/ray_k8s_example_full.yaml | 213 - 19 files changed, 407 insertions(+), 11319 deletions(-) delete mode 100644 sky/registry.py create mode 100644 tests/kubernetes/README.md create mode 100644 tests/kubernetes/build_image.sh create mode 100644 tests/kubernetes/kind/create_cluster.sh rename tests/{playground => kubernetes}/kind/portmap_gen.py (69%) rename tests/{playground => kubernetes/scripts}/clean_k8s.sh (100%) create mode 100644 tests/kubernetes/scripts/dashboard.yaml rename tests/{playground/deployment => kubernetes/scripts}/delete.sh (100%) rename tests/{playground/deployment => kubernetes/scripts}/install_dashboard.sh (100%) rename tests/{playground => kubernetes/scripts}/ray_k8s_sky.yaml (100%) rename tests/{playground/deployment => kubernetes/scripts}/run.sh (100%) rename tests/{playground/deployment => kubernetes/scripts}/skypilot_ssh_k8s_deployment.yaml (100%) delete mode 100644 tests/playground/kind/cluster.yaml delete mode 100644 tests/playground/kind/create_cluster.sh delete mode 100644 tests/playground/ray_k8s_example_full.yaml diff --git a/Dockerfile_k8s b/Dockerfile_k8s index 3fc306f380b..42e97206a2f 100644 --- a/Dockerfile_k8s +++ b/Dockerfile_k8s @@ -1,17 +1,3 @@ -# On M1 Macs, use the following command to build the image: -# docker build --platform=linux/arm64 -t skypilot:latest -f Dockerfile_k8s ./sky -# For amd64, use the following command: -# docker build --platform=linux/amd64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky -# docker push us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -# Multiplatform build -# docker buildx build --platform=linux/arm64,linux/amd64 -t skypilot:latest -f Dockerfile_k8s ./sky - -# build both images -# docker buildx build --platform=linux/arm64,linux/amd64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky -# load M1 mac -# docker 
buildx build --load --platform linux/arm64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky && docker tag us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest skypilot:latest -# push both platforms as one image manifest list -# docker buildx build --push --platform linux/amd64,linux/arm64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky FROM continuumio/miniconda3:22.11.1 # Initialize conda for root user, install ssh and other local dependencies @@ -51,6 +37,7 @@ RUN echo 'export PATH="$PATH:$HOME/.local/bin"' >> ~/.bashrc # dependencies to optimize rebuild time COPY --chown=sky . /skypilot/sky/ +# TODO(romilb): Installing SkyPilot may not be necessary since ray up will do it RUN cd /skypilot/ && \ sudo mv -v sky/setup_files/* . && \ pip install ".[aws]" diff --git a/sky/authentication.py b/sky/authentication.py index c6381361069..74fb6d37869 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -462,7 +462,7 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: output = e.output.decode('utf-8') if 'already exists' in output: logger.warning( - f'Key {key_label} already exists in the cluster, continuing...') + f'Key {key_label} already exists in the cluster, using it...') pass else: raise e diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 44c0eec814f..65719f6ae64 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -1670,6 +1670,9 @@ def ray_up(): cluster_name = logging_info['cluster_name'] logger.info(f'{style.BRIGHT}Launching on local cluster ' f'{cluster_name!r}.') + elif isinstance(to_provision_cloud, clouds.Kubernetes): + logger.info(f'{style.BRIGHT}Launching on {to_provision_cloud} ' + f'{style.RESET_ALL}') else: logger.info(f'{style.BRIGHT}Launching on {to_provision_cloud} ' f'{region_name}{style.RESET_ALL}{zone_str}') 
diff --git a/sky/registry.py b/sky/registry.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 25a864dccee..e0302635060 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -239,11 +239,12 @@ def create_node(self, node_config, tags, count): self.namespace) if pod.status.phase == "Pending": # Check conditions for more detailed status - for condition in pod.status.conditions: - if condition.reason == 'ContainerCreating': - # Container is creating, so we can assume resources - # have been allocated. Safe to exit. - break + if pod.status.conditions is not None: + for condition in pod.status.conditions: + if condition.reason == 'ContainerCreating': + # Container is creating, so we can assume resources + # have been allocated. Safe to exit. + break else: # Pod is pending and not in 'ContainerCreating' state all_ready = False diff --git a/tests/kubernetes/README.md b/tests/kubernetes/README.md new file mode 100644 index 00000000000..f628796664d --- /dev/null +++ b/tests/kubernetes/README.md @@ -0,0 +1,31 @@ +# SkyPilot Kubernetes Development Scripts + +This directory contains useful scripts and notes for developing SkyPilot on Kubernetes. + +## Building and pushing SkyPilot image + +We maintain a container image that has all basic SkyPilot dependencies installed. +This image is hosted at `us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest`. + +To build this image locally and optionally push to the SkyPilot registry, run: +```bash +# Build and loaad image locally +./build.sh +# Build and push image (CAREFUL - this will push to the SkyPilot registry!) +./build.sh -p +``` + +## Running a local development cluster +You can use (kind)[https://kind.sigs.k8s.io/] to run a local Kubernetes cluster +for development. 
The following script will create a cluster with 1 node and +will make NodePort services available on localhost. + +```bash +cd kind +./create_cluster.sh +``` + +## Other useful scripts +`scripts` directory contains other useful scripts for development, including +Kubernetes dashboard, ray yaml for testing the SkyPilot Kubernetes node provider +and more. \ No newline at end of file diff --git a/tests/kubernetes/build_image.sh b/tests/kubernetes/build_image.sh new file mode 100644 index 00000000000..55df547f964 --- /dev/null +++ b/tests/kubernetes/build_image.sh @@ -0,0 +1,45 @@ +#!/bin/bash +# Builds the Dockerfile_k8s image as the SkyPilot image. +# Optionally, if -p is specified, pushes the image to the registry. +# Uses buildx to build the image for both amd64 and arm64. +# Usage: ./build_image.sh [-p] +# -p: Push the image to the registry + +TAG=us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest + +# Parse command line arguments +while getopts ":p" opt; do + case $opt in + p) + push=true + ;; + \?) 
+ echo "Invalid option: -$OPTARG" >&2 + ;; + esac +done + +# Navigate to the root of the project (inferred from git) +cd "$(git rev-parse --show-toplevel)" + +# If push is used, build the image for both amd64 and arm64 +if [[ $push ]]; then + echo "Building and pushing for amd64 and arm64" + # Push both platforms as one image manifest list + docker buildx build --push --platform linux/amd64,linux/arm64 -t $TAG -f Dockerfile_k8s ./sky +fi + +# Load the right image depending on the architecture of the host machine (Apple Silicon or Intel) +if [[ $(uname -m) == "arm64" ]]; then + echo "Loading image for arm64 (Apple Silicon etc.)" + docker buildx build --load --platform linux/arm64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky +elif [[ $(uname -m) == "x86_64" ]]; then + echo "Building for amd64 (Intel CPUs)" + docker buildx build --load --platform linux/amd64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky +else + echo "Unsupported architecture: $(uname -m)" + exit 1 +fi + +echo "Tagging image as skypilot:latest" +docker tag us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest skypilot:latest \ No newline at end of file diff --git a/tests/kubernetes/kind/create_cluster.sh b/tests/kubernetes/kind/create_cluster.sh new file mode 100644 index 00000000000..9562191aed2 --- /dev/null +++ b/tests/kubernetes/kind/create_cluster.sh @@ -0,0 +1,11 @@ +# Be sure to have built the latest image before running this script +set -e +kind delete cluster +# If kind-cluster.yaml is not present, generate it +if [ ! 
-f kind-cluster.yaml ]; then + echo "Generating kind-cluster.yaml" + python portmap_gen.py +fi +kind create cluster --config kind-cluster.yaml +# Load local skypilot image on to the cluster for faster startup +kind load docker-image us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest diff --git a/tests/playground/kind/portmap_gen.py b/tests/kubernetes/kind/portmap_gen.py similarity index 69% rename from tests/playground/kind/portmap_gen.py rename to tests/kubernetes/kind/portmap_gen.py index 8e14b792860..a0d32b09ffe 100644 --- a/tests/playground/kind/portmap_gen.py +++ b/tests/kubernetes/kind/portmap_gen.py @@ -1,10 +1,12 @@ +# Generates a kind-cluster.yaml file with all ports mapped from host to container + preamble = """ apiVersion: kind.x-k8s.io/v1alpha4 kind: Cluster nodes: - role: control-plane extraPortMappings:""" -suffix = """- role: worker""" +suffix = "" # """- role: worker""" # Uncomment this line to add a worker node with open('kind-cluster.yaml', 'w') as f: f.write(preamble) for port in range(30000, 32768): diff --git a/tests/playground/clean_k8s.sh b/tests/kubernetes/scripts/clean_k8s.sh similarity index 100% rename from tests/playground/clean_k8s.sh rename to tests/kubernetes/scripts/clean_k8s.sh diff --git a/tests/kubernetes/scripts/dashboard.yaml b/tests/kubernetes/scripts/dashboard.yaml new file mode 100644 index 00000000000..80308368456 --- /dev/null +++ b/tests/kubernetes/scripts/dashboard.yaml @@ -0,0 +1,306 @@ +# Copyright 2017 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +apiVersion: v1 +kind: Namespace +metadata: + name: kubernetes-dashboard + +--- + +apiVersion: v1 +kind: ServiceAccount +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard + namespace: kubernetes-dashboard + +--- + +kind: Service +apiVersion: v1 +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard + namespace: kubernetes-dashboard +spec: + ports: + - port: 443 + targetPort: 8443 + selector: + k8s-app: kubernetes-dashboard + +--- + +apiVersion: v1 +kind: Secret +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard-certs + namespace: kubernetes-dashboard +type: Opaque + +--- + +apiVersion: v1 +kind: Secret +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard-csrf + namespace: kubernetes-dashboard +type: Opaque +data: + csrf: "" + +--- + +apiVersion: v1 +kind: Secret +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard-key-holder + namespace: kubernetes-dashboard +type: Opaque + +--- + +kind: ConfigMap +apiVersion: v1 +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard-settings + namespace: kubernetes-dashboard + +--- + +kind: Role +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard + namespace: kubernetes-dashboard +rules: + # Allow Dashboard to get, update and delete Dashboard exclusive secrets. + - apiGroups: [""] + resources: ["secrets"] + resourceNames: ["kubernetes-dashboard-key-holder", "kubernetes-dashboard-certs", "kubernetes-dashboard-csrf"] + verbs: ["get", "update", "delete"] + # Allow Dashboard to get and update 'kubernetes-dashboard-settings' config map. 
+ - apiGroups: [""] + resources: ["configmaps"] + resourceNames: ["kubernetes-dashboard-settings"] + verbs: ["get", "update"] + # Allow Dashboard to get metrics. + - apiGroups: [""] + resources: ["services"] + resourceNames: ["heapster", "dashboard-metrics-scraper"] + verbs: ["proxy"] + - apiGroups: [""] + resources: ["services/proxy"] + resourceNames: ["heapster", "http:heapster:", "https:heapster:", "dashboard-metrics-scraper", "http:dashboard-metrics-scraper"] + verbs: ["get"] + +--- + +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard +rules: + # Allow Metrics Scraper to get metrics from the Metrics server + - apiGroups: ["metrics.k8s.io"] + resources: ["pods", "nodes"] + verbs: ["get", "list", "watch"] + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard + namespace: kubernetes-dashboard +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: kubernetes-dashboard +subjects: + - kind: ServiceAccount + name: kubernetes-dashboard + namespace: kubernetes-dashboard + +--- + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kubernetes-dashboard + labels: + k8s-app: kubernetes-dashboard +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: cluster-admin +subjects: +- kind: ServiceAccount + name: kubernetes-dashboard + namespace: kubernetes-dashboard + +--- + +kind: Deployment +apiVersion: apps/v1 +metadata: + labels: + k8s-app: kubernetes-dashboard + name: kubernetes-dashboard + namespace: kubernetes-dashboard +spec: + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + k8s-app: kubernetes-dashboard + template: + metadata: + labels: + k8s-app: kubernetes-dashboard + spec: + containers: + - name: kubernetes-dashboard + image: kubernetesui/dashboard:v2.3.1 + imagePullPolicy: Always + ports: + - 
containerPort: 8443 + protocol: TCP + args: + - --enable-skip-login + - --disable-settings-authorizer + - --auto-generate-certificates + - --namespace=kubernetes-dashboard + # Uncomment the following line to manually specify Kubernetes API server Host + # If not specified, Dashboard will attempt to auto discover the API server and connect + # to it. Uncomment only if the default does not work. + # - --apiserver-host=http://my-address:port + volumeMounts: + - name: kubernetes-dashboard-certs + mountPath: /certs + # Create on-disk volume to store exec logs + - mountPath: /tmp + name: tmp-volume + livenessProbe: + httpGet: + scheme: HTTPS + path: / + port: 8443 + initialDelaySeconds: 30 + timeoutSeconds: 30 + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsUser: 1001 + runAsGroup: 2001 + volumes: + - name: kubernetes-dashboard-certs + secret: + secretName: kubernetes-dashboard-certs + - name: tmp-volume + emptyDir: {} + serviceAccountName: kubernetes-dashboard + nodeSelector: + "kubernetes.io/os": linux + # Comment the following tolerations if Dashboard must not be deployed on master + tolerations: + - key: node-role.kubernetes.io/master + effect: NoSchedule + +--- + +kind: Service +apiVersion: v1 +metadata: + labels: + k8s-app: dashboard-metrics-scraper + name: dashboard-metrics-scraper + namespace: kubernetes-dashboard +spec: + ports: + - port: 8000 + targetPort: 8000 + selector: + k8s-app: dashboard-metrics-scraper + +--- + +kind: Deployment +apiVersion: apps/v1 +metadata: + labels: + k8s-app: dashboard-metrics-scraper + name: dashboard-metrics-scraper + namespace: kubernetes-dashboard +spec: + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + k8s-app: dashboard-metrics-scraper + template: + metadata: + labels: + k8s-app: dashboard-metrics-scraper + annotations: + seccomp.security.alpha.kubernetes.io/pod: 'runtime/default' + spec: + containers: + - name: dashboard-metrics-scraper + image: 
kubernetesui/metrics-scraper:v1.0.6 + ports: + - containerPort: 8000 + protocol: TCP + livenessProbe: + httpGet: + scheme: HTTP + path: / + port: 8000 + initialDelaySeconds: 30 + timeoutSeconds: 30 + volumeMounts: + - mountPath: /tmp + name: tmp-volume + securityContext: + allowPrivilegeEscalation: false + readOnlyRootFilesystem: true + runAsUser: 1001 + runAsGroup: 2001 + serviceAccountName: kubernetes-dashboard + nodeSelector: + "kubernetes.io/os": linux + # Comment the following tolerations if Dashboard must not be deployed on master + tolerations: + - key: node-role.kubernetes.io/master + effect: NoSchedule + volumes: + - name: tmp-volume + emptyDir: {} \ No newline at end of file diff --git a/tests/playground/deployment/delete.sh b/tests/kubernetes/scripts/delete.sh similarity index 100% rename from tests/playground/deployment/delete.sh rename to tests/kubernetes/scripts/delete.sh diff --git a/tests/playground/deployment/install_dashboard.sh b/tests/kubernetes/scripts/install_dashboard.sh similarity index 100% rename from tests/playground/deployment/install_dashboard.sh rename to tests/kubernetes/scripts/install_dashboard.sh diff --git a/tests/playground/ray_k8s_sky.yaml b/tests/kubernetes/scripts/ray_k8s_sky.yaml similarity index 100% rename from tests/playground/ray_k8s_sky.yaml rename to tests/kubernetes/scripts/ray_k8s_sky.yaml diff --git a/tests/playground/deployment/run.sh b/tests/kubernetes/scripts/run.sh similarity index 100% rename from tests/playground/deployment/run.sh rename to tests/kubernetes/scripts/run.sh diff --git a/tests/playground/deployment/skypilot_ssh_k8s_deployment.yaml b/tests/kubernetes/scripts/skypilot_ssh_k8s_deployment.yaml similarity index 100% rename from tests/playground/deployment/skypilot_ssh_k8s_deployment.yaml rename to tests/kubernetes/scripts/skypilot_ssh_k8s_deployment.yaml diff --git a/tests/playground/kind/cluster.yaml b/tests/playground/kind/cluster.yaml deleted file mode 100644 index cbeb04829cc..00000000000 --- 
a/tests/playground/kind/cluster.yaml +++ /dev/null @@ -1,11077 +0,0 @@ -apiVersion: kind.x-k8s.io/v1alpha4 -kind: Cluster -nodes: -- role: control-plane - extraPortMappings: - - containerPort: 30000 - hostPort: 30000 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30001 - hostPort: 30001 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30002 - hostPort: 30002 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30003 - hostPort: 30003 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30004 - hostPort: 30004 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30005 - hostPort: 30005 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30006 - hostPort: 30006 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30007 - hostPort: 30007 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30008 - hostPort: 30008 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30009 - hostPort: 30009 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30010 - hostPort: 30010 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30011 - hostPort: 30011 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30012 - hostPort: 30012 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30013 - hostPort: 30013 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30014 - hostPort: 30014 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30015 - hostPort: 30015 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30016 - hostPort: 30016 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30017 - hostPort: 30017 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30018 - hostPort: 30018 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30019 - hostPort: 30019 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30020 - hostPort: 30020 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 
30021 - hostPort: 30021 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30022 - hostPort: 30022 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30023 - hostPort: 30023 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30024 - hostPort: 30024 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30025 - hostPort: 30025 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30026 - hostPort: 30026 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30027 - hostPort: 30027 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30028 - hostPort: 30028 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30029 - hostPort: 30029 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30030 - hostPort: 30030 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30031 - hostPort: 30031 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30032 - hostPort: 30032 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30033 - hostPort: 30033 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30034 - hostPort: 30034 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30035 - hostPort: 30035 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30036 - hostPort: 30036 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30037 - hostPort: 30037 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30038 - hostPort: 30038 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30039 - hostPort: 30039 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30040 - hostPort: 30040 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30041 - hostPort: 30041 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30042 - hostPort: 30042 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30043 - hostPort: 30043 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30044 - hostPort: 
30044 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30045 - hostPort: 30045 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30046 - hostPort: 30046 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30047 - hostPort: 30047 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30048 - hostPort: 30048 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30049 - hostPort: 30049 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30050 - hostPort: 30050 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30051 - hostPort: 30051 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30052 - hostPort: 30052 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30053 - hostPort: 30053 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30054 - hostPort: 30054 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30055 - hostPort: 30055 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30056 - hostPort: 30056 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30057 - hostPort: 30057 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30058 - hostPort: 30058 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30059 - hostPort: 30059 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30060 - hostPort: 30060 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30061 - hostPort: 30061 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30062 - hostPort: 30062 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30063 - hostPort: 30063 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30064 - hostPort: 30064 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30065 - hostPort: 30065 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30066 - hostPort: 30066 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30067 - hostPort: 30067 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30068 - hostPort: 30068 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30069 - hostPort: 30069 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30070 - hostPort: 30070 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30071 - hostPort: 30071 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30072 - hostPort: 30072 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30073 - hostPort: 30073 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30074 - hostPort: 30074 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30075 - hostPort: 30075 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30076 - hostPort: 30076 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30077 - hostPort: 30077 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30078 - hostPort: 30078 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30079 - hostPort: 30079 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30080 - hostPort: 30080 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30081 - hostPort: 30081 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30082 - hostPort: 30082 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30083 - hostPort: 30083 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30084 - hostPort: 30084 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30085 - hostPort: 30085 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30086 - hostPort: 30086 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30087 - hostPort: 30087 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30088 - hostPort: 30088 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30089 - hostPort: 30089 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30090 - hostPort: 30090 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 30091 - hostPort: 30091 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30092 - hostPort: 30092 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30093 - hostPort: 30093 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30094 - hostPort: 30094 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30095 - hostPort: 30095 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30096 - hostPort: 30096 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30097 - hostPort: 30097 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30098 - hostPort: 30098 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30099 - hostPort: 30099 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30100 - hostPort: 30100 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30101 - hostPort: 30101 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30102 - hostPort: 30102 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30103 - hostPort: 30103 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30104 - hostPort: 30104 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30105 - hostPort: 30105 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30106 - hostPort: 30106 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30107 - hostPort: 30107 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30108 - hostPort: 30108 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30109 - hostPort: 30109 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30110 - hostPort: 30110 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30111 - hostPort: 30111 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30112 - hostPort: 30112 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30113 - hostPort: 30113 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 30114 - hostPort: 30114 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30115 - hostPort: 30115 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30116 - hostPort: 30116 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30117 - hostPort: 30117 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30118 - hostPort: 30118 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30119 - hostPort: 30119 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30120 - hostPort: 30120 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30121 - hostPort: 30121 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30122 - hostPort: 30122 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30123 - hostPort: 30123 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30124 - hostPort: 30124 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30125 - hostPort: 30125 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30126 - hostPort: 30126 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30127 - hostPort: 30127 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30128 - hostPort: 30128 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30129 - hostPort: 30129 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30130 - hostPort: 30130 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30131 - hostPort: 30131 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30132 - hostPort: 30132 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30133 - hostPort: 30133 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30134 - hostPort: 30134 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30135 - hostPort: 30135 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30136 - hostPort: 30136 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 30137 - hostPort: 30137 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30138 - hostPort: 30138 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30139 - hostPort: 30139 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30140 - hostPort: 30140 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30141 - hostPort: 30141 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30142 - hostPort: 30142 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30143 - hostPort: 30143 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30144 - hostPort: 30144 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30145 - hostPort: 30145 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30146 - hostPort: 30146 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30147 - hostPort: 30147 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30148 - hostPort: 30148 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30149 - hostPort: 30149 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30150 - hostPort: 30150 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30151 - hostPort: 30151 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30152 - hostPort: 30152 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30153 - hostPort: 30153 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30154 - hostPort: 30154 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30155 - hostPort: 30155 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30156 - hostPort: 30156 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30157 - hostPort: 30157 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30158 - hostPort: 30158 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30159 - hostPort: 30159 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30160 
- hostPort: 30160 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30161 - hostPort: 30161 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30162 - hostPort: 30162 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30163 - hostPort: 30163 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30164 - hostPort: 30164 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30165 - hostPort: 30165 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30166 - hostPort: 30166 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30167 - hostPort: 30167 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30168 - hostPort: 30168 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30169 - hostPort: 30169 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30170 - hostPort: 30170 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30171 - hostPort: 30171 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30172 - hostPort: 30172 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30173 - hostPort: 30173 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30174 - hostPort: 30174 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30175 - hostPort: 30175 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30176 - hostPort: 30176 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30177 - hostPort: 30177 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30178 - hostPort: 30178 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30179 - hostPort: 30179 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30180 - hostPort: 30180 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30181 - hostPort: 30181 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30182 - hostPort: 30182 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30183 - hostPort: 30183 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30184 - hostPort: 30184 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30185 - hostPort: 30185 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30186 - hostPort: 30186 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30187 - hostPort: 30187 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30188 - hostPort: 30188 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30189 - hostPort: 30189 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30190 - hostPort: 30190 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30191 - hostPort: 30191 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30192 - hostPort: 30192 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30193 - hostPort: 30193 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30194 - hostPort: 30194 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30195 - hostPort: 30195 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30196 - hostPort: 30196 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30197 - hostPort: 30197 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30198 - hostPort: 30198 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30199 - hostPort: 30199 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30200 - hostPort: 30200 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30201 - hostPort: 30201 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30202 - hostPort: 30202 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30203 - hostPort: 30203 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30204 - hostPort: 30204 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30205 - hostPort: 30205 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30206 - hostPort: 30206 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 30207 - hostPort: 30207 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30208 - hostPort: 30208 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30209 - hostPort: 30209 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30210 - hostPort: 30210 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30211 - hostPort: 30211 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30212 - hostPort: 30212 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30213 - hostPort: 30213 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30214 - hostPort: 30214 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30215 - hostPort: 30215 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30216 - hostPort: 30216 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30217 - hostPort: 30217 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30218 - hostPort: 30218 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30219 - hostPort: 30219 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30220 - hostPort: 30220 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30221 - hostPort: 30221 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30222 - hostPort: 30222 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30223 - hostPort: 30223 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30224 - hostPort: 30224 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30225 - hostPort: 30225 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30226 - hostPort: 30226 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30227 - hostPort: 30227 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30228 - hostPort: 30228 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30229 - hostPort: 30229 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 30230 - hostPort: 30230 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30231 - hostPort: 30231 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30232 - hostPort: 30232 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30233 - hostPort: 30233 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30234 - hostPort: 30234 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30235 - hostPort: 30235 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30236 - hostPort: 30236 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30237 - hostPort: 30237 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30238 - hostPort: 30238 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30239 - hostPort: 30239 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30240 - hostPort: 30240 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30241 - hostPort: 30241 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30242 - hostPort: 30242 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30243 - hostPort: 30243 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30244 - hostPort: 30244 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30245 - hostPort: 30245 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30246 - hostPort: 30246 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30247 - hostPort: 30247 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30248 - hostPort: 30248 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30249 - hostPort: 30249 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30250 - hostPort: 30250 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30251 - hostPort: 30251 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30252 - hostPort: 30252 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 30253 - hostPort: 30253 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30254 - hostPort: 30254 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30255 - hostPort: 30255 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30256 - hostPort: 30256 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30257 - hostPort: 30257 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30258 - hostPort: 30258 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30259 - hostPort: 30259 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30260 - hostPort: 30260 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30261 - hostPort: 30261 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30262 - hostPort: 30262 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30263 - hostPort: 30263 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30264 - hostPort: 30264 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30265 - hostPort: 30265 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30266 - hostPort: 30266 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30267 - hostPort: 30267 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30268 - hostPort: 30268 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30269 - hostPort: 30269 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30270 - hostPort: 30270 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30271 - hostPort: 30271 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30272 - hostPort: 30272 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30273 - hostPort: 30273 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30274 - hostPort: 30274 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30275 - hostPort: 30275 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30276 
- hostPort: 30276 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30277 - hostPort: 30277 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30278 - hostPort: 30278 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30279 - hostPort: 30279 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30280 - hostPort: 30280 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30281 - hostPort: 30281 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30282 - hostPort: 30282 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30283 - hostPort: 30283 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30284 - hostPort: 30284 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30285 - hostPort: 30285 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30286 - hostPort: 30286 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30287 - hostPort: 30287 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30288 - hostPort: 30288 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30289 - hostPort: 30289 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30290 - hostPort: 30290 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30291 - hostPort: 30291 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30292 - hostPort: 30292 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30293 - hostPort: 30293 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30294 - hostPort: 30294 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30295 - hostPort: 30295 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30296 - hostPort: 30296 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30297 - hostPort: 30297 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30298 - hostPort: 30298 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30299 - hostPort: 30299 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30300 - hostPort: 30300 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30301 - hostPort: 30301 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30302 - hostPort: 30302 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30303 - hostPort: 30303 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30304 - hostPort: 30304 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30305 - hostPort: 30305 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30306 - hostPort: 30306 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30307 - hostPort: 30307 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30308 - hostPort: 30308 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30309 - hostPort: 30309 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30310 - hostPort: 30310 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30311 - hostPort: 30311 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30312 - hostPort: 30312 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30313 - hostPort: 30313 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30314 - hostPort: 30314 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30315 - hostPort: 30315 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30316 - hostPort: 30316 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30317 - hostPort: 30317 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30318 - hostPort: 30318 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30319 - hostPort: 30319 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30320 - hostPort: 30320 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30321 - hostPort: 30321 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30322 - hostPort: 30322 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 30323 - hostPort: 30323 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30324 - hostPort: 30324 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30325 - hostPort: 30325 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30326 - hostPort: 30326 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30327 - hostPort: 30327 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30328 - hostPort: 30328 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30329 - hostPort: 30329 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30330 - hostPort: 30330 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30331 - hostPort: 30331 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30332 - hostPort: 30332 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30333 - hostPort: 30333 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30334 - hostPort: 30334 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30335 - hostPort: 30335 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30336 - hostPort: 30336 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30337 - hostPort: 30337 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30338 - hostPort: 30338 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30339 - hostPort: 30339 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30340 - hostPort: 30340 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30341 - hostPort: 30341 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30342 - hostPort: 30342 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30343 - hostPort: 30343 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30344 - hostPort: 30344 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30345 - hostPort: 30345 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 30346 - hostPort: 30346 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30347 - hostPort: 30347 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30348 - hostPort: 30348 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30349 - hostPort: 30349 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30350 - hostPort: 30350 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30351 - hostPort: 30351 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30352 - hostPort: 30352 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30353 - hostPort: 30353 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30354 - hostPort: 30354 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30355 - hostPort: 30355 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30356 - hostPort: 30356 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30357 - hostPort: 30357 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30358 - hostPort: 30358 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30359 - hostPort: 30359 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30360 - hostPort: 30360 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30361 - hostPort: 30361 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30362 - hostPort: 30362 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30363 - hostPort: 30363 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30364 - hostPort: 30364 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30365 - hostPort: 30365 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30366 - hostPort: 30366 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30367 - hostPort: 30367 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30368 - hostPort: 30368 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 30369 - hostPort: 30369 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30370 - hostPort: 30370 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30371 - hostPort: 30371 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30372 - hostPort: 30372 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30373 - hostPort: 30373 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30374 - hostPort: 30374 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30375 - hostPort: 30375 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30376 - hostPort: 30376 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30377 - hostPort: 30377 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30378 - hostPort: 30378 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30379 - hostPort: 30379 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30380 - hostPort: 30380 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30381 - hostPort: 30381 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30382 - hostPort: 30382 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30383 - hostPort: 30383 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30384 - hostPort: 30384 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30385 - hostPort: 30385 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30386 - hostPort: 30386 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30387 - hostPort: 30387 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30388 - hostPort: 30388 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30389 - hostPort: 30389 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30390 - hostPort: 30390 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30391 - hostPort: 30391 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30392 
- hostPort: 30392 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30393 - hostPort: 30393 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30394 - hostPort: 30394 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30395 - hostPort: 30395 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30396 - hostPort: 30396 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30397 - hostPort: 30397 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30398 - hostPort: 30398 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30399 - hostPort: 30399 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30400 - hostPort: 30400 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30401 - hostPort: 30401 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30402 - hostPort: 30402 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30403 - hostPort: 30403 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30404 - hostPort: 30404 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30405 - hostPort: 30405 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30406 - hostPort: 30406 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30407 - hostPort: 30407 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30408 - hostPort: 30408 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30409 - hostPort: 30409 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30410 - hostPort: 30410 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30411 - hostPort: 30411 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30412 - hostPort: 30412 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30413 - hostPort: 30413 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30414 - hostPort: 30414 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30415 - hostPort: 30415 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30416 - hostPort: 30416 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30417 - hostPort: 30417 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30418 - hostPort: 30418 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30419 - hostPort: 30419 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30420 - hostPort: 30420 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30421 - hostPort: 30421 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30422 - hostPort: 30422 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30423 - hostPort: 30423 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30424 - hostPort: 30424 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30425 - hostPort: 30425 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30426 - hostPort: 30426 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30427 - hostPort: 30427 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30428 - hostPort: 30428 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30429 - hostPort: 30429 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30430 - hostPort: 30430 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30431 - hostPort: 30431 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30432 - hostPort: 30432 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30433 - hostPort: 30433 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30434 - hostPort: 30434 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30435 - hostPort: 30435 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30436 - hostPort: 30436 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30437 - hostPort: 30437 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30438 - hostPort: 30438 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 30439 - hostPort: 30439 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30440 - hostPort: 30440 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30441 - hostPort: 30441 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30442 - hostPort: 30442 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30443 - hostPort: 30443 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30444 - hostPort: 30444 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30445 - hostPort: 30445 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30446 - hostPort: 30446 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30447 - hostPort: 30447 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30448 - hostPort: 30448 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30449 - hostPort: 30449 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30450 - hostPort: 30450 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30451 - hostPort: 30451 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30452 - hostPort: 30452 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30453 - hostPort: 30453 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30454 - hostPort: 30454 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30455 - hostPort: 30455 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30456 - hostPort: 30456 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30457 - hostPort: 30457 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30458 - hostPort: 30458 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30459 - hostPort: 30459 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30460 - hostPort: 30460 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30461 - hostPort: 30461 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 30462 - hostPort: 30462 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30463 - hostPort: 30463 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30464 - hostPort: 30464 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30465 - hostPort: 30465 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30466 - hostPort: 30466 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30467 - hostPort: 30467 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30468 - hostPort: 30468 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30469 - hostPort: 30469 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30470 - hostPort: 30470 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30471 - hostPort: 30471 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30472 - hostPort: 30472 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30473 - hostPort: 30473 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30474 - hostPort: 30474 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30475 - hostPort: 30475 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30476 - hostPort: 30476 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30477 - hostPort: 30477 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30478 - hostPort: 30478 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30479 - hostPort: 30479 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30480 - hostPort: 30480 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30481 - hostPort: 30481 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30482 - hostPort: 30482 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30483 - hostPort: 30483 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30484 - hostPort: 30484 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 30485 - hostPort: 30485 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30486 - hostPort: 30486 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30487 - hostPort: 30487 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30488 - hostPort: 30488 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30489 - hostPort: 30489 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30490 - hostPort: 30490 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30491 - hostPort: 30491 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30492 - hostPort: 30492 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30493 - hostPort: 30493 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30494 - hostPort: 30494 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30495 - hostPort: 30495 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30496 - hostPort: 30496 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30497 - hostPort: 30497 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30498 - hostPort: 30498 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30499 - hostPort: 30499 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30500 - hostPort: 30500 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30501 - hostPort: 30501 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30502 - hostPort: 30502 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30503 - hostPort: 30503 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30504 - hostPort: 30504 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30505 - hostPort: 30505 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30506 - hostPort: 30506 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30507 - hostPort: 30507 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30508 
- hostPort: 30508 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30509 - hostPort: 30509 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30510 - hostPort: 30510 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30511 - hostPort: 30511 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30512 - hostPort: 30512 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30513 - hostPort: 30513 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30514 - hostPort: 30514 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30515 - hostPort: 30515 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30516 - hostPort: 30516 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30517 - hostPort: 30517 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30518 - hostPort: 30518 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30519 - hostPort: 30519 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30520 - hostPort: 30520 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30521 - hostPort: 30521 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30522 - hostPort: 30522 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30523 - hostPort: 30523 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30524 - hostPort: 30524 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30525 - hostPort: 30525 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30526 - hostPort: 30526 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30527 - hostPort: 30527 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30528 - hostPort: 30528 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30529 - hostPort: 30529 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30530 - hostPort: 30530 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30531 - hostPort: 30531 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30532 - hostPort: 30532 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30533 - hostPort: 30533 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30534 - hostPort: 30534 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30535 - hostPort: 30535 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30536 - hostPort: 30536 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30537 - hostPort: 30537 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30538 - hostPort: 30538 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30539 - hostPort: 30539 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30540 - hostPort: 30540 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30541 - hostPort: 30541 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30542 - hostPort: 30542 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30543 - hostPort: 30543 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30544 - hostPort: 30544 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30545 - hostPort: 30545 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30546 - hostPort: 30546 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30547 - hostPort: 30547 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30548 - hostPort: 30548 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30549 - hostPort: 30549 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30550 - hostPort: 30550 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30551 - hostPort: 30551 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30552 - hostPort: 30552 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30553 - hostPort: 30553 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30554 - hostPort: 30554 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 30555 - hostPort: 30555 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30556 - hostPort: 30556 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30557 - hostPort: 30557 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30558 - hostPort: 30558 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30559 - hostPort: 30559 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30560 - hostPort: 30560 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30561 - hostPort: 30561 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30562 - hostPort: 30562 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30563 - hostPort: 30563 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30564 - hostPort: 30564 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30565 - hostPort: 30565 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30566 - hostPort: 30566 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30567 - hostPort: 30567 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30568 - hostPort: 30568 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30569 - hostPort: 30569 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30570 - hostPort: 30570 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30571 - hostPort: 30571 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30572 - hostPort: 30572 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30573 - hostPort: 30573 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30574 - hostPort: 30574 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30575 - hostPort: 30575 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30576 - hostPort: 30576 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30577 - hostPort: 30577 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 30578 - hostPort: 30578 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30579 - hostPort: 30579 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30580 - hostPort: 30580 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30581 - hostPort: 30581 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30582 - hostPort: 30582 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30583 - hostPort: 30583 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30584 - hostPort: 30584 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30585 - hostPort: 30585 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30586 - hostPort: 30586 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30587 - hostPort: 30587 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30588 - hostPort: 30588 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30589 - hostPort: 30589 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30590 - hostPort: 30590 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30591 - hostPort: 30591 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30592 - hostPort: 30592 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30593 - hostPort: 30593 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30594 - hostPort: 30594 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30595 - hostPort: 30595 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30596 - hostPort: 30596 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30597 - hostPort: 30597 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30598 - hostPort: 30598 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30599 - hostPort: 30599 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30600 - hostPort: 30600 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 30601 - hostPort: 30601 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30602 - hostPort: 30602 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30603 - hostPort: 30603 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30604 - hostPort: 30604 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30605 - hostPort: 30605 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30606 - hostPort: 30606 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30607 - hostPort: 30607 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30608 - hostPort: 30608 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30609 - hostPort: 30609 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30610 - hostPort: 30610 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30611 - hostPort: 30611 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30612 - hostPort: 30612 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30613 - hostPort: 30613 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30614 - hostPort: 30614 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30615 - hostPort: 30615 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30616 - hostPort: 30616 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30617 - hostPort: 30617 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30618 - hostPort: 30618 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30619 - hostPort: 30619 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30620 - hostPort: 30620 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30621 - hostPort: 30621 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30622 - hostPort: 30622 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30623 - hostPort: 30623 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30624 
- hostPort: 30624 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30625 - hostPort: 30625 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30626 - hostPort: 30626 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30627 - hostPort: 30627 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30628 - hostPort: 30628 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30629 - hostPort: 30629 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30630 - hostPort: 30630 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30631 - hostPort: 30631 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30632 - hostPort: 30632 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30633 - hostPort: 30633 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30634 - hostPort: 30634 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30635 - hostPort: 30635 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30636 - hostPort: 30636 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30637 - hostPort: 30637 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30638 - hostPort: 30638 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30639 - hostPort: 30639 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30640 - hostPort: 30640 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30641 - hostPort: 30641 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30642 - hostPort: 30642 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30643 - hostPort: 30643 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30644 - hostPort: 30644 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30645 - hostPort: 30645 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30646 - hostPort: 30646 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30647 - hostPort: 30647 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30648 - hostPort: 30648 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30649 - hostPort: 30649 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30650 - hostPort: 30650 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30651 - hostPort: 30651 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30652 - hostPort: 30652 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30653 - hostPort: 30653 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30654 - hostPort: 30654 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30655 - hostPort: 30655 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30656 - hostPort: 30656 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30657 - hostPort: 30657 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30658 - hostPort: 30658 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30659 - hostPort: 30659 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30660 - hostPort: 30660 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30661 - hostPort: 30661 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30662 - hostPort: 30662 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30663 - hostPort: 30663 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30664 - hostPort: 30664 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30665 - hostPort: 30665 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30666 - hostPort: 30666 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30667 - hostPort: 30667 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30668 - hostPort: 30668 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30669 - hostPort: 30669 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30670 - hostPort: 30670 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 30671 - hostPort: 30671 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30672 - hostPort: 30672 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30673 - hostPort: 30673 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30674 - hostPort: 30674 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30675 - hostPort: 30675 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30676 - hostPort: 30676 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30677 - hostPort: 30677 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30678 - hostPort: 30678 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30679 - hostPort: 30679 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30680 - hostPort: 30680 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30681 - hostPort: 30681 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30682 - hostPort: 30682 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30683 - hostPort: 30683 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30684 - hostPort: 30684 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30685 - hostPort: 30685 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30686 - hostPort: 30686 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30687 - hostPort: 30687 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30688 - hostPort: 30688 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30689 - hostPort: 30689 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30690 - hostPort: 30690 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30691 - hostPort: 30691 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30692 - hostPort: 30692 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30693 - hostPort: 30693 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 30694 - hostPort: 30694 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30695 - hostPort: 30695 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30696 - hostPort: 30696 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30697 - hostPort: 30697 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30698 - hostPort: 30698 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30699 - hostPort: 30699 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30700 - hostPort: 30700 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30701 - hostPort: 30701 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30702 - hostPort: 30702 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30703 - hostPort: 30703 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30704 - hostPort: 30704 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30705 - hostPort: 30705 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30706 - hostPort: 30706 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30707 - hostPort: 30707 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30708 - hostPort: 30708 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30709 - hostPort: 30709 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30710 - hostPort: 30710 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30711 - hostPort: 30711 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30712 - hostPort: 30712 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30713 - hostPort: 30713 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30714 - hostPort: 30714 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30715 - hostPort: 30715 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30716 - hostPort: 30716 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 30717 - hostPort: 30717 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30718 - hostPort: 30718 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30719 - hostPort: 30719 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30720 - hostPort: 30720 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30721 - hostPort: 30721 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30722 - hostPort: 30722 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30723 - hostPort: 30723 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30724 - hostPort: 30724 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30725 - hostPort: 30725 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30726 - hostPort: 30726 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30727 - hostPort: 30727 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30728 - hostPort: 30728 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30729 - hostPort: 30729 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30730 - hostPort: 30730 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30731 - hostPort: 30731 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30732 - hostPort: 30732 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30733 - hostPort: 30733 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30734 - hostPort: 30734 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30735 - hostPort: 30735 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30736 - hostPort: 30736 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30737 - hostPort: 30737 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30738 - hostPort: 30738 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30739 - hostPort: 30739 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30740 
- hostPort: 30740 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30741 - hostPort: 30741 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30742 - hostPort: 30742 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30743 - hostPort: 30743 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30744 - hostPort: 30744 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30745 - hostPort: 30745 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30746 - hostPort: 30746 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30747 - hostPort: 30747 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30748 - hostPort: 30748 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30749 - hostPort: 30749 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30750 - hostPort: 30750 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30751 - hostPort: 30751 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30752 - hostPort: 30752 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30753 - hostPort: 30753 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30754 - hostPort: 30754 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30755 - hostPort: 30755 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30756 - hostPort: 30756 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30757 - hostPort: 30757 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30758 - hostPort: 30758 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30759 - hostPort: 30759 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30760 - hostPort: 30760 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30761 - hostPort: 30761 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30762 - hostPort: 30762 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30763 - hostPort: 30763 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30764 - hostPort: 30764 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30765 - hostPort: 30765 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30766 - hostPort: 30766 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30767 - hostPort: 30767 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30768 - hostPort: 30768 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30769 - hostPort: 30769 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30770 - hostPort: 30770 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30771 - hostPort: 30771 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30772 - hostPort: 30772 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30773 - hostPort: 30773 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30774 - hostPort: 30774 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30775 - hostPort: 30775 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30776 - hostPort: 30776 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30777 - hostPort: 30777 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30778 - hostPort: 30778 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30779 - hostPort: 30779 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30780 - hostPort: 30780 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30781 - hostPort: 30781 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30782 - hostPort: 30782 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30783 - hostPort: 30783 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30784 - hostPort: 30784 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30785 - hostPort: 30785 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30786 - hostPort: 30786 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 30787 - hostPort: 30787 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30788 - hostPort: 30788 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30789 - hostPort: 30789 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30790 - hostPort: 30790 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30791 - hostPort: 30791 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30792 - hostPort: 30792 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30793 - hostPort: 30793 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30794 - hostPort: 30794 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30795 - hostPort: 30795 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30796 - hostPort: 30796 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30797 - hostPort: 30797 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30798 - hostPort: 30798 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30799 - hostPort: 30799 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30800 - hostPort: 30800 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30801 - hostPort: 30801 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30802 - hostPort: 30802 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30803 - hostPort: 30803 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30804 - hostPort: 30804 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30805 - hostPort: 30805 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30806 - hostPort: 30806 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30807 - hostPort: 30807 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30808 - hostPort: 30808 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30809 - hostPort: 30809 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 30810 - hostPort: 30810 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30811 - hostPort: 30811 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30812 - hostPort: 30812 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30813 - hostPort: 30813 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30814 - hostPort: 30814 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30815 - hostPort: 30815 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30816 - hostPort: 30816 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30817 - hostPort: 30817 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30818 - hostPort: 30818 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30819 - hostPort: 30819 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30820 - hostPort: 30820 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30821 - hostPort: 30821 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30822 - hostPort: 30822 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30823 - hostPort: 30823 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30824 - hostPort: 30824 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30825 - hostPort: 30825 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30826 - hostPort: 30826 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30827 - hostPort: 30827 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30828 - hostPort: 30828 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30829 - hostPort: 30829 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30830 - hostPort: 30830 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30831 - hostPort: 30831 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30832 - hostPort: 30832 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 30833 - hostPort: 30833 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30834 - hostPort: 30834 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30835 - hostPort: 30835 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30836 - hostPort: 30836 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30837 - hostPort: 30837 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30838 - hostPort: 30838 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30839 - hostPort: 30839 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30840 - hostPort: 30840 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30841 - hostPort: 30841 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30842 - hostPort: 30842 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30843 - hostPort: 30843 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30844 - hostPort: 30844 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30845 - hostPort: 30845 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30846 - hostPort: 30846 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30847 - hostPort: 30847 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30848 - hostPort: 30848 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30849 - hostPort: 30849 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30850 - hostPort: 30850 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30851 - hostPort: 30851 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30852 - hostPort: 30852 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30853 - hostPort: 30853 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30854 - hostPort: 30854 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30855 - hostPort: 30855 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30856 
- hostPort: 30856 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30857 - hostPort: 30857 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30858 - hostPort: 30858 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30859 - hostPort: 30859 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30860 - hostPort: 30860 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30861 - hostPort: 30861 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30862 - hostPort: 30862 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30863 - hostPort: 30863 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30864 - hostPort: 30864 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30865 - hostPort: 30865 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30866 - hostPort: 30866 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30867 - hostPort: 30867 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30868 - hostPort: 30868 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30869 - hostPort: 30869 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30870 - hostPort: 30870 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30871 - hostPort: 30871 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30872 - hostPort: 30872 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30873 - hostPort: 30873 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30874 - hostPort: 30874 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30875 - hostPort: 30875 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30876 - hostPort: 30876 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30877 - hostPort: 30877 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30878 - hostPort: 30878 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30879 - hostPort: 30879 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30880 - hostPort: 30880 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30881 - hostPort: 30881 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30882 - hostPort: 30882 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30883 - hostPort: 30883 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30884 - hostPort: 30884 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30885 - hostPort: 30885 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30886 - hostPort: 30886 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30887 - hostPort: 30887 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30888 - hostPort: 30888 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30889 - hostPort: 30889 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30890 - hostPort: 30890 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30891 - hostPort: 30891 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30892 - hostPort: 30892 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30893 - hostPort: 30893 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30894 - hostPort: 30894 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30895 - hostPort: 30895 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30896 - hostPort: 30896 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30897 - hostPort: 30897 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30898 - hostPort: 30898 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30899 - hostPort: 30899 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30900 - hostPort: 30900 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30901 - hostPort: 30901 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30902 - hostPort: 30902 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 30903 - hostPort: 30903 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30904 - hostPort: 30904 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30905 - hostPort: 30905 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30906 - hostPort: 30906 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30907 - hostPort: 30907 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30908 - hostPort: 30908 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30909 - hostPort: 30909 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30910 - hostPort: 30910 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30911 - hostPort: 30911 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30912 - hostPort: 30912 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30913 - hostPort: 30913 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30914 - hostPort: 30914 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30915 - hostPort: 30915 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30916 - hostPort: 30916 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30917 - hostPort: 30917 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30918 - hostPort: 30918 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30919 - hostPort: 30919 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30920 - hostPort: 30920 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30921 - hostPort: 30921 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30922 - hostPort: 30922 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30923 - hostPort: 30923 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30924 - hostPort: 30924 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30925 - hostPort: 30925 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 30926 - hostPort: 30926 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30927 - hostPort: 30927 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30928 - hostPort: 30928 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30929 - hostPort: 30929 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30930 - hostPort: 30930 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30931 - hostPort: 30931 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30932 - hostPort: 30932 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30933 - hostPort: 30933 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30934 - hostPort: 30934 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30935 - hostPort: 30935 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30936 - hostPort: 30936 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30937 - hostPort: 30937 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30938 - hostPort: 30938 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30939 - hostPort: 30939 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30940 - hostPort: 30940 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30941 - hostPort: 30941 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30942 - hostPort: 30942 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30943 - hostPort: 30943 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30944 - hostPort: 30944 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30945 - hostPort: 30945 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30946 - hostPort: 30946 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30947 - hostPort: 30947 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30948 - hostPort: 30948 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 30949 - hostPort: 30949 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30950 - hostPort: 30950 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30951 - hostPort: 30951 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30952 - hostPort: 30952 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30953 - hostPort: 30953 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30954 - hostPort: 30954 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30955 - hostPort: 30955 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30956 - hostPort: 30956 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30957 - hostPort: 30957 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30958 - hostPort: 30958 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30959 - hostPort: 30959 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30960 - hostPort: 30960 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30961 - hostPort: 30961 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30962 - hostPort: 30962 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30963 - hostPort: 30963 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30964 - hostPort: 30964 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30965 - hostPort: 30965 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30966 - hostPort: 30966 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30967 - hostPort: 30967 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30968 - hostPort: 30968 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30969 - hostPort: 30969 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30970 - hostPort: 30970 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30971 - hostPort: 30971 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30972 
- hostPort: 30972 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30973 - hostPort: 30973 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30974 - hostPort: 30974 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30975 - hostPort: 30975 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30976 - hostPort: 30976 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30977 - hostPort: 30977 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30978 - hostPort: 30978 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30979 - hostPort: 30979 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30980 - hostPort: 30980 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30981 - hostPort: 30981 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30982 - hostPort: 30982 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30983 - hostPort: 30983 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30984 - hostPort: 30984 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30985 - hostPort: 30985 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30986 - hostPort: 30986 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30987 - hostPort: 30987 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30988 - hostPort: 30988 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30989 - hostPort: 30989 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30990 - hostPort: 30990 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30991 - hostPort: 30991 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30992 - hostPort: 30992 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30993 - hostPort: 30993 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30994 - hostPort: 30994 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30995 - hostPort: 30995 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30996 - hostPort: 30996 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30997 - hostPort: 30997 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30998 - hostPort: 30998 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 30999 - hostPort: 30999 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31000 - hostPort: 31000 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31001 - hostPort: 31001 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31002 - hostPort: 31002 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31003 - hostPort: 31003 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31004 - hostPort: 31004 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31005 - hostPort: 31005 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31006 - hostPort: 31006 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31007 - hostPort: 31007 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31008 - hostPort: 31008 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31009 - hostPort: 31009 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31010 - hostPort: 31010 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31011 - hostPort: 31011 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31012 - hostPort: 31012 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31013 - hostPort: 31013 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31014 - hostPort: 31014 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31015 - hostPort: 31015 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31016 - hostPort: 31016 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31017 - hostPort: 31017 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31018 - hostPort: 31018 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 31019 - hostPort: 31019 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31020 - hostPort: 31020 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31021 - hostPort: 31021 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31022 - hostPort: 31022 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31023 - hostPort: 31023 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31024 - hostPort: 31024 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31025 - hostPort: 31025 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31026 - hostPort: 31026 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31027 - hostPort: 31027 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31028 - hostPort: 31028 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31029 - hostPort: 31029 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31030 - hostPort: 31030 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31031 - hostPort: 31031 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31032 - hostPort: 31032 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31033 - hostPort: 31033 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31034 - hostPort: 31034 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31035 - hostPort: 31035 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31036 - hostPort: 31036 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31037 - hostPort: 31037 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31038 - hostPort: 31038 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31039 - hostPort: 31039 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31040 - hostPort: 31040 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31041 - hostPort: 31041 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 31042 - hostPort: 31042 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31043 - hostPort: 31043 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31044 - hostPort: 31044 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31045 - hostPort: 31045 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31046 - hostPort: 31046 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31047 - hostPort: 31047 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31048 - hostPort: 31048 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31049 - hostPort: 31049 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31050 - hostPort: 31050 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31051 - hostPort: 31051 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31052 - hostPort: 31052 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31053 - hostPort: 31053 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31054 - hostPort: 31054 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31055 - hostPort: 31055 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31056 - hostPort: 31056 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31057 - hostPort: 31057 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31058 - hostPort: 31058 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31059 - hostPort: 31059 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31060 - hostPort: 31060 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31061 - hostPort: 31061 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31062 - hostPort: 31062 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31063 - hostPort: 31063 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31064 - hostPort: 31064 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 31065 - hostPort: 31065 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31066 - hostPort: 31066 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31067 - hostPort: 31067 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31068 - hostPort: 31068 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31069 - hostPort: 31069 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31070 - hostPort: 31070 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31071 - hostPort: 31071 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31072 - hostPort: 31072 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31073 - hostPort: 31073 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31074 - hostPort: 31074 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31075 - hostPort: 31075 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31076 - hostPort: 31076 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31077 - hostPort: 31077 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31078 - hostPort: 31078 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31079 - hostPort: 31079 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31080 - hostPort: 31080 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31081 - hostPort: 31081 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31082 - hostPort: 31082 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31083 - hostPort: 31083 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31084 - hostPort: 31084 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31085 - hostPort: 31085 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31086 - hostPort: 31086 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31087 - hostPort: 31087 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31088 
- hostPort: 31088 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31089 - hostPort: 31089 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31090 - hostPort: 31090 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31091 - hostPort: 31091 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31092 - hostPort: 31092 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31093 - hostPort: 31093 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31094 - hostPort: 31094 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31095 - hostPort: 31095 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31096 - hostPort: 31096 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31097 - hostPort: 31097 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31098 - hostPort: 31098 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31099 - hostPort: 31099 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31100 - hostPort: 31100 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31101 - hostPort: 31101 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31102 - hostPort: 31102 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31103 - hostPort: 31103 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31104 - hostPort: 31104 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31105 - hostPort: 31105 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31106 - hostPort: 31106 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31107 - hostPort: 31107 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31108 - hostPort: 31108 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31109 - hostPort: 31109 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31110 - hostPort: 31110 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31111 - hostPort: 31111 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31112 - hostPort: 31112 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31113 - hostPort: 31113 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31114 - hostPort: 31114 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31115 - hostPort: 31115 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31116 - hostPort: 31116 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31117 - hostPort: 31117 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31118 - hostPort: 31118 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31119 - hostPort: 31119 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31120 - hostPort: 31120 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31121 - hostPort: 31121 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31122 - hostPort: 31122 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31123 - hostPort: 31123 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31124 - hostPort: 31124 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31125 - hostPort: 31125 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31126 - hostPort: 31126 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31127 - hostPort: 31127 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31128 - hostPort: 31128 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31129 - hostPort: 31129 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31130 - hostPort: 31130 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31131 - hostPort: 31131 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31132 - hostPort: 31132 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31133 - hostPort: 31133 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31134 - hostPort: 31134 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 31135 - hostPort: 31135 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31136 - hostPort: 31136 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31137 - hostPort: 31137 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31138 - hostPort: 31138 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31139 - hostPort: 31139 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31140 - hostPort: 31140 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31141 - hostPort: 31141 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31142 - hostPort: 31142 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31143 - hostPort: 31143 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31144 - hostPort: 31144 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31145 - hostPort: 31145 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31146 - hostPort: 31146 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31147 - hostPort: 31147 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31148 - hostPort: 31148 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31149 - hostPort: 31149 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31150 - hostPort: 31150 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31151 - hostPort: 31151 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31152 - hostPort: 31152 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31153 - hostPort: 31153 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31154 - hostPort: 31154 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31155 - hostPort: 31155 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31156 - hostPort: 31156 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31157 - hostPort: 31157 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 31158 - hostPort: 31158 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31159 - hostPort: 31159 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31160 - hostPort: 31160 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31161 - hostPort: 31161 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31162 - hostPort: 31162 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31163 - hostPort: 31163 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31164 - hostPort: 31164 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31165 - hostPort: 31165 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31166 - hostPort: 31166 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31167 - hostPort: 31167 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31168 - hostPort: 31168 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31169 - hostPort: 31169 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31170 - hostPort: 31170 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31171 - hostPort: 31171 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31172 - hostPort: 31172 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31173 - hostPort: 31173 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31174 - hostPort: 31174 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31175 - hostPort: 31175 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31176 - hostPort: 31176 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31177 - hostPort: 31177 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31178 - hostPort: 31178 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31179 - hostPort: 31179 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31180 - hostPort: 31180 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 31181 - hostPort: 31181 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31182 - hostPort: 31182 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31183 - hostPort: 31183 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31184 - hostPort: 31184 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31185 - hostPort: 31185 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31186 - hostPort: 31186 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31187 - hostPort: 31187 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31188 - hostPort: 31188 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31189 - hostPort: 31189 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31190 - hostPort: 31190 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31191 - hostPort: 31191 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31192 - hostPort: 31192 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31193 - hostPort: 31193 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31194 - hostPort: 31194 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31195 - hostPort: 31195 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31196 - hostPort: 31196 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31197 - hostPort: 31197 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31198 - hostPort: 31198 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31199 - hostPort: 31199 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31200 - hostPort: 31200 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31201 - hostPort: 31201 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31202 - hostPort: 31202 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31203 - hostPort: 31203 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31204 
- hostPort: 31204 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31205 - hostPort: 31205 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31206 - hostPort: 31206 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31207 - hostPort: 31207 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31208 - hostPort: 31208 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31209 - hostPort: 31209 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31210 - hostPort: 31210 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31211 - hostPort: 31211 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31212 - hostPort: 31212 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31213 - hostPort: 31213 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31214 - hostPort: 31214 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31215 - hostPort: 31215 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31216 - hostPort: 31216 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31217 - hostPort: 31217 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31218 - hostPort: 31218 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31219 - hostPort: 31219 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31220 - hostPort: 31220 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31221 - hostPort: 31221 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31222 - hostPort: 31222 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31223 - hostPort: 31223 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31224 - hostPort: 31224 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31225 - hostPort: 31225 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31226 - hostPort: 31226 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31227 - hostPort: 31227 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31228 - hostPort: 31228 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31229 - hostPort: 31229 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31230 - hostPort: 31230 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31231 - hostPort: 31231 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31232 - hostPort: 31232 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31233 - hostPort: 31233 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31234 - hostPort: 31234 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31235 - hostPort: 31235 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31236 - hostPort: 31236 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31237 - hostPort: 31237 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31238 - hostPort: 31238 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31239 - hostPort: 31239 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31240 - hostPort: 31240 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31241 - hostPort: 31241 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31242 - hostPort: 31242 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31243 - hostPort: 31243 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31244 - hostPort: 31244 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31245 - hostPort: 31245 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31246 - hostPort: 31246 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31247 - hostPort: 31247 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31248 - hostPort: 31248 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31249 - hostPort: 31249 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31250 - hostPort: 31250 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 31251 - hostPort: 31251 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31252 - hostPort: 31252 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31253 - hostPort: 31253 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31254 - hostPort: 31254 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31255 - hostPort: 31255 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31256 - hostPort: 31256 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31257 - hostPort: 31257 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31258 - hostPort: 31258 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31259 - hostPort: 31259 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31260 - hostPort: 31260 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31261 - hostPort: 31261 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31262 - hostPort: 31262 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31263 - hostPort: 31263 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31264 - hostPort: 31264 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31265 - hostPort: 31265 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31266 - hostPort: 31266 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31267 - hostPort: 31267 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31268 - hostPort: 31268 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31269 - hostPort: 31269 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31270 - hostPort: 31270 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31271 - hostPort: 31271 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31272 - hostPort: 31272 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31273 - hostPort: 31273 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 31274 - hostPort: 31274 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31275 - hostPort: 31275 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31276 - hostPort: 31276 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31277 - hostPort: 31277 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31278 - hostPort: 31278 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31279 - hostPort: 31279 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31280 - hostPort: 31280 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31281 - hostPort: 31281 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31282 - hostPort: 31282 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31283 - hostPort: 31283 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31284 - hostPort: 31284 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31285 - hostPort: 31285 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31286 - hostPort: 31286 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31287 - hostPort: 31287 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31288 - hostPort: 31288 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31289 - hostPort: 31289 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31290 - hostPort: 31290 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31291 - hostPort: 31291 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31292 - hostPort: 31292 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31293 - hostPort: 31293 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31294 - hostPort: 31294 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31295 - hostPort: 31295 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31296 - hostPort: 31296 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 31297 - hostPort: 31297 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31298 - hostPort: 31298 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31299 - hostPort: 31299 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31300 - hostPort: 31300 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31301 - hostPort: 31301 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31302 - hostPort: 31302 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31303 - hostPort: 31303 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31304 - hostPort: 31304 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31305 - hostPort: 31305 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31306 - hostPort: 31306 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31307 - hostPort: 31307 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31308 - hostPort: 31308 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31309 - hostPort: 31309 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31310 - hostPort: 31310 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31311 - hostPort: 31311 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31312 - hostPort: 31312 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31313 - hostPort: 31313 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31314 - hostPort: 31314 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31315 - hostPort: 31315 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31316 - hostPort: 31316 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31317 - hostPort: 31317 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31318 - hostPort: 31318 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31319 - hostPort: 31319 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31320 
- hostPort: 31320 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31321 - hostPort: 31321 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31322 - hostPort: 31322 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31323 - hostPort: 31323 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31324 - hostPort: 31324 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31325 - hostPort: 31325 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31326 - hostPort: 31326 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31327 - hostPort: 31327 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31328 - hostPort: 31328 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31329 - hostPort: 31329 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31330 - hostPort: 31330 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31331 - hostPort: 31331 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31332 - hostPort: 31332 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31333 - hostPort: 31333 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31334 - hostPort: 31334 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31335 - hostPort: 31335 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31336 - hostPort: 31336 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31337 - hostPort: 31337 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31338 - hostPort: 31338 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31339 - hostPort: 31339 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31340 - hostPort: 31340 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31341 - hostPort: 31341 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31342 - hostPort: 31342 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31343 - hostPort: 31343 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31344 - hostPort: 31344 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31345 - hostPort: 31345 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31346 - hostPort: 31346 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31347 - hostPort: 31347 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31348 - hostPort: 31348 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31349 - hostPort: 31349 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31350 - hostPort: 31350 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31351 - hostPort: 31351 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31352 - hostPort: 31352 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31353 - hostPort: 31353 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31354 - hostPort: 31354 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31355 - hostPort: 31355 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31356 - hostPort: 31356 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31357 - hostPort: 31357 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31358 - hostPort: 31358 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31359 - hostPort: 31359 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31360 - hostPort: 31360 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31361 - hostPort: 31361 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31362 - hostPort: 31362 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31363 - hostPort: 31363 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31364 - hostPort: 31364 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31365 - hostPort: 31365 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31366 - hostPort: 31366 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 31367 - hostPort: 31367 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31368 - hostPort: 31368 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31369 - hostPort: 31369 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31370 - hostPort: 31370 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31371 - hostPort: 31371 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31372 - hostPort: 31372 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31373 - hostPort: 31373 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31374 - hostPort: 31374 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31375 - hostPort: 31375 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31376 - hostPort: 31376 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31377 - hostPort: 31377 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31378 - hostPort: 31378 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31379 - hostPort: 31379 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31380 - hostPort: 31380 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31381 - hostPort: 31381 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31382 - hostPort: 31382 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31383 - hostPort: 31383 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31384 - hostPort: 31384 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31385 - hostPort: 31385 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31386 - hostPort: 31386 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31387 - hostPort: 31387 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31388 - hostPort: 31388 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31389 - hostPort: 31389 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 31390 - hostPort: 31390 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31391 - hostPort: 31391 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31392 - hostPort: 31392 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31393 - hostPort: 31393 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31394 - hostPort: 31394 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31395 - hostPort: 31395 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31396 - hostPort: 31396 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31397 - hostPort: 31397 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31398 - hostPort: 31398 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31399 - hostPort: 31399 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31400 - hostPort: 31400 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31401 - hostPort: 31401 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31402 - hostPort: 31402 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31403 - hostPort: 31403 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31404 - hostPort: 31404 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31405 - hostPort: 31405 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31406 - hostPort: 31406 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31407 - hostPort: 31407 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31408 - hostPort: 31408 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31409 - hostPort: 31409 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31410 - hostPort: 31410 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31411 - hostPort: 31411 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31412 - hostPort: 31412 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 31413 - hostPort: 31413 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31414 - hostPort: 31414 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31415 - hostPort: 31415 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31416 - hostPort: 31416 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31417 - hostPort: 31417 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31418 - hostPort: 31418 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31419 - hostPort: 31419 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31420 - hostPort: 31420 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31421 - hostPort: 31421 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31422 - hostPort: 31422 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31423 - hostPort: 31423 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31424 - hostPort: 31424 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31425 - hostPort: 31425 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31426 - hostPort: 31426 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31427 - hostPort: 31427 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31428 - hostPort: 31428 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31429 - hostPort: 31429 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31430 - hostPort: 31430 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31431 - hostPort: 31431 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31432 - hostPort: 31432 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31433 - hostPort: 31433 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31434 - hostPort: 31434 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31435 - hostPort: 31435 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31436 
- hostPort: 31436 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31437 - hostPort: 31437 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31438 - hostPort: 31438 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31439 - hostPort: 31439 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31440 - hostPort: 31440 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31441 - hostPort: 31441 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31442 - hostPort: 31442 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31443 - hostPort: 31443 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31444 - hostPort: 31444 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31445 - hostPort: 31445 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31446 - hostPort: 31446 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31447 - hostPort: 31447 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31448 - hostPort: 31448 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31449 - hostPort: 31449 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31450 - hostPort: 31450 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31451 - hostPort: 31451 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31452 - hostPort: 31452 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31453 - hostPort: 31453 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31454 - hostPort: 31454 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31455 - hostPort: 31455 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31456 - hostPort: 31456 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31457 - hostPort: 31457 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31458 - hostPort: 31458 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31459 - hostPort: 31459 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31460 - hostPort: 31460 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31461 - hostPort: 31461 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31462 - hostPort: 31462 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31463 - hostPort: 31463 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31464 - hostPort: 31464 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31465 - hostPort: 31465 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31466 - hostPort: 31466 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31467 - hostPort: 31467 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31468 - hostPort: 31468 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31469 - hostPort: 31469 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31470 - hostPort: 31470 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31471 - hostPort: 31471 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31472 - hostPort: 31472 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31473 - hostPort: 31473 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31474 - hostPort: 31474 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31475 - hostPort: 31475 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31476 - hostPort: 31476 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31477 - hostPort: 31477 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31478 - hostPort: 31478 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31479 - hostPort: 31479 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31480 - hostPort: 31480 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31481 - hostPort: 31481 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31482 - hostPort: 31482 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 31483 - hostPort: 31483 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31484 - hostPort: 31484 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31485 - hostPort: 31485 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31486 - hostPort: 31486 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31487 - hostPort: 31487 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31488 - hostPort: 31488 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31489 - hostPort: 31489 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31490 - hostPort: 31490 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31491 - hostPort: 31491 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31492 - hostPort: 31492 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31493 - hostPort: 31493 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31494 - hostPort: 31494 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31495 - hostPort: 31495 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31496 - hostPort: 31496 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31497 - hostPort: 31497 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31498 - hostPort: 31498 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31499 - hostPort: 31499 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31500 - hostPort: 31500 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31501 - hostPort: 31501 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31502 - hostPort: 31502 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31503 - hostPort: 31503 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31504 - hostPort: 31504 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31505 - hostPort: 31505 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 31506 - hostPort: 31506 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31507 - hostPort: 31507 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31508 - hostPort: 31508 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31509 - hostPort: 31509 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31510 - hostPort: 31510 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31511 - hostPort: 31511 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31512 - hostPort: 31512 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31513 - hostPort: 31513 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31514 - hostPort: 31514 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31515 - hostPort: 31515 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31516 - hostPort: 31516 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31517 - hostPort: 31517 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31518 - hostPort: 31518 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31519 - hostPort: 31519 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31520 - hostPort: 31520 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31521 - hostPort: 31521 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31522 - hostPort: 31522 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31523 - hostPort: 31523 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31524 - hostPort: 31524 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31525 - hostPort: 31525 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31526 - hostPort: 31526 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31527 - hostPort: 31527 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31528 - hostPort: 31528 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 31529 - hostPort: 31529 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31530 - hostPort: 31530 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31531 - hostPort: 31531 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31532 - hostPort: 31532 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31533 - hostPort: 31533 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31534 - hostPort: 31534 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31535 - hostPort: 31535 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31536 - hostPort: 31536 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31537 - hostPort: 31537 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31538 - hostPort: 31538 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31539 - hostPort: 31539 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31540 - hostPort: 31540 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31541 - hostPort: 31541 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31542 - hostPort: 31542 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31543 - hostPort: 31543 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31544 - hostPort: 31544 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31545 - hostPort: 31545 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31546 - hostPort: 31546 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31547 - hostPort: 31547 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31548 - hostPort: 31548 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31549 - hostPort: 31549 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31550 - hostPort: 31550 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31551 - hostPort: 31551 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31552 
- hostPort: 31552 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31553 - hostPort: 31553 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31554 - hostPort: 31554 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31555 - hostPort: 31555 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31556 - hostPort: 31556 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31557 - hostPort: 31557 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31558 - hostPort: 31558 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31559 - hostPort: 31559 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31560 - hostPort: 31560 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31561 - hostPort: 31561 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31562 - hostPort: 31562 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31563 - hostPort: 31563 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31564 - hostPort: 31564 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31565 - hostPort: 31565 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31566 - hostPort: 31566 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31567 - hostPort: 31567 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31568 - hostPort: 31568 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31569 - hostPort: 31569 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31570 - hostPort: 31570 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31571 - hostPort: 31571 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31572 - hostPort: 31572 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31573 - hostPort: 31573 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31574 - hostPort: 31574 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31575 - hostPort: 31575 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31576 - hostPort: 31576 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31577 - hostPort: 31577 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31578 - hostPort: 31578 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31579 - hostPort: 31579 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31580 - hostPort: 31580 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31581 - hostPort: 31581 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31582 - hostPort: 31582 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31583 - hostPort: 31583 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31584 - hostPort: 31584 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31585 - hostPort: 31585 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31586 - hostPort: 31586 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31587 - hostPort: 31587 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31588 - hostPort: 31588 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31589 - hostPort: 31589 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31590 - hostPort: 31590 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31591 - hostPort: 31591 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31592 - hostPort: 31592 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31593 - hostPort: 31593 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31594 - hostPort: 31594 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31595 - hostPort: 31595 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31596 - hostPort: 31596 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31597 - hostPort: 31597 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31598 - hostPort: 31598 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 31599 - hostPort: 31599 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31600 - hostPort: 31600 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31601 - hostPort: 31601 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31602 - hostPort: 31602 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31603 - hostPort: 31603 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31604 - hostPort: 31604 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31605 - hostPort: 31605 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31606 - hostPort: 31606 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31607 - hostPort: 31607 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31608 - hostPort: 31608 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31609 - hostPort: 31609 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31610 - hostPort: 31610 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31611 - hostPort: 31611 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31612 - hostPort: 31612 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31613 - hostPort: 31613 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31614 - hostPort: 31614 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31615 - hostPort: 31615 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31616 - hostPort: 31616 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31617 - hostPort: 31617 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31618 - hostPort: 31618 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31619 - hostPort: 31619 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31620 - hostPort: 31620 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31621 - hostPort: 31621 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 31622 - hostPort: 31622 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31623 - hostPort: 31623 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31624 - hostPort: 31624 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31625 - hostPort: 31625 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31626 - hostPort: 31626 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31627 - hostPort: 31627 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31628 - hostPort: 31628 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31629 - hostPort: 31629 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31630 - hostPort: 31630 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31631 - hostPort: 31631 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31632 - hostPort: 31632 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31633 - hostPort: 31633 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31634 - hostPort: 31634 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31635 - hostPort: 31635 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31636 - hostPort: 31636 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31637 - hostPort: 31637 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31638 - hostPort: 31638 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31639 - hostPort: 31639 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31640 - hostPort: 31640 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31641 - hostPort: 31641 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31642 - hostPort: 31642 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31643 - hostPort: 31643 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31644 - hostPort: 31644 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 31645 - hostPort: 31645 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31646 - hostPort: 31646 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31647 - hostPort: 31647 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31648 - hostPort: 31648 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31649 - hostPort: 31649 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31650 - hostPort: 31650 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31651 - hostPort: 31651 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31652 - hostPort: 31652 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31653 - hostPort: 31653 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31654 - hostPort: 31654 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31655 - hostPort: 31655 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31656 - hostPort: 31656 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31657 - hostPort: 31657 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31658 - hostPort: 31658 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31659 - hostPort: 31659 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31660 - hostPort: 31660 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31661 - hostPort: 31661 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31662 - hostPort: 31662 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31663 - hostPort: 31663 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31664 - hostPort: 31664 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31665 - hostPort: 31665 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31666 - hostPort: 31666 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31667 - hostPort: 31667 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31668 
- hostPort: 31668 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31669 - hostPort: 31669 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31670 - hostPort: 31670 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31671 - hostPort: 31671 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31672 - hostPort: 31672 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31673 - hostPort: 31673 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31674 - hostPort: 31674 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31675 - hostPort: 31675 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31676 - hostPort: 31676 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31677 - hostPort: 31677 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31678 - hostPort: 31678 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31679 - hostPort: 31679 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31680 - hostPort: 31680 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31681 - hostPort: 31681 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31682 - hostPort: 31682 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31683 - hostPort: 31683 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31684 - hostPort: 31684 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31685 - hostPort: 31685 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31686 - hostPort: 31686 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31687 - hostPort: 31687 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31688 - hostPort: 31688 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31689 - hostPort: 31689 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31690 - hostPort: 31690 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31691 - hostPort: 31691 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31692 - hostPort: 31692 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31693 - hostPort: 31693 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31694 - hostPort: 31694 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31695 - hostPort: 31695 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31696 - hostPort: 31696 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31697 - hostPort: 31697 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31698 - hostPort: 31698 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31699 - hostPort: 31699 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31700 - hostPort: 31700 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31701 - hostPort: 31701 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31702 - hostPort: 31702 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31703 - hostPort: 31703 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31704 - hostPort: 31704 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31705 - hostPort: 31705 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31706 - hostPort: 31706 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31707 - hostPort: 31707 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31708 - hostPort: 31708 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31709 - hostPort: 31709 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31710 - hostPort: 31710 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31711 - hostPort: 31711 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31712 - hostPort: 31712 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31713 - hostPort: 31713 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31714 - hostPort: 31714 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 31715 - hostPort: 31715 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31716 - hostPort: 31716 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31717 - hostPort: 31717 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31718 - hostPort: 31718 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31719 - hostPort: 31719 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31720 - hostPort: 31720 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31721 - hostPort: 31721 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31722 - hostPort: 31722 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31723 - hostPort: 31723 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31724 - hostPort: 31724 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31725 - hostPort: 31725 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31726 - hostPort: 31726 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31727 - hostPort: 31727 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31728 - hostPort: 31728 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31729 - hostPort: 31729 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31730 - hostPort: 31730 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31731 - hostPort: 31731 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31732 - hostPort: 31732 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31733 - hostPort: 31733 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31734 - hostPort: 31734 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31735 - hostPort: 31735 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31736 - hostPort: 31736 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31737 - hostPort: 31737 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 31738 - hostPort: 31738 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31739 - hostPort: 31739 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31740 - hostPort: 31740 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31741 - hostPort: 31741 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31742 - hostPort: 31742 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31743 - hostPort: 31743 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31744 - hostPort: 31744 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31745 - hostPort: 31745 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31746 - hostPort: 31746 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31747 - hostPort: 31747 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31748 - hostPort: 31748 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31749 - hostPort: 31749 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31750 - hostPort: 31750 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31751 - hostPort: 31751 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31752 - hostPort: 31752 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31753 - hostPort: 31753 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31754 - hostPort: 31754 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31755 - hostPort: 31755 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31756 - hostPort: 31756 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31757 - hostPort: 31757 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31758 - hostPort: 31758 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31759 - hostPort: 31759 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31760 - hostPort: 31760 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 31761 - hostPort: 31761 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31762 - hostPort: 31762 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31763 - hostPort: 31763 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31764 - hostPort: 31764 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31765 - hostPort: 31765 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31766 - hostPort: 31766 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31767 - hostPort: 31767 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31768 - hostPort: 31768 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31769 - hostPort: 31769 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31770 - hostPort: 31770 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31771 - hostPort: 31771 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31772 - hostPort: 31772 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31773 - hostPort: 31773 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31774 - hostPort: 31774 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31775 - hostPort: 31775 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31776 - hostPort: 31776 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31777 - hostPort: 31777 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31778 - hostPort: 31778 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31779 - hostPort: 31779 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31780 - hostPort: 31780 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31781 - hostPort: 31781 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31782 - hostPort: 31782 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31783 - hostPort: 31783 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31784 
- hostPort: 31784 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31785 - hostPort: 31785 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31786 - hostPort: 31786 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31787 - hostPort: 31787 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31788 - hostPort: 31788 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31789 - hostPort: 31789 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31790 - hostPort: 31790 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31791 - hostPort: 31791 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31792 - hostPort: 31792 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31793 - hostPort: 31793 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31794 - hostPort: 31794 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31795 - hostPort: 31795 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31796 - hostPort: 31796 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31797 - hostPort: 31797 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31798 - hostPort: 31798 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31799 - hostPort: 31799 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31800 - hostPort: 31800 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31801 - hostPort: 31801 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31802 - hostPort: 31802 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31803 - hostPort: 31803 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31804 - hostPort: 31804 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31805 - hostPort: 31805 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31806 - hostPort: 31806 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31807 - hostPort: 31807 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31808 - hostPort: 31808 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31809 - hostPort: 31809 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31810 - hostPort: 31810 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31811 - hostPort: 31811 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31812 - hostPort: 31812 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31813 - hostPort: 31813 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31814 - hostPort: 31814 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31815 - hostPort: 31815 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31816 - hostPort: 31816 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31817 - hostPort: 31817 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31818 - hostPort: 31818 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31819 - hostPort: 31819 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31820 - hostPort: 31820 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31821 - hostPort: 31821 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31822 - hostPort: 31822 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31823 - hostPort: 31823 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31824 - hostPort: 31824 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31825 - hostPort: 31825 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31826 - hostPort: 31826 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31827 - hostPort: 31827 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31828 - hostPort: 31828 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31829 - hostPort: 31829 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31830 - hostPort: 31830 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 31831 - hostPort: 31831 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31832 - hostPort: 31832 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31833 - hostPort: 31833 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31834 - hostPort: 31834 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31835 - hostPort: 31835 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31836 - hostPort: 31836 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31837 - hostPort: 31837 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31838 - hostPort: 31838 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31839 - hostPort: 31839 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31840 - hostPort: 31840 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31841 - hostPort: 31841 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31842 - hostPort: 31842 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31843 - hostPort: 31843 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31844 - hostPort: 31844 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31845 - hostPort: 31845 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31846 - hostPort: 31846 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31847 - hostPort: 31847 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31848 - hostPort: 31848 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31849 - hostPort: 31849 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31850 - hostPort: 31850 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31851 - hostPort: 31851 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31852 - hostPort: 31852 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31853 - hostPort: 31853 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 31854 - hostPort: 31854 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31855 - hostPort: 31855 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31856 - hostPort: 31856 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31857 - hostPort: 31857 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31858 - hostPort: 31858 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31859 - hostPort: 31859 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31860 - hostPort: 31860 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31861 - hostPort: 31861 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31862 - hostPort: 31862 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31863 - hostPort: 31863 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31864 - hostPort: 31864 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31865 - hostPort: 31865 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31866 - hostPort: 31866 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31867 - hostPort: 31867 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31868 - hostPort: 31868 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31869 - hostPort: 31869 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31870 - hostPort: 31870 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31871 - hostPort: 31871 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31872 - hostPort: 31872 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31873 - hostPort: 31873 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31874 - hostPort: 31874 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31875 - hostPort: 31875 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31876 - hostPort: 31876 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 31877 - hostPort: 31877 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31878 - hostPort: 31878 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31879 - hostPort: 31879 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31880 - hostPort: 31880 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31881 - hostPort: 31881 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31882 - hostPort: 31882 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31883 - hostPort: 31883 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31884 - hostPort: 31884 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31885 - hostPort: 31885 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31886 - hostPort: 31886 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31887 - hostPort: 31887 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31888 - hostPort: 31888 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31889 - hostPort: 31889 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31890 - hostPort: 31890 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31891 - hostPort: 31891 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31892 - hostPort: 31892 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31893 - hostPort: 31893 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31894 - hostPort: 31894 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31895 - hostPort: 31895 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31896 - hostPort: 31896 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31897 - hostPort: 31897 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31898 - hostPort: 31898 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31899 - hostPort: 31899 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31900 
- hostPort: 31900 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31901 - hostPort: 31901 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31902 - hostPort: 31902 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31903 - hostPort: 31903 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31904 - hostPort: 31904 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31905 - hostPort: 31905 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31906 - hostPort: 31906 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31907 - hostPort: 31907 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31908 - hostPort: 31908 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31909 - hostPort: 31909 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31910 - hostPort: 31910 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31911 - hostPort: 31911 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31912 - hostPort: 31912 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31913 - hostPort: 31913 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31914 - hostPort: 31914 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31915 - hostPort: 31915 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31916 - hostPort: 31916 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31917 - hostPort: 31917 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31918 - hostPort: 31918 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31919 - hostPort: 31919 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31920 - hostPort: 31920 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31921 - hostPort: 31921 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31922 - hostPort: 31922 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31923 - hostPort: 31923 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31924 - hostPort: 31924 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31925 - hostPort: 31925 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31926 - hostPort: 31926 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31927 - hostPort: 31927 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31928 - hostPort: 31928 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31929 - hostPort: 31929 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31930 - hostPort: 31930 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31931 - hostPort: 31931 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31932 - hostPort: 31932 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31933 - hostPort: 31933 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31934 - hostPort: 31934 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31935 - hostPort: 31935 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31936 - hostPort: 31936 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31937 - hostPort: 31937 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31938 - hostPort: 31938 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31939 - hostPort: 31939 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31940 - hostPort: 31940 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31941 - hostPort: 31941 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31942 - hostPort: 31942 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31943 - hostPort: 31943 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31944 - hostPort: 31944 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31945 - hostPort: 31945 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31946 - hostPort: 31946 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 31947 - hostPort: 31947 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31948 - hostPort: 31948 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31949 - hostPort: 31949 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31950 - hostPort: 31950 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31951 - hostPort: 31951 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31952 - hostPort: 31952 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31953 - hostPort: 31953 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31954 - hostPort: 31954 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31955 - hostPort: 31955 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31956 - hostPort: 31956 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31957 - hostPort: 31957 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31958 - hostPort: 31958 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31959 - hostPort: 31959 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31960 - hostPort: 31960 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31961 - hostPort: 31961 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31962 - hostPort: 31962 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31963 - hostPort: 31963 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31964 - hostPort: 31964 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31965 - hostPort: 31965 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31966 - hostPort: 31966 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31967 - hostPort: 31967 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31968 - hostPort: 31968 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31969 - hostPort: 31969 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 31970 - hostPort: 31970 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31971 - hostPort: 31971 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31972 - hostPort: 31972 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31973 - hostPort: 31973 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31974 - hostPort: 31974 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31975 - hostPort: 31975 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31976 - hostPort: 31976 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31977 - hostPort: 31977 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31978 - hostPort: 31978 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31979 - hostPort: 31979 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31980 - hostPort: 31980 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31981 - hostPort: 31981 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31982 - hostPort: 31982 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31983 - hostPort: 31983 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31984 - hostPort: 31984 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31985 - hostPort: 31985 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31986 - hostPort: 31986 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31987 - hostPort: 31987 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31988 - hostPort: 31988 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31989 - hostPort: 31989 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31990 - hostPort: 31990 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31991 - hostPort: 31991 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31992 - hostPort: 31992 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 31993 - hostPort: 31993 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31994 - hostPort: 31994 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31995 - hostPort: 31995 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31996 - hostPort: 31996 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31997 - hostPort: 31997 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31998 - hostPort: 31998 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 31999 - hostPort: 31999 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32000 - hostPort: 32000 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32001 - hostPort: 32001 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32002 - hostPort: 32002 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32003 - hostPort: 32003 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32004 - hostPort: 32004 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32005 - hostPort: 32005 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32006 - hostPort: 32006 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32007 - hostPort: 32007 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32008 - hostPort: 32008 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32009 - hostPort: 32009 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32010 - hostPort: 32010 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32011 - hostPort: 32011 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32012 - hostPort: 32012 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32013 - hostPort: 32013 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32014 - hostPort: 32014 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32015 - hostPort: 32015 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32016 
- hostPort: 32016 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32017 - hostPort: 32017 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32018 - hostPort: 32018 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32019 - hostPort: 32019 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32020 - hostPort: 32020 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32021 - hostPort: 32021 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32022 - hostPort: 32022 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32023 - hostPort: 32023 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32024 - hostPort: 32024 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32025 - hostPort: 32025 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32026 - hostPort: 32026 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32027 - hostPort: 32027 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32028 - hostPort: 32028 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32029 - hostPort: 32029 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32030 - hostPort: 32030 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32031 - hostPort: 32031 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32032 - hostPort: 32032 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32033 - hostPort: 32033 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32034 - hostPort: 32034 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32035 - hostPort: 32035 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32036 - hostPort: 32036 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32037 - hostPort: 32037 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32038 - hostPort: 32038 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32039 - hostPort: 32039 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32040 - hostPort: 32040 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32041 - hostPort: 32041 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32042 - hostPort: 32042 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32043 - hostPort: 32043 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32044 - hostPort: 32044 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32045 - hostPort: 32045 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32046 - hostPort: 32046 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32047 - hostPort: 32047 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32048 - hostPort: 32048 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32049 - hostPort: 32049 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32050 - hostPort: 32050 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32051 - hostPort: 32051 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32052 - hostPort: 32052 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32053 - hostPort: 32053 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32054 - hostPort: 32054 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32055 - hostPort: 32055 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32056 - hostPort: 32056 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32057 - hostPort: 32057 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32058 - hostPort: 32058 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32059 - hostPort: 32059 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32060 - hostPort: 32060 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32061 - hostPort: 32061 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32062 - hostPort: 32062 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 32063 - hostPort: 32063 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32064 - hostPort: 32064 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32065 - hostPort: 32065 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32066 - hostPort: 32066 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32067 - hostPort: 32067 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32068 - hostPort: 32068 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32069 - hostPort: 32069 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32070 - hostPort: 32070 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32071 - hostPort: 32071 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32072 - hostPort: 32072 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32073 - hostPort: 32073 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32074 - hostPort: 32074 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32075 - hostPort: 32075 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32076 - hostPort: 32076 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32077 - hostPort: 32077 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32078 - hostPort: 32078 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32079 - hostPort: 32079 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32080 - hostPort: 32080 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32081 - hostPort: 32081 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32082 - hostPort: 32082 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32083 - hostPort: 32083 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32084 - hostPort: 32084 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32085 - hostPort: 32085 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 32086 - hostPort: 32086 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32087 - hostPort: 32087 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32088 - hostPort: 32088 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32089 - hostPort: 32089 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32090 - hostPort: 32090 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32091 - hostPort: 32091 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32092 - hostPort: 32092 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32093 - hostPort: 32093 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32094 - hostPort: 32094 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32095 - hostPort: 32095 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32096 - hostPort: 32096 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32097 - hostPort: 32097 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32098 - hostPort: 32098 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32099 - hostPort: 32099 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32100 - hostPort: 32100 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32101 - hostPort: 32101 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32102 - hostPort: 32102 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32103 - hostPort: 32103 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32104 - hostPort: 32104 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32105 - hostPort: 32105 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32106 - hostPort: 32106 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32107 - hostPort: 32107 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32108 - hostPort: 32108 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 32109 - hostPort: 32109 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32110 - hostPort: 32110 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32111 - hostPort: 32111 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32112 - hostPort: 32112 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32113 - hostPort: 32113 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32114 - hostPort: 32114 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32115 - hostPort: 32115 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32116 - hostPort: 32116 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32117 - hostPort: 32117 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32118 - hostPort: 32118 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32119 - hostPort: 32119 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32120 - hostPort: 32120 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32121 - hostPort: 32121 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32122 - hostPort: 32122 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32123 - hostPort: 32123 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32124 - hostPort: 32124 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32125 - hostPort: 32125 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32126 - hostPort: 32126 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32127 - hostPort: 32127 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32128 - hostPort: 32128 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32129 - hostPort: 32129 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32130 - hostPort: 32130 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32131 - hostPort: 32131 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32132 
- hostPort: 32132 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32133 - hostPort: 32133 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32134 - hostPort: 32134 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32135 - hostPort: 32135 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32136 - hostPort: 32136 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32137 - hostPort: 32137 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32138 - hostPort: 32138 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32139 - hostPort: 32139 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32140 - hostPort: 32140 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32141 - hostPort: 32141 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32142 - hostPort: 32142 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32143 - hostPort: 32143 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32144 - hostPort: 32144 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32145 - hostPort: 32145 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32146 - hostPort: 32146 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32147 - hostPort: 32147 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32148 - hostPort: 32148 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32149 - hostPort: 32149 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32150 - hostPort: 32150 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32151 - hostPort: 32151 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32152 - hostPort: 32152 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32153 - hostPort: 32153 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32154 - hostPort: 32154 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32155 - hostPort: 32155 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32156 - hostPort: 32156 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32157 - hostPort: 32157 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32158 - hostPort: 32158 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32159 - hostPort: 32159 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32160 - hostPort: 32160 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32161 - hostPort: 32161 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32162 - hostPort: 32162 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32163 - hostPort: 32163 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32164 - hostPort: 32164 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32165 - hostPort: 32165 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32166 - hostPort: 32166 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32167 - hostPort: 32167 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32168 - hostPort: 32168 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32169 - hostPort: 32169 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32170 - hostPort: 32170 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32171 - hostPort: 32171 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32172 - hostPort: 32172 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32173 - hostPort: 32173 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32174 - hostPort: 32174 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32175 - hostPort: 32175 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32176 - hostPort: 32176 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32177 - hostPort: 32177 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32178 - hostPort: 32178 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 32179 - hostPort: 32179 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32180 - hostPort: 32180 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32181 - hostPort: 32181 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32182 - hostPort: 32182 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32183 - hostPort: 32183 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32184 - hostPort: 32184 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32185 - hostPort: 32185 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32186 - hostPort: 32186 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32187 - hostPort: 32187 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32188 - hostPort: 32188 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32189 - hostPort: 32189 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32190 - hostPort: 32190 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32191 - hostPort: 32191 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32192 - hostPort: 32192 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32193 - hostPort: 32193 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32194 - hostPort: 32194 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32195 - hostPort: 32195 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32196 - hostPort: 32196 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32197 - hostPort: 32197 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32198 - hostPort: 32198 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32199 - hostPort: 32199 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32200 - hostPort: 32200 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32201 - hostPort: 32201 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 32202 - hostPort: 32202 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32203 - hostPort: 32203 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32204 - hostPort: 32204 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32205 - hostPort: 32205 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32206 - hostPort: 32206 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32207 - hostPort: 32207 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32208 - hostPort: 32208 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32209 - hostPort: 32209 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32210 - hostPort: 32210 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32211 - hostPort: 32211 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32212 - hostPort: 32212 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32213 - hostPort: 32213 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32214 - hostPort: 32214 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32215 - hostPort: 32215 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32216 - hostPort: 32216 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32217 - hostPort: 32217 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32218 - hostPort: 32218 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32219 - hostPort: 32219 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32220 - hostPort: 32220 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32221 - hostPort: 32221 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32222 - hostPort: 32222 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32223 - hostPort: 32223 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32224 - hostPort: 32224 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 32225 - hostPort: 32225 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32226 - hostPort: 32226 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32227 - hostPort: 32227 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32228 - hostPort: 32228 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32229 - hostPort: 32229 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32230 - hostPort: 32230 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32231 - hostPort: 32231 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32232 - hostPort: 32232 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32233 - hostPort: 32233 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32234 - hostPort: 32234 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32235 - hostPort: 32235 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32236 - hostPort: 32236 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32237 - hostPort: 32237 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32238 - hostPort: 32238 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32239 - hostPort: 32239 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32240 - hostPort: 32240 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32241 - hostPort: 32241 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32242 - hostPort: 32242 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32243 - hostPort: 32243 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32244 - hostPort: 32244 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32245 - hostPort: 32245 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32246 - hostPort: 32246 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32247 - hostPort: 32247 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32248 
- hostPort: 32248 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32249 - hostPort: 32249 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32250 - hostPort: 32250 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32251 - hostPort: 32251 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32252 - hostPort: 32252 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32253 - hostPort: 32253 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32254 - hostPort: 32254 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32255 - hostPort: 32255 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32256 - hostPort: 32256 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32257 - hostPort: 32257 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32258 - hostPort: 32258 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32259 - hostPort: 32259 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32260 - hostPort: 32260 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32261 - hostPort: 32261 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32262 - hostPort: 32262 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32263 - hostPort: 32263 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32264 - hostPort: 32264 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32265 - hostPort: 32265 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32266 - hostPort: 32266 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32267 - hostPort: 32267 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32268 - hostPort: 32268 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32269 - hostPort: 32269 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32270 - hostPort: 32270 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32271 - hostPort: 32271 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32272 - hostPort: 32272 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32273 - hostPort: 32273 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32274 - hostPort: 32274 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32275 - hostPort: 32275 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32276 - hostPort: 32276 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32277 - hostPort: 32277 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32278 - hostPort: 32278 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32279 - hostPort: 32279 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32280 - hostPort: 32280 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32281 - hostPort: 32281 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32282 - hostPort: 32282 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32283 - hostPort: 32283 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32284 - hostPort: 32284 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32285 - hostPort: 32285 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32286 - hostPort: 32286 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32287 - hostPort: 32287 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32288 - hostPort: 32288 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32289 - hostPort: 32289 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32290 - hostPort: 32290 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32291 - hostPort: 32291 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32292 - hostPort: 32292 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32293 - hostPort: 32293 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32294 - hostPort: 32294 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 32295 - hostPort: 32295 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32296 - hostPort: 32296 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32297 - hostPort: 32297 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32298 - hostPort: 32298 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32299 - hostPort: 32299 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32300 - hostPort: 32300 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32301 - hostPort: 32301 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32302 - hostPort: 32302 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32303 - hostPort: 32303 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32304 - hostPort: 32304 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32305 - hostPort: 32305 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32306 - hostPort: 32306 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32307 - hostPort: 32307 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32308 - hostPort: 32308 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32309 - hostPort: 32309 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32310 - hostPort: 32310 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32311 - hostPort: 32311 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32312 - hostPort: 32312 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32313 - hostPort: 32313 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32314 - hostPort: 32314 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32315 - hostPort: 32315 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32316 - hostPort: 32316 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32317 - hostPort: 32317 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 32318 - hostPort: 32318 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32319 - hostPort: 32319 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32320 - hostPort: 32320 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32321 - hostPort: 32321 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32322 - hostPort: 32322 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32323 - hostPort: 32323 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32324 - hostPort: 32324 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32325 - hostPort: 32325 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32326 - hostPort: 32326 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32327 - hostPort: 32327 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32328 - hostPort: 32328 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32329 - hostPort: 32329 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32330 - hostPort: 32330 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32331 - hostPort: 32331 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32332 - hostPort: 32332 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32333 - hostPort: 32333 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32334 - hostPort: 32334 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32335 - hostPort: 32335 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32336 - hostPort: 32336 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32337 - hostPort: 32337 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32338 - hostPort: 32338 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32339 - hostPort: 32339 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32340 - hostPort: 32340 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 32341 - hostPort: 32341 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32342 - hostPort: 32342 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32343 - hostPort: 32343 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32344 - hostPort: 32344 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32345 - hostPort: 32345 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32346 - hostPort: 32346 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32347 - hostPort: 32347 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32348 - hostPort: 32348 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32349 - hostPort: 32349 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32350 - hostPort: 32350 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32351 - hostPort: 32351 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32352 - hostPort: 32352 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32353 - hostPort: 32353 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32354 - hostPort: 32354 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32355 - hostPort: 32355 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32356 - hostPort: 32356 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32357 - hostPort: 32357 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32358 - hostPort: 32358 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32359 - hostPort: 32359 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32360 - hostPort: 32360 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32361 - hostPort: 32361 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32362 - hostPort: 32362 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32363 - hostPort: 32363 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32364 
- hostPort: 32364 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32365 - hostPort: 32365 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32366 - hostPort: 32366 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32367 - hostPort: 32367 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32368 - hostPort: 32368 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32369 - hostPort: 32369 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32370 - hostPort: 32370 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32371 - hostPort: 32371 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32372 - hostPort: 32372 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32373 - hostPort: 32373 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32374 - hostPort: 32374 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32375 - hostPort: 32375 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32376 - hostPort: 32376 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32377 - hostPort: 32377 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32378 - hostPort: 32378 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32379 - hostPort: 32379 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32380 - hostPort: 32380 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32381 - hostPort: 32381 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32382 - hostPort: 32382 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32383 - hostPort: 32383 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32384 - hostPort: 32384 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32385 - hostPort: 32385 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32386 - hostPort: 32386 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32387 - hostPort: 32387 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32388 - hostPort: 32388 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32389 - hostPort: 32389 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32390 - hostPort: 32390 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32391 - hostPort: 32391 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32392 - hostPort: 32392 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32393 - hostPort: 32393 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32394 - hostPort: 32394 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32395 - hostPort: 32395 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32396 - hostPort: 32396 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32397 - hostPort: 32397 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32398 - hostPort: 32398 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32399 - hostPort: 32399 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32400 - hostPort: 32400 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32401 - hostPort: 32401 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32402 - hostPort: 32402 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32403 - hostPort: 32403 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32404 - hostPort: 32404 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32405 - hostPort: 32405 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32406 - hostPort: 32406 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32407 - hostPort: 32407 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32408 - hostPort: 32408 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32409 - hostPort: 32409 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32410 - hostPort: 32410 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 32411 - hostPort: 32411 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32412 - hostPort: 32412 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32413 - hostPort: 32413 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32414 - hostPort: 32414 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32415 - hostPort: 32415 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32416 - hostPort: 32416 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32417 - hostPort: 32417 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32418 - hostPort: 32418 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32419 - hostPort: 32419 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32420 - hostPort: 32420 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32421 - hostPort: 32421 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32422 - hostPort: 32422 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32423 - hostPort: 32423 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32424 - hostPort: 32424 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32425 - hostPort: 32425 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32426 - hostPort: 32426 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32427 - hostPort: 32427 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32428 - hostPort: 32428 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32429 - hostPort: 32429 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32430 - hostPort: 32430 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32431 - hostPort: 32431 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32432 - hostPort: 32432 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32433 - hostPort: 32433 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 32434 - hostPort: 32434 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32435 - hostPort: 32435 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32436 - hostPort: 32436 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32437 - hostPort: 32437 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32438 - hostPort: 32438 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32439 - hostPort: 32439 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32440 - hostPort: 32440 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32441 - hostPort: 32441 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32442 - hostPort: 32442 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32443 - hostPort: 32443 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32444 - hostPort: 32444 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32445 - hostPort: 32445 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32446 - hostPort: 32446 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32447 - hostPort: 32447 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32448 - hostPort: 32448 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32449 - hostPort: 32449 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32450 - hostPort: 32450 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32451 - hostPort: 32451 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32452 - hostPort: 32452 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32453 - hostPort: 32453 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32454 - hostPort: 32454 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32455 - hostPort: 32455 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32456 - hostPort: 32456 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 32457 - hostPort: 32457 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32458 - hostPort: 32458 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32459 - hostPort: 32459 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32460 - hostPort: 32460 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32461 - hostPort: 32461 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32462 - hostPort: 32462 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32463 - hostPort: 32463 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32464 - hostPort: 32464 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32465 - hostPort: 32465 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32466 - hostPort: 32466 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32467 - hostPort: 32467 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32468 - hostPort: 32468 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32469 - hostPort: 32469 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32470 - hostPort: 32470 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32471 - hostPort: 32471 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32472 - hostPort: 32472 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32473 - hostPort: 32473 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32474 - hostPort: 32474 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32475 - hostPort: 32475 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32476 - hostPort: 32476 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32477 - hostPort: 32477 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32478 - hostPort: 32478 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32479 - hostPort: 32479 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32480 
- hostPort: 32480 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32481 - hostPort: 32481 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32482 - hostPort: 32482 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32483 - hostPort: 32483 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32484 - hostPort: 32484 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32485 - hostPort: 32485 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32486 - hostPort: 32486 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32487 - hostPort: 32487 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32488 - hostPort: 32488 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32489 - hostPort: 32489 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32490 - hostPort: 32490 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32491 - hostPort: 32491 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32492 - hostPort: 32492 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32493 - hostPort: 32493 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32494 - hostPort: 32494 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32495 - hostPort: 32495 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32496 - hostPort: 32496 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32497 - hostPort: 32497 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32498 - hostPort: 32498 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32499 - hostPort: 32499 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32500 - hostPort: 32500 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32501 - hostPort: 32501 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32502 - hostPort: 32502 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32503 - hostPort: 32503 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32504 - hostPort: 32504 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32505 - hostPort: 32505 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32506 - hostPort: 32506 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32507 - hostPort: 32507 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32508 - hostPort: 32508 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32509 - hostPort: 32509 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32510 - hostPort: 32510 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32511 - hostPort: 32511 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32512 - hostPort: 32512 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32513 - hostPort: 32513 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32514 - hostPort: 32514 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32515 - hostPort: 32515 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32516 - hostPort: 32516 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32517 - hostPort: 32517 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32518 - hostPort: 32518 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32519 - hostPort: 32519 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32520 - hostPort: 32520 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32521 - hostPort: 32521 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32522 - hostPort: 32522 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32523 - hostPort: 32523 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32524 - hostPort: 32524 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32525 - hostPort: 32525 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32526 - hostPort: 32526 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 32527 - hostPort: 32527 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32528 - hostPort: 32528 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32529 - hostPort: 32529 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32530 - hostPort: 32530 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32531 - hostPort: 32531 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32532 - hostPort: 32532 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32533 - hostPort: 32533 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32534 - hostPort: 32534 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32535 - hostPort: 32535 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32536 - hostPort: 32536 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32537 - hostPort: 32537 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32538 - hostPort: 32538 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32539 - hostPort: 32539 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32540 - hostPort: 32540 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32541 - hostPort: 32541 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32542 - hostPort: 32542 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32543 - hostPort: 32543 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32544 - hostPort: 32544 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32545 - hostPort: 32545 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32546 - hostPort: 32546 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32547 - hostPort: 32547 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32548 - hostPort: 32548 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32549 - hostPort: 32549 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 32550 - hostPort: 32550 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32551 - hostPort: 32551 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32552 - hostPort: 32552 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32553 - hostPort: 32553 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32554 - hostPort: 32554 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32555 - hostPort: 32555 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32556 - hostPort: 32556 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32557 - hostPort: 32557 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32558 - hostPort: 32558 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32559 - hostPort: 32559 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32560 - hostPort: 32560 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32561 - hostPort: 32561 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32562 - hostPort: 32562 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32563 - hostPort: 32563 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32564 - hostPort: 32564 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32565 - hostPort: 32565 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32566 - hostPort: 32566 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32567 - hostPort: 32567 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32568 - hostPort: 32568 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32569 - hostPort: 32569 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32570 - hostPort: 32570 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32571 - hostPort: 32571 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32572 - hostPort: 32572 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 32573 - hostPort: 32573 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32574 - hostPort: 32574 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32575 - hostPort: 32575 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32576 - hostPort: 32576 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32577 - hostPort: 32577 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32578 - hostPort: 32578 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32579 - hostPort: 32579 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32580 - hostPort: 32580 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32581 - hostPort: 32581 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32582 - hostPort: 32582 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32583 - hostPort: 32583 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32584 - hostPort: 32584 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32585 - hostPort: 32585 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32586 - hostPort: 32586 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32587 - hostPort: 32587 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32588 - hostPort: 32588 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32589 - hostPort: 32589 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32590 - hostPort: 32590 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32591 - hostPort: 32591 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32592 - hostPort: 32592 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32593 - hostPort: 32593 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32594 - hostPort: 32594 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32595 - hostPort: 32595 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32596 
- hostPort: 32596 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32597 - hostPort: 32597 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32598 - hostPort: 32598 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32599 - hostPort: 32599 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32600 - hostPort: 32600 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32601 - hostPort: 32601 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32602 - hostPort: 32602 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32603 - hostPort: 32603 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32604 - hostPort: 32604 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32605 - hostPort: 32605 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32606 - hostPort: 32606 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32607 - hostPort: 32607 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32608 - hostPort: 32608 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32609 - hostPort: 32609 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32610 - hostPort: 32610 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32611 - hostPort: 32611 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32612 - hostPort: 32612 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32613 - hostPort: 32613 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32614 - hostPort: 32614 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32615 - hostPort: 32615 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32616 - hostPort: 32616 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32617 - hostPort: 32617 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32618 - hostPort: 32618 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32619 - hostPort: 32619 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32620 - hostPort: 32620 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32621 - hostPort: 32621 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32622 - hostPort: 32622 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32623 - hostPort: 32623 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32624 - hostPort: 32624 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32625 - hostPort: 32625 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32626 - hostPort: 32626 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32627 - hostPort: 32627 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32628 - hostPort: 32628 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32629 - hostPort: 32629 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32630 - hostPort: 32630 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32631 - hostPort: 32631 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32632 - hostPort: 32632 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32633 - hostPort: 32633 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32634 - hostPort: 32634 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32635 - hostPort: 32635 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32636 - hostPort: 32636 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32637 - hostPort: 32637 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32638 - hostPort: 32638 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32639 - hostPort: 32639 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32640 - hostPort: 32640 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32641 - hostPort: 32641 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32642 - hostPort: 32642 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 32643 - hostPort: 32643 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32644 - hostPort: 32644 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32645 - hostPort: 32645 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32646 - hostPort: 32646 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32647 - hostPort: 32647 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32648 - hostPort: 32648 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32649 - hostPort: 32649 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32650 - hostPort: 32650 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32651 - hostPort: 32651 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32652 - hostPort: 32652 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32653 - hostPort: 32653 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32654 - hostPort: 32654 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32655 - hostPort: 32655 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32656 - hostPort: 32656 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32657 - hostPort: 32657 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32658 - hostPort: 32658 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32659 - hostPort: 32659 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32660 - hostPort: 32660 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32661 - hostPort: 32661 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32662 - hostPort: 32662 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32663 - hostPort: 32663 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32664 - hostPort: 32664 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32665 - hostPort: 32665 - listenAddress: "0.0.0.0" - protocol: 
tcp - - containerPort: 32666 - hostPort: 32666 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32667 - hostPort: 32667 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32668 - hostPort: 32668 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32669 - hostPort: 32669 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32670 - hostPort: 32670 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32671 - hostPort: 32671 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32672 - hostPort: 32672 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32673 - hostPort: 32673 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32674 - hostPort: 32674 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32675 - hostPort: 32675 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32676 - hostPort: 32676 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32677 - hostPort: 32677 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32678 - hostPort: 32678 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32679 - hostPort: 32679 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32680 - hostPort: 32680 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32681 - hostPort: 32681 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32682 - hostPort: 32682 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32683 - hostPort: 32683 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32684 - hostPort: 32684 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32685 - hostPort: 32685 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32686 - hostPort: 32686 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32687 - hostPort: 32687 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32688 - hostPort: 32688 - listenAddress: "0.0.0.0" - protocol: tcp - - 
containerPort: 32689 - hostPort: 32689 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32690 - hostPort: 32690 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32691 - hostPort: 32691 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32692 - hostPort: 32692 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32693 - hostPort: 32693 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32694 - hostPort: 32694 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32695 - hostPort: 32695 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32696 - hostPort: 32696 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32697 - hostPort: 32697 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32698 - hostPort: 32698 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32699 - hostPort: 32699 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32700 - hostPort: 32700 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32701 - hostPort: 32701 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32702 - hostPort: 32702 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32703 - hostPort: 32703 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32704 - hostPort: 32704 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32705 - hostPort: 32705 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32706 - hostPort: 32706 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32707 - hostPort: 32707 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32708 - hostPort: 32708 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32709 - hostPort: 32709 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32710 - hostPort: 32710 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32711 - hostPort: 32711 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32712 
- hostPort: 32712 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32713 - hostPort: 32713 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32714 - hostPort: 32714 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32715 - hostPort: 32715 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32716 - hostPort: 32716 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32717 - hostPort: 32717 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32718 - hostPort: 32718 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32719 - hostPort: 32719 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32720 - hostPort: 32720 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32721 - hostPort: 32721 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32722 - hostPort: 32722 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32723 - hostPort: 32723 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32724 - hostPort: 32724 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32725 - hostPort: 32725 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32726 - hostPort: 32726 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32727 - hostPort: 32727 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32728 - hostPort: 32728 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32729 - hostPort: 32729 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32730 - hostPort: 32730 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32731 - hostPort: 32731 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32732 - hostPort: 32732 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32733 - hostPort: 32733 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32734 - hostPort: 32734 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32735 - hostPort: 32735 - 
listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32736 - hostPort: 32736 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32737 - hostPort: 32737 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32738 - hostPort: 32738 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32739 - hostPort: 32739 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32740 - hostPort: 32740 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32741 - hostPort: 32741 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32742 - hostPort: 32742 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32743 - hostPort: 32743 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32744 - hostPort: 32744 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32745 - hostPort: 32745 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32746 - hostPort: 32746 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32747 - hostPort: 32747 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32748 - hostPort: 32748 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32749 - hostPort: 32749 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32750 - hostPort: 32750 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32751 - hostPort: 32751 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32752 - hostPort: 32752 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32753 - hostPort: 32753 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32754 - hostPort: 32754 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32755 - hostPort: 32755 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32756 - hostPort: 32756 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32757 - hostPort: 32757 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32758 - hostPort: 32758 - listenAddress: 
"0.0.0.0" - protocol: tcp - - containerPort: 32759 - hostPort: 32759 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32760 - hostPort: 32760 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32761 - hostPort: 32761 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32762 - hostPort: 32762 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32763 - hostPort: 32763 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32764 - hostPort: 32764 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32765 - hostPort: 32765 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32766 - hostPort: 32766 - listenAddress: "0.0.0.0" - protocol: tcp - - containerPort: 32767 - hostPort: 32767 - listenAddress: "0.0.0.0" - protocol: tcp diff --git a/tests/playground/kind/create_cluster.sh b/tests/playground/kind/create_cluster.sh deleted file mode 100644 index 73990b0b795..00000000000 --- a/tests/playground/kind/create_cluster.sh +++ /dev/null @@ -1,8 +0,0 @@ -# Be sure to have built the latest image before running this script -# If running on apple silicon: -# docker buildx build --load --platform linux/arm64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky && docker tag us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest skypilot:latest && docker push us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -set -e -kind delete cluster -kind create cluster --config kind-cluster.yaml -# Load local skypilot image -kind load docker-image us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest diff --git a/tests/playground/ray_k8s_example_full.yaml b/tests/playground/ray_k8s_example_full.yaml deleted file mode 100644 index 93c42bab9cf..00000000000 --- a/tests/playground/ray_k8s_example_full.yaml +++ /dev/null @@ -1,213 +0,0 @@ -# A unique identifier for the head node and workers of this cluster. 
-cluster_name: example-cluster - -# The maximum number of workers nodes to launch in addition to the head -# node. -max_workers: 0 - -# The autoscaler will scale up the cluster faster with higher upscaling speed. -# E.g., if the task requires adding more nodes then autoscaler will gradually -# scale up the cluster in chunks of upscaling_speed*currently_running_nodes. -# This number should be > 0. -upscaling_speed: 1.0 - -# If a node is idle for this many minutes, it will be removed. -idle_timeout_minutes: 5 - -# Kubernetes resources that need to be configured for the autoscaler to be -# able to manage the Ray cluster. If any of the provided resources don't -# exist, the autoscaler will attempt to create them. If this fails, you may -# not have the required permissions and will have to request them to be -# created by your cluster administrator. -provider: - type: kubernetes - - # Exposing external IP addresses for ray pods isn't currently supported. - use_internal_ips: true - - # Namespace to use for all resources created. - namespace: ray - - # ServiceAccount created by the autoscaler for the head node pod that it - # runs in. If this field isn't provided, the head pod config below must - # contain a user-created service account with the proper permissions. - autoscaler_service_account: - apiVersion: v1 - kind: ServiceAccount - metadata: - name: autoscaler - - # Role created by the autoscaler for the head node pod that it runs in. - # If this field isn't provided, the role referenced in - # autoscaler_role_binding must exist and have at least these permissions. - autoscaler_role: - kind: Role - apiVersion: rbac.authorization.k8s.io/v1 - metadata: - name: autoscaler - rules: - - apiGroups: [""] - resources: ["pods", "pods/status", "pods/exec"] - verbs: ["get", "watch", "list", "create", "delete", "patch"] - - # RoleBinding created by the autoscaler for the head node pod that it runs - # in. 
If this field isn't provided, the head pod config below must contain - # a user-created service account with the proper permissions. - autoscaler_role_binding: - apiVersion: rbac.authorization.k8s.io/v1 - kind: RoleBinding - metadata: - name: autoscaler - subjects: - - kind: ServiceAccount - name: autoscaler - roleRef: - kind: Role - name: autoscaler - apiGroup: rbac.authorization.k8s.io - - services: - # Service that maps to the head node of the Ray cluster. - - apiVersion: v1 - kind: Service - metadata: - # NOTE: If you're running multiple Ray clusters with services - # on one Kubernetes cluster, they must have unique service - # names. - name: example-cluster-ray-head - spec: - # This selector must match the head node pod's selector below. - selector: - component: example-cluster-ray-head - ports: - - name: client - protocol: TCP - port: 10001 - targetPort: 10001 - - name: dashboard - protocol: TCP - port: 8265 - targetPort: 8265 - - name: ray-serve - protocol: TCP - port: 8000 - targetPort: 8000 - -# Specify the pod type for the ray head node (as configured below). -head_node_type: head_node -# Specify the allowed pod types for this ray cluster and the resources they provide. -available_node_types: - worker_node: - # Minimum number of Ray workers of this Pod type. - min_workers: 0 - # Maximum number of Ray workers of this Pod type. Takes precedence over min_workers. - max_workers: 2 - # User-specified custom resources for use by Ray. Object with string keys and integer values. - # (Ray detects CPU and GPU from pod spec resource requests and limits, so no need to fill those here.) - resources: {"example-resource-a": 1, "example-resource-b": 2} - node_config: - apiVersion: v1 - kind: Pod - metadata: - # Automatically generates a name for the pod with this prefix. 
- generateName: example-cluster-ray-worker- - spec: - restartPolicy: Never - volumes: - - name: dshm - emptyDir: - medium: Memory - containers: - - name: ray-node - imagePullPolicy: Always - image: rayproject/ray:latest - command: ["/bin/bash", "-c", "--"] - args: ["trap : TERM INT; sleep infinity & wait;"] - # This volume allocates shared memory for Ray to use for its plasma - # object store. If you do not provide this, Ray will fall back to - # /tmp which cause slowdowns if is not a shared memory volume. - volumeMounts: - - mountPath: /dev/shm - name: dshm - resources: - requests: - cpu: 1000m - memory: 1024Mi - limits: - # The maximum memory that this pod is allowed to use. The - # limit will be detected by ray and split to use 10% for - # redis, 30% for the shared memory object store, and the - # rest for application memory. If this limit is not set and - # the object store size is not set manually, ray will - # allocate a very large object store in each pod that may - # cause problems for other pods. - memory: 1024Mi - head_node: - node_config: - apiVersion: v1 - kind: Pod - metadata: - # Automatically generates a name for the pod with this prefix. - generateName: example-cluster-ray-head- - # Must match the head node service selector above if a head node - # service is required. - labels: - component: example-cluster-ray-head - spec: - # Change this if you altered the autoscaler_service_account above - # or want to provide your own. - serviceAccountName: autoscaler - - restartPolicy: Never - - # This volume allocates shared memory for Ray to use for its plasma - # object store. If you do not provide this, Ray will fall back to - # /tmp which cause slowdowns if is not a shared memory volume. - volumes: - - name: dshm - emptyDir: - medium: Memory - containers: - - name: ray-node - imagePullPolicy: Always - image: rayproject/ray:latest - # Do not change this command - it keeps the pod alive until it is - # explicitly killed. 
- command: ["/bin/bash", "-c", "--"] - args: ['trap : TERM INT; sleep infinity & wait;'] - ports: - - containerPort: 6379 # Redis port - - containerPort: 10001 # Used by Ray Client - - containerPort: 8265 # Used by Ray Dashboard - - # This volume allocates shared memory for Ray to use for its plasma - # object store. If you do not provide this, Ray will fall back to - # /tmp which cause slowdowns if is not a shared memory volume. - volumeMounts: - - mountPath: /dev/shm - name: dshm - resources: - requests: - cpu: 1000m - memory: 1024Mi - limits: - # The maximum memory that this pod is allowed to use. The - # limit will be detected by ray and split to use 10% for - # redis, 30% for the shared memory object store, and the - # rest for application memory. If this limit is not set and - # the object store size is not set manually, ray will - # allocate a very large object store in each pod that may - # cause problems for other pods. - memory: 1024Mi - - -# Command to start ray on the head node. You don't need to change this. -# Note dashboard-host is set to 0.0.0.0 so that kubernetes can port forward. -head_start_ray_commands: - - ray stop - - ulimit -n 65536; ray start --head --port=6379 --autoscaling-config=~/ray_bootstrap_config.yaml --dashboard-host 0.0.0.0 - -# Command to start ray on worker nodes. You don't need to change this. 
-worker_start_ray_commands: - - ray stop - - ulimit -n 65536; ray start --address=$RAY_HEAD_IP:6379 From 2c88daf5d2bc4f3c82dced3b6f7022640d24ba2c Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 16 Jun 2023 08:02:02 -0700 Subject: [PATCH 027/183] wip --- sky/skylet/providers/kubernetes/node_provider.py | 2 +- tests/kubernetes/README.md | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index e0302635060..66ea92a770e 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -223,7 +223,7 @@ def create_node(self, node_config, tags, count): # exception. If pod's container is ContainerCreating, then we can assume # that resources have been allocated and we can exit. - # TODO(romilb): Figure out a way to make this timeout configurable. + # TODO(romilb): Make timeout configurable in Kubernetes cloud class. TIMEOUT = 60 start = time.time() while True: diff --git a/tests/kubernetes/README.md b/tests/kubernetes/README.md index f628796664d..4917a0935c2 100644 --- a/tests/kubernetes/README.md +++ b/tests/kubernetes/README.md @@ -25,6 +25,8 @@ cd kind ./create_cluster.sh ``` +## Running a GKE cluster + ## Other useful scripts `scripts` directory contains other useful scripts for development, including Kubernetes dashboard, ray yaml for testing the SkyPilot Kubernetes node provider From 7ece7f7a8b7d996b5c71288d43a63e68b985e533 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 16 Jun 2023 08:38:32 -0700 Subject: [PATCH 028/183] Update setup --- sky/setup_files/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sky/setup_files/setup.py b/sky/setup_files/setup.py index 79f07f8a5ce..1c9bbc89531 100644 --- a/sky/setup_files/setup.py +++ b/sky/setup_files/setup.py @@ -141,6 +141,7 @@ def parse_readme(readme: str) -> str: 'cloudflare': aws_dependencies, 'scp': [], 'oci': ['oci'], + 
'kubernetes': ['kubernetes'], } extras_require['all'] = sum(extras_require.values(), []) From cc85f9440def29d4e9bdec60d3b2b67d0110717d Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 16 Jun 2023 08:44:03 -0700 Subject: [PATCH 029/183] readme updates --- tests/kubernetes/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tests/kubernetes/README.md b/tests/kubernetes/README.md index 4917a0935c2..740f18683ac 100644 --- a/tests/kubernetes/README.md +++ b/tests/kubernetes/README.md @@ -26,6 +26,16 @@ cd kind ``` ## Running a GKE cluster +1. Make sure ports 30000-32767 are open in your node pool VPC's firewall. +2. Create a GKE cluster with at least 1 node. +3. Get the kubeconfig for your cluster and place it in `~/.kube/config`: +```bash +gcloud container clusters get-credentials <cluster-name> --region <region> +# Example: +# gcloud container clusters get-credentials testcluster --region us-central1-c +``` +4. Verify by running `kubectl get nodes`. You should see your nodes. +5. You can run SkyPilot tasks now. ## Other useful scripts `scripts` directory contains other useful scripts for development, including From 0450cee59046cbd1a9db2e48609642bb103e5937 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 16 Jun 2023 08:46:20 -0700 Subject: [PATCH 030/183] lint --- sky/skylet/providers/kubernetes/node_provider.py | 3 +-- tests/kubernetes/kind/portmap_gen.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 66ea92a770e..2dcbcac237e 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -231,7 +231,7 @@ def create_node(self, node_config, tags, count): raise KubernetesError( "Timed out while waiting for nodes to start. " "Cluster may be out of resources or " - "may be too slow to autoscale."
- ) + "may be too slow to autoscale.") all_ready = True for node in new_nodes: pod = core_api().read_namespaced_pod(node.metadata.name, diff --git a/tests/kubernetes/kind/portmap_gen.py b/tests/kubernetes/kind/portmap_gen.py index a0d32b09ffe..672aa594af6 100644 --- a/tests/kubernetes/kind/portmap_gen.py +++ b/tests/kubernetes/kind/portmap_gen.py @@ -6,7 +6,7 @@ nodes: - role: control-plane extraPortMappings:""" -suffix = "" # """- role: worker""" # Uncomment this line to add a worker node +suffix = "" # """- role: worker""" # Uncomment this line to add a worker node with open('kind-cluster.yaml', 'w') as f: f.write(preamble) for port in range(30000, 32768): From f3f05783548a263f41aaf02b5885f3f941805a88 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 16 Jun 2023 09:05:37 -0700 Subject: [PATCH 031/183] Fix failover --- sky/skylet/providers/kubernetes/node_provider.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 2dcbcac237e..c94b6f8709b 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -244,8 +244,12 @@ def create_node(self, node_config, tags, count): # Container is creating, so we can assume resources # have been allocated. Safe to exit. 
break + else: + # Pod is pending but not in 'ContainerCreating' state + all_ready = False + break else: - # Pod is pending and not in 'ContainerCreating' state + # No conditions also indicates that the pod is pending all_ready = False break if all_ready: From 574a9c6521df5e6abd35979f591184445214241e Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 16 Jun 2023 09:08:32 -0700 Subject: [PATCH 032/183] Fix failover --- sky/templates/kubernetes-ray.yml.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 4e070e85423..a7180761e70 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -256,6 +256,7 @@ available_node_types: memory: {{memory}}G setup_commands: + # TODO(romilb): Clean up setup commands which are irrelevant to k8s. # Disable `unattended-upgrades` to prevent apt-get from hanging. It should be called at the beginning before the process started to avoid being blocked. (This is a temporary fix.) # Create ~/.ssh/config file in case the file does not exist in the image. # Line 'rm ..': there is another installation of pip. From 0632b4891e7008ecbbd295a71f6fc3353ca61d05 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 16 Jun 2023 15:32:57 -0700 Subject: [PATCH 033/183] optimize setup --- sky/templates/kubernetes-ray.yml.j2 | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index a7180761e70..c1df84a629b 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -20,6 +20,7 @@ provider: use_internal_ips: false # Namespace to use for all resources created. + # TODO(romilb): Make this configurable. 
namespace: default # ServiceAccount created by the autoscaler for the head node pod that it @@ -219,9 +220,9 @@ available_node_types: args: ['trap : TERM INT; sleep infinity & wait;'] ports: - containerPort: 22 # Used for SSH - - containerPort: 6379 # Redis port + - containerPort: {{ray_port}} # Redis port - containerPort: 10001 # Used by Ray Client - - containerPort: 8265 # Used by Ray Dashboard + - containerPort: {{ray_dashboard_port}} # Used by Ray Dashboard # This volume allocates shared memory for Ray to use for its plasma # object store. If you do not provide this, Ray will fall back to @@ -232,7 +233,7 @@ available_node_types: mountPath: "/etc/secret-volume" - mountPath: /dev/shm name: dshm - - mountPath: /dev/fuse # Required for fuse mounting + - mountPath: /dev/fuse # Required for FUSE mounting name: dev-fuse securityContext: # Required for FUSE mounting, but may be a security risk privileged: true @@ -259,17 +260,11 @@ setup_commands: # TODO(romilb): Clean up setup commands which are irrelevant to k8s. # Disable `unattended-upgrades` to prevent apt-get from hanging. It should be called at the beginning before the process started to avoid being blocked. (This is a temporary fix.) # Create ~/.ssh/config file in case the file does not exist in the image. - # Line 'rm ..': there is another installation of pip. # Line 'sudo bash ..': set the ulimit as suggested by ray docs for performance. https://docs.ray.io/en/latest/cluster/vms/user-guides/large-cluster-best-practices.html#system-configuration # Line 'sudo grep ..': set the number of threads per process to unlimited to avoid ray job submit stucking issue when the number of running ray jobs increase. 
# Line 'mkdir -p ..': disable host key check # Line 'python3 -c ..': patch the buggy ray files and enable `-o allow_other` option for `goofys` - - sudo systemctl stop unattended-upgrades || true; - sudo systemctl disable unattended-upgrades || true; - sudo pkill -9 apt-get; - sudo pkill -9 dpkg; - sudo dpkg --configure -a; - mkdir -p ~/.ssh; touch ~/.ssh/config; + - mkdir -p ~/.ssh; touch ~/.ssh/config; pip3 --version > /dev/null 2>&1 || (curl -sSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3 get-pip.py && echo "PATH=$HOME/.local/bin:$PATH" >> ~/.bashrc); (type -a python | grep -q python3) || echo 'alias python=python3' >> ~/.bashrc; (type -a pip | grep -q pip3) || echo 'alias pip=pip3' >> ~/.bashrc; @@ -296,7 +291,6 @@ head_start_ray_commands: # NOTE: --disable-usage-stats in `ray start` saves 10 seconds of idle wait. # Line "which prlimit ..": increase the limit of the number of open files for the raylet process, as the `ulimit` may not take effect at this point, because it requires # all the sessions to be reloaded. This is a workaround. - - echo "Starting Ray." 
- ((ps aux | grep -v nohup | grep -v grep | grep -q -- "python3 -m sky.skylet.skylet") || nohup python3 -m sky.skylet.skylet >> ~/.sky/skylet.log 2>&1 &); ray stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 ray start --disable-usage-stats --head --port={{ray_port}} --dashboard-port={{ray_dashboard_port}} --dashboard-host 0.0.0.0 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1; which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; From 05508d31e0a500d31b0b3e85b8a0a628aad377a3 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 16 Jun 2023 16:20:02 -0700 Subject: [PATCH 034/183] Fix sync down logs for k8s --- sky/backends/cloud_vm_ray_backend.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 65719f6ae64..bd8fdd2e336 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2537,7 +2537,7 @@ def _provision( ssh_credentials = backend_utils.ssh_credential_from_yaml( handle.cluster_yaml) runners = command_runner.SSHCommandRunner.make_runner_list( - ip_list, **ssh_credentials, port_list=None) + ip_list, **ssh_credentials, port_list=ssh_port_list) def _get_zone(runner): retry_count = 0 @@ -3215,10 +3215,13 @@ def sync_down_logs( ip_list = handle.external_ips() assert ip_list is not None, 'external_ips is not cached in handle' + ssh_port_list = handle.external_ssh_ports() + assert ssh_port_list is not None, 'external_ssh_ports is not cached ' \ + 'in handle' ssh_credentials = backend_utils.ssh_credential_from_yaml( handle.cluster_yaml) runners = command_runner.SSHCommandRunner.make_runner_list( - ip_list, **ssh_credentials, port_list=None) + ip_list, **ssh_credentials, port_list=ssh_port_list) def _rsync_down(args) -> None: """Rsync 
down logs from remote nodes. From fb36a40287a95d2714863b686fa9577a20041cbd Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 18 Jun 2023 12:10:45 -0700 Subject: [PATCH 035/183] test wip --- sky/spot/constants.py | 2 +- tests/conftest.py | 4 +++- tests/test_smoke.py | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/sky/spot/constants.py b/sky/spot/constants.py index 605330691d4..78aca653e2a 100644 --- a/sky/spot/constants.py +++ b/sky/spot/constants.py @@ -1,6 +1,6 @@ """Constants used for Managed Spot.""" -SPOT_CONTROLLER_IDLE_MINUTES_TO_AUTOSTOP = 10 +SPOT_CONTROLLER_IDLE_MINUTES_TO_AUTOSTOP = None SPOT_CONTROLLER_TEMPLATE = 'spot-controller.yaml.j2' SPOT_CONTROLLER_YAML_PREFIX = '~/.sky/spot_controller' diff --git a/tests/conftest.py b/tests/conftest.py index be3e8bedaeb..154068d93bb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,7 +19,8 @@ # # To only run tests for managed spot (without generic tests), use --managed-spot. all_clouds_in_smoke_tests = [ - 'aws', 'gcp', 'azure', 'lambda', 'cloudflare', 'ibm', 'scp', 'oci' + 'aws', 'gcp', 'azure', 'lambda', 'cloudflare', 'ibm', 'scp', 'oci', + 'kubernetes' ] default_clouds_to_run = ['gcp', 'azure'] @@ -35,6 +36,7 @@ 'ibm': 'ibm', 'scp': 'scp', 'oci': 'oci', + 'kubernetes': 'kubernetes' } diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 9c3e7ea19bf..828ec29ca50 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -556,6 +556,7 @@ def test_image_no_conda(): # ------------ Test stale job ------------ @pytest.mark.no_lambda_cloud # Lambda Cloud does not support stopping instances +@pytest.mark.no_kubernetes # Kubernetes does not support stopping instances def test_stale_job(generic_cloud: str): name = _get_cluster_name() test = Test( From 7db4027d1bf93858c78debc05b74e94580a4864f Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 19 Jun 2023 16:43:58 -0700 Subject: [PATCH 036/183] instance name parsing wip --- sky/clouds/kubernetes.py | 85 
++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 0f0946eb9d9..429a7023dc1 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -1,6 +1,7 @@ """Kubernetes.""" import json import os +import re import typing from typing import Dict, Iterator, List, Optional, Tuple @@ -40,6 +41,90 @@ class Kubernetes(clouds.Cloud): IMAGE = 'us-central1-docker.pkg.dev/' \ 'skypilot-375900/skypilotk8s/skypilot:latest' + class KubernetesInstanceName: + """ + Class to represent the name of a Kubernetes instance. + + Since Kubernetes does not have a notion of instances, we generate + virtual instance types that represent the resources requested by a + pod ("node"). + + This name captures the following resource requests: + - CPU + - Memory + - Accelerators + + The name format is "{n}CPU--{k}GB" where n is the number of vCPUs and + k is the amount of memory in GB. Accelerators can be specified by + appending "--{a}{type}" where a is the number of accelerators and + type is the accelerator type. + + Examples: + - 4CPU--16GB + - 4CPU--16GB--1V100 + """ + + def __init__(self, + name: Optional[str] = None, + cpus: Optional[int] = None, + memory: Optional[int] = None, + accelerator_count: Optional[int] = None, + accelerator_type: Optional[str] = None): + self.name = name + self.cpus = cpus + self.memory = memory + self.accelerator_count = accelerator_count + self.accelerator_type = accelerator_type + + @staticmethod + def is_valid_instance_name(name: str) -> bool: + """Returns whether the given name is a valid instance name.""" + # Check if the name is of the form "{n}CPU--{k}GB--{a}{type}". + cpumemgpu_pattern = re.compile(r'^\d+CPU--\d+GB--\d+[A-Za-z]+$') + if cpumemgpu_pattern.match(name): + return True + else: + # Check if the name is of the form "{n}CPU--{k}GB". 
+ cpumem_pattern = re.compile(r'^\d+CPU--\d+GB$') + if cpumem_pattern.match(name): + return True + return False + + @classmethod + def _parse_instance_name(cls, name: str) -> Tuple[int, int, int, str]: + """Returns the cpus, memory, accelerator_count, and accelerator_type + from the given name.""" + if not cls.is_valid_instance_name(name): + raise ValueError(f'Invalid instance name: {name}') + # Split the name into cpus, memory, and accelerators. + cpus, memory, accelerators = name.split('--') + # Parse the cpus and memory. + cpus = int(cpus.replace('CPU', '')) + memory = int(memory.replace('GB', '')) + # Parse the accelerators. + accelerator_count = 0 + accelerator_type = '' + if accelerators: + accelerator_count = int(accelerators[:-2]) + accelerator_type = accelerators[-2:] + return cpus, memory, accelerator_count, accelerator_type + + @classmethod + def from_instance_name(cls, name: str) -> 'KubernetesInstanceName': + """Returns an instance name object from the given name.""" + if not cls.is_valid_instance_name(name): + raise ValueError(f'Invalid instance name: {name}') + cpus, memory, accelerator_count, accelerator_type = \ + cls._parse_instance_name(name) + return cls(name=name, + cpus=cpus, + memory=memory, + accelerator_count=accelerator_count, + accelerator_type=accelerator_type) + + def __str__(self): + return self.name + @classmethod def _cloud_unsupported_features( cls) -> Dict[clouds.CloudImplementationFeatures, str]: From 632ed30c33f8d03cfcb0c2690a7ba9ca72caa7c6 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 19 Jun 2023 22:49:30 -0700 Subject: [PATCH 037/183] Fix instance name parsing --- sky/clouds/kubernetes.py | 222 ++++++++++++++++++--------------------- 1 file changed, 105 insertions(+), 117 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 429a7023dc1..10bfccd2f22 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -18,6 +18,104 @@ ] +class KubernetesInstanceType: + """ + Class to 
represent the "Instance Type" in a Kubernetes. + + Since Kubernetes does not have a notion of instances, we generate + virtual instance types that represent the resources requested by a + pod ("node"). + + This name captures the following resource requests: + - CPU + - Memory + - Accelerators + + The name format is "{n}CPU--{k}GB" where n is the number of vCPUs and + k is the amount of memory in GB. Accelerators can be specified by + appending "--{a}{type}" where a is the number of accelerators and + type is the accelerator type. + + Examples: + - 4CPU--16GB + - 0.5CPU--1.5GB + - 4CPU--16GB--1V100 + """ + + def __init__(self, + cpus: Optional[float] = None, + memory: Optional[float] = None, + accelerator_count: Optional[float] = None, + accelerator_type: Optional[str] = None): + self.cpus = cpus + self.memory = memory + self.accelerator_count = accelerator_count + self.accelerator_type = accelerator_type + + @property + def name(self) -> str: + """Returns the name of the instance.""" + name = f'{self.cpus}CPU--{self.memory}GB' + if self.accelerator_count: + name += f'--{self.accelerator_count}{self.accelerator_type}' + return name + + @staticmethod + def is_valid_instance_type(name: str) -> bool: + """Returns whether the given name is a valid instance type.""" + pattern = re.compile(r'^(\d+(\.\d+)?CPU--\d+(\.\d+)?GB)(--\d+\S+)?$') + return bool(pattern.match(name)) + + @classmethod + def _parse_instance_type(cls, name: str) -> Tuple[ + float, float, Optional[float], Optional[str]]: + """Returns the cpus, memory, accelerator_count, and accelerator_type + from the given name.""" + pattern = re.compile( + r'^(?P<cpus>\d+(\.\d+)?)CPU--(?P<memory>\d+(\.\d+)?)GB(?:--(?P<accelerator_count>\d+)(?P<accelerator_type>\S+))?$') + match = pattern.match(name) + if match: + cpus = float(match.group('cpus')) + memory = float(match.group('memory')) + accelerator_count = match.group('accelerator_count') + accelerator_type = match.group('accelerator_type') + if accelerator_count: + accelerator_count = float(accelerator_count) +
return cpus, memory, accelerator_count, accelerator_type + else: + raise ValueError(f'Invalid instance name: {name}') + + @classmethod + def from_instance_type(cls, name: str) -> 'KubernetesInstanceName': + """Returns an instance name object from the given name.""" + if not cls.is_valid_instance_type(name): + raise ValueError(f'Invalid instance name: {name}') + cpus, memory, accelerator_count, accelerator_type = \ + cls._parse_instance_type(name) + return cls(cpus=cpus, + memory=memory, + accelerator_count=accelerator_count, + accelerator_type=accelerator_type) + + @classmethod + def from_resources(cls, + cpus: float, + memory: float, + accelerator_count: float = 0, + accelerator_type: str = '') -> 'KubernetesInstanceName': + """Returns an instance name object from the given resources.""" + name = f'{cpus}CPU--{memory}GB' + if accelerator_count > 0: + name += f'--{accelerator_count}{accelerator_type}' + return cls(cpus=cpus, + memory=memory, + accelerator_count=accelerator_count, + accelerator_type=accelerator_type) + + def __str__(self): + return self.name + + @clouds.CLOUD_REGISTRY.register class Kubernetes(clouds.Cloud): """Kubernetes.""" @@ -41,90 +139,6 @@ class Kubernetes(clouds.Cloud): IMAGE = 'us-central1-docker.pkg.dev/' \ 'skypilot-375900/skypilotk8s/skypilot:latest' - class KubernetesInstanceName: - """ - Class to represent the name of a Kubernetes instance. - - Since Kubernetes does not have a notion of instances, we generate - virtual instance types that represent the resources requested by a - pod ("node"). - - This name captures the following resource requests: - - CPU - - Memory - - Accelerators - - The name format is "{n}CPU--{k}GB" where n is the number of vCPUs and - k is the amount of memory in GB. Accelerators can be specified by - appending "--{a}{type}" where a is the number of accelerators and - type is the accelerator type. 
- - Examples: - - 4CPU--16GB - - 4CPU--16GB--1V100 - """ - - def __init__(self, - name: Optional[str] = None, - cpus: Optional[int] = None, - memory: Optional[int] = None, - accelerator_count: Optional[int] = None, - accelerator_type: Optional[str] = None): - self.name = name - self.cpus = cpus - self.memory = memory - self.accelerator_count = accelerator_count - self.accelerator_type = accelerator_type - - @staticmethod - def is_valid_instance_name(name: str) -> bool: - """Returns whether the given name is a valid instance name.""" - # Check if the name is of the form "{n}CPU--{k}GB--{a}{type}". - cpumemgpu_pattern = re.compile(r'^\d+CPU--\d+GB--\d+[A-Za-z]+$') - if cpumemgpu_pattern.match(name): - return True - else: - # Check if the name is of the form "{n}CPU--{k}GB". - cpumem_pattern = re.compile(r'^\d+CPU--\d+GB$') - if cpumem_pattern.match(name): - return True - return False - - @classmethod - def _parse_instance_name(cls, name: str) -> Tuple[int, int, int, str]: - """Returns the cpus, memory, accelerator_count, and accelerator_type - from the given name.""" - if not cls.is_valid_instance_name(name): - raise ValueError(f'Invalid instance name: {name}') - # Split the name into cpus, memory, and accelerators. - cpus, memory, accelerators = name.split('--') - # Parse the cpus and memory. - cpus = int(cpus.replace('CPU', '')) - memory = int(memory.replace('GB', '')) - # Parse the accelerators. 
- accelerator_count = 0 - accelerator_type = '' - if accelerators: - accelerator_count = int(accelerators[:-2]) - accelerator_type = accelerators[-2:] - return cpus, memory, accelerator_count, accelerator_type - - @classmethod - def from_instance_name(cls, name: str) -> 'KubernetesInstanceName': - """Returns an instance name object from the given name.""" - if not cls.is_valid_instance_name(name): - raise ValueError(f'Invalid instance name: {name}') - cpus, memory, accelerator_count, accelerator_type = \ - cls._parse_instance_name(name) - return cls(name=name, - cpus=cpus, - memory=memory, - accelerator_count=accelerator_count, - accelerator_type=accelerator_type) - - def __str__(self): - return self.name - @classmethod def _cloud_unsupported_features( cls) -> Dict[clouds.CloudImplementationFeatures, str]: @@ -178,15 +192,10 @@ def get_default_instance_type( memory: Optional[str] = None, disk_tier: Optional[str] = None) -> Optional[str]: del disk_tier # Unused. - virtual_instance_type = '' - # Remove the + from the cpus/memory string - n_cpus = int( - cpus.strip('+')) if cpus is not None else cls._DEFAULT_NUM_VCPUS - mem = int( - memory.strip('+') - ) if memory is not None else n_cpus * cls._DEFAULT_MEMORY_CPU_RATIO - virtual_instance_type += f'{n_cpus}vCPU-' - virtual_instance_type += f'{mem}GB' + + instance_cpus = int(cpus.strip('+')) if cpus is not None else cls._DEFAULT_NUM_VCPUS + instance_mem = int(memory.strip('+')) if memory is not None else instance_cpus * cls._DEFAULT_MEMORY_CPU_RATIO + virtual_instance_type = KubernetesInstanceType(instance_cpus, instance_mem).name return virtual_instance_type @classmethod @@ -201,9 +210,8 @@ def get_accelerators_from_instance_type( def get_vcpus_mem_from_instance_type( cls, instance_type: str) -> Tuple[float, float]: """Returns the #vCPUs and memory that the instance type offers.""" - vcpus = cls.get_vcpus_from_instance_type(instance_type) - mem = cls.get_mem_from_instance_type(instance_type) - return vcpus, mem + k = 
KubernetesInstanceType.from_instance_type(instance_type) + return k.cpus, k.memory @classmethod def zones_provision_loop( @@ -219,24 +227,6 @@ def zones_provision_loop( for r in cls.regions(): yield r.zones - @classmethod - def get_vcpus_from_instance_type( - cls, - instance_type: str, - ) -> float: - """Returns the #vCPUs that the instance type offers.""" - # TODO(romilb): Need more robust parsing - return float(instance_type.split('vCPU')[0]) - - @classmethod - def get_mem_from_instance_type( - cls, - instance_type: str, - ) -> float: - """Returns the memory that the instance type offers.""" - # TODO(romilb): Need more robust parsing - return float(instance_type.split('vCPU-')[1].split('GB')[0]) - @classmethod def get_zone_shell_cmd(cls) -> Optional[str]: return None @@ -330,9 +320,7 @@ def get_credential_file_mounts(self) -> Dict[str, str]: # } def instance_type_exists(self, instance_type: str) -> bool: - # TODO(romilb): All instance types are supported for now. In the future - # we should check if the instance type is supported by the cluster. 
- return True + return KubernetesInstanceType.is_valid_instance_type(instance_type) def validate_region_zone(self, region: Optional[str], zone: Optional[str]): # Kubernetes doesn't have regions or zones, so we don't need to validate From 1a444d1629043b10164023513287fca00c501fc2 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 19 Jun 2023 23:30:39 -0700 Subject: [PATCH 038/183] Merge fixes for query_status --- sky/clouds/kubernetes.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 10bfccd2f22..fc1d0d018c5 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -5,7 +5,7 @@ import typing from typing import Dict, Iterator, List, Optional, Tuple -from sky import clouds +from sky import clouds, status_lib from sky.utils import common_utils from sky.skylet.providers.kubernetes.utils import get_port @@ -335,3 +335,14 @@ def accelerator_in_region_or_zone(self, # future, we should return false for accelerators that we know are not # supported by the cluster. return True + + def query_status(cls, name: str, tag_filters: Dict[str, str], + region: Optional[str], zone: Optional[str], + **kwargs) -> List['status_lib.ClusterStatus']: + # TODO(romilb): Implement this. For now, we return UP as the status. + # Assuming single node cluster. + del name, tag_filters, region, zone, kwargs # Unused. + # Assume single node cluster. + return [status_lib.ClusterStatus.UP] + + From da9cba2aa4ce37c04d15aa3d6d8a3f623abd2ab3 Mon Sep 17 00:00:00 2001 From: Avi Weit Date: Tue, 20 Jun 2023 18:10:15 +0300 Subject: [PATCH 039/183] [k8s_cloud] Delete k8s service resources. (#2105) Delete k8s service resources. - 'sky down' for Kubernetes cloud to remove cluster service resources. 
--- sky/skylet/providers/kubernetes/node_provider.py | 1 + sky/templates/kubernetes-ray.yml.j2 | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index c94b6f8709b..81b251169f0 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -268,6 +268,7 @@ def terminate_node(self, node_id): raise try: core_api().delete_namespaced_service(node_id, self.namespace) + core_api().delete_namespaced_service(f'{node_id}-ssh', self.namespace) except ApiException: pass try: diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index c1df84a629b..8906b0200bb 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -183,8 +183,7 @@ available_node_types: apiVersion: v1 kind: Pod metadata: - # Automatically generates a name for the pod with this prefix. - generateName: {{cluster_name}}-ray-head- + name: {{cluster_name}}-ray-head # Must match the head node service selector above if a head node # service is required. labels: From 81871ac9cf6dc06bcb29da171d8eb2175e946f2c Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 20 Jun 2023 09:41:26 -0700 Subject: [PATCH 040/183] Status refresh WIP --- sky/clouds/kubernetes.py | 9 +++++---- sky/skylet/providers/kubernetes/utils.py | 23 ++++++++++++++++++++++- sky/templates/kubernetes-ray.yml.j2 | 4 ++++ 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index fc1d0d018c5..aa51a4e255f 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -7,7 +7,7 @@ from sky import clouds, status_lib from sky.utils import common_utils -from sky.skylet.providers.kubernetes.utils import get_port +from sky.skylet.providers.kubernetes import utils as kubernetes_utils if typing.TYPE_CHECKING: # Renaming to avoid shadowing variables. 
@@ -183,7 +183,7 @@ def is_same_cloud(self, other: clouds.Cloud) -> bool: @classmethod def get_port(cls, svc_name, namespace) -> int: - return get_port(svc_name, namespace) + return kubernetes_utils.get_port(svc_name, namespace) @classmethod def get_default_instance_type( @@ -341,8 +341,9 @@ def query_status(cls, name: str, tag_filters: Dict[str, str], **kwargs) -> List['status_lib.ClusterStatus']: # TODO(romilb): Implement this. For now, we return UP as the status. # Assuming single node cluster. - del name, tag_filters, region, zone, kwargs # Unused. - # Assume single node cluster. + del tag_filters, region, zone, kwargs # Unused. return [status_lib.ClusterStatus.UP] + # TODO(romilb): Change from default namespace to user-specified namespace + # return kubernetes_utils.get_cluster_status(cluster_name=name, namespace='default') diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 1c1c5243c7b..67d2be1e986 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -1,12 +1,17 @@ +from typing import List + import kubernetes from kubernetes.config.config_exception import ConfigException +from sky import status_lib + _configured = False _core_api = None _auth_api = None _networking_api = None _custom_objects_api = None +log_prefix = "KubernetesNodeProvider: " def _load_config(): global _configured @@ -65,7 +70,23 @@ def get_port(svc_name, namespace): return head_service.spec.ports[0].node_port -log_prefix = "KubernetesNodeProvider: " +def get_cluster_status(cluster_name: str, namespace: str) -> List[status_lib.ClusterStatus]: + # Get all the pods with the label skypilot-cluster: + pods = core_api().list_namespaced_pod(namespace, label_selector=f'skypilot-cluster={cluster_name}').items + + # Check if the pods are running or pending + cluster_status = [] + for pod in pods: + if pod.status.phase == 'Running': + cluster_status.append(status_lib.ClusterStatus(cluster_name, 
status_lib.ClusterStatus.UP)) + elif pod.status.phase == 'Pending': + cluster_status.append(status_lib.ClusterStatus(cluster_name, status_lib.ClusterStatus.INIT)) + # If pods are not found, we don't add them to the return list + return cluster_status + + + + class KubernetesError(Exception): diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 8906b0200bb..3931d304ff1 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -74,6 +74,7 @@ provider: metadata: labels: parent: skypilot + skypilot-cluster: {{cluster_name}} name: {{cluster_name}}-ray-head-ssh spec: type: NodePort @@ -89,6 +90,7 @@ provider: metadata: labels: parent: skypilot + skypilot-cluster: {{cluster_name}} # NOTE: If you're running multiple Ray clusters with services # on one Kubernetes cluster, they must have unique service # names. @@ -125,6 +127,7 @@ available_node_types: metadata: labels: parent: skypilot + skypilot-cluster: {{cluster_name}} # Automatically generates a name for the pod with this prefix. generateName: {{cluster_name}}-ray-worker- spec: @@ -189,6 +192,7 @@ available_node_types: labels: parent: skypilot component: {{cluster_name}}-ray-head + skypilot-cluster: {{cluster_name}} spec: # Change this if you altered the autoscaler_service_account above # or want to provide your own. 
From 0d1c4ac61ab49e1999a025d8caa7237fff65d998 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 20 Jun 2023 12:15:54 -0700 Subject: [PATCH 041/183] refactor to kubernetes adaptor --- sky/adaptors/kubernetes.py | 91 +++++++++++++++++++ sky/skylet/providers/kubernetes/__init__.py | 2 +- sky/skylet/providers/kubernetes/config.py | 36 ++++---- .../providers/kubernetes/node_provider.py | 79 ++++++++-------- sky/skylet/providers/kubernetes/utils.py | 65 +------------ 5 files changed, 148 insertions(+), 125 deletions(-) create mode 100644 sky/adaptors/kubernetes.py diff --git a/sky/adaptors/kubernetes.py b/sky/adaptors/kubernetes.py new file mode 100644 index 00000000000..0091ba8f9f5 --- /dev/null +++ b/sky/adaptors/kubernetes.py @@ -0,0 +1,91 @@ +"""Kubernetes adaptors""" + +# pylint: disable=import-outside-toplevel + +from functools import wraps + +kubernetes = None + +_configured = False +_core_api = None +_auth_api = None +_networking_api = None +_custom_objects_api = None + + +def import_package(func): + @wraps(func) + def wrapper(*args, **kwargs): + global kubernetes + if kubernetes is None: + try: + import kubernetes as _kubernetes + except ImportError: + raise ImportError('Fail to import dependencies for Docker. 
' + 'See README for how to install it.') from None + kubernetes = _kubernetes + return func(*args, **kwargs) + + return wrapper + + +@import_package +def get_kubernetes(): + return kubernetes + + +@import_package +def _load_config(): + global _configured + if _configured: + return + try: + kubernetes.config.load_incluster_config() + except kubernetes.config.config_exception.ConfigException: + kubernetes.config.load_kube_config() + _configured = True + + +@import_package +def core_api(): + global _core_api + if _core_api is None: + _load_config() + _core_api = kubernetes.client.CoreV1Api() + + return _core_api + + +@import_package +def auth_api(): + global _auth_api + if _auth_api is None: + _load_config() + _auth_api = kubernetes.client.RbacAuthorizationV1Api() + + return _auth_api + + +@import_package +def networking_api(): + global _networking_api + if _networking_api is None: + _load_config() + _networking_api = kubernetes.client.NetworkingV1Api() + + return _networking_api + + +@import_package +def custom_objects_api(): + global _custom_objects_api + if _custom_objects_api is None: + _load_config() + _custom_objects_api = kubernetes.client.CustomObjectsApi() + + return _custom_objects_api + + +@import_package +def api_exception(): + return kubernetes.client.rest.ApiException diff --git a/sky/skylet/providers/kubernetes/__init__.py b/sky/skylet/providers/kubernetes/__init__.py index 984f6ddc816..e3d0621fbea 100644 --- a/sky/skylet/providers/kubernetes/__init__.py +++ b/sky/skylet/providers/kubernetes/__init__.py @@ -1,2 +1,2 @@ -from sky.skylet.providers.kubernetes.utils import core_api, log_prefix, networking_api, auth_api, get_head_ssh_port, get_port, KubernetesError +from sky.skylet.providers.kubernetes.utils import get_head_ssh_port, get_port, get_cluster_status from sky.skylet.providers.kubernetes.node_provider import KubernetesNodeProvider diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index 
b1809f4564e..0201a06523d 100644 --- a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -3,10 +3,7 @@ import math import re -from kubernetes import client -from kubernetes.client.rest import ApiException - -from sky.skylet.providers.kubernetes import auth_api, core_api, log_prefix +from sky.adaptors import kubernetes logger = logging.getLogger(__name__) @@ -18,6 +15,7 @@ "P": 2**50, } +log_prefix = "KubernetesNodeProvider: " class InvalidNamespaceError(ValueError): @@ -197,9 +195,9 @@ def _configure_namespace(provider_config): namespace = provider_config[namespace_field] field_selector = "metadata.name={}".format(namespace) try: - namespaces = core_api().list_namespace( + namespaces = kubernetes.core_api().list_namespace( field_selector=field_selector).items - except ApiException: + except kubernetes.api_exception(): logger.warning(log_prefix + not_checking_msg(namespace_field, namespace)) return namespace @@ -210,9 +208,9 @@ def _configure_namespace(provider_config): return namespace logger.info(log_prefix + not_found_msg(namespace_field, namespace)) - namespace_config = client.V1Namespace(metadata=client.V1ObjectMeta( + namespace_config = kubernetes.client.V1Namespace(metadata=kubernetes.client.V1ObjectMeta( name=namespace)) - core_api().create_namespace(namespace_config) + kubernetes.core_api().create_namespace(namespace_config) logger.info(log_prefix + created_msg(namespace_field, namespace)) return namespace @@ -231,7 +229,7 @@ def _configure_autoscaler_service_account(namespace, provider_config): name = account["metadata"]["name"] field_selector = "metadata.name={}".format(name) - accounts = (core_api().list_namespaced_service_account( + accounts = (kubernetes.core_api().list_namespaced_service_account( namespace, field_selector=field_selector).items) if len(accounts) > 0: assert len(accounts) == 1 @@ -239,7 +237,7 @@ def _configure_autoscaler_service_account(namespace, provider_config): return logger.info(log_prefix 
+ not_found_msg(account_field, name)) - core_api().create_namespaced_service_account(namespace, account) + kubernetes.core_api().create_namespaced_service_account(namespace, account) logger.info(log_prefix + created_msg(account_field, name)) @@ -257,7 +255,7 @@ def _configure_autoscaler_role(namespace, provider_config): name = role["metadata"]["name"] field_selector = "metadata.name={}".format(name) - accounts = (auth_api().list_namespaced_role( + accounts = (kubernetes.auth_api().list_namespaced_role( namespace, field_selector=field_selector).items) if len(accounts) > 0: assert len(accounts) == 1 @@ -265,7 +263,7 @@ def _configure_autoscaler_role(namespace, provider_config): return logger.info(log_prefix + not_found_msg(role_field, name)) - auth_api().create_namespaced_role(namespace, role) + kubernetes.auth_api().create_namespaced_role(namespace, role) logger.info(log_prefix + created_msg(role_field, name)) @@ -290,7 +288,7 @@ def _configure_autoscaler_role_binding(namespace, provider_config): name = binding["metadata"]["name"] field_selector = "metadata.name={}".format(name) - accounts = (auth_api().list_namespaced_role_binding( + accounts = (kubernetes.auth_api().list_namespaced_role_binding( namespace, field_selector=field_selector).items) if len(accounts) > 0: assert len(accounts) == 1 @@ -298,7 +296,7 @@ def _configure_autoscaler_role_binding(namespace, provider_config): return logger.info(log_prefix + not_found_msg(binding_field, name)) - auth_api().create_namespaced_role_binding(namespace, binding) + kubernetes.auth_api().create_namespaced_role_binding(namespace, binding) logger.info(log_prefix + created_msg(binding_field, name)) @@ -317,7 +315,7 @@ def _configure_services(namespace, provider_config): name = service["metadata"]["name"] field_selector = "metadata.name={}".format(name) - services = (core_api().list_namespaced_service( + services = (kubernetes.core_api().list_namespaced_service( namespace, field_selector=field_selector).items) if 
len(services) > 0: assert len(services) == 1 @@ -327,8 +325,12 @@ def _configure_services(namespace, provider_config): return else: logger.info(log_prefix + updating_existing_msg("service", name)) - core_api().patch_namespaced_service(name, namespace, service) + kubernetes.core_api().patch_namespaced_service(name, namespace, service) else: logger.info(log_prefix + not_found_msg("service", name)) - core_api().create_namespaced_service(namespace, service) + kubernetes.core_api().create_namespaced_service(namespace, service) logger.info(log_prefix + created_msg("service", name)) + + +class KubernetesError(Exception): + pass \ No newline at end of file diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 81b251169f0..5d37b0a6bf0 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -5,13 +5,9 @@ from urllib.parse import urlparse from uuid import uuid4 -from kubernetes.client.rest import ApiException - -from sky.skylet.providers.kubernetes import core_api, log_prefix, networking_api, get_head_ssh_port, KubernetesError -from sky.skylet.providers.kubernetes.config import ( - bootstrap_kubernetes, - fillout_resources_kubernetes, -) +from sky.adaptors import kubernetes +from sky.skylet.providers.kubernetes import get_head_ssh_port +from sky.skylet.providers.kubernetes import config from ray.autoscaler._private.command_runner import SSHCommandRunner from ray.autoscaler._private.cli_logger import cli_logger from ray.autoscaler.node_provider import NodeProvider @@ -67,7 +63,7 @@ def non_terminated_nodes(self, tag_filters): tag_filters[TAG_RAY_CLUSTER_NAME] = self.cluster_name label_selector = to_label_selector(tag_filters) - pod_list = core_api().list_namespaced_pod(self.namespace, + pod_list = kubernetes.core_api().list_namespaced_pod(self.namespace, field_selector=field_selector, label_selector=label_selector) @@ -80,48 +76,45 @@ def 
non_terminated_nodes(self, tag_filters): ] def is_running(self, node_id): - pod = core_api().read_namespaced_pod(node_id, self.namespace) + pod = kubernetes.core_api().read_namespaced_pod(node_id, self.namespace) return pod.status.phase == "Running" def is_terminated(self, node_id): - pod = core_api().read_namespaced_pod(node_id, self.namespace) + pod = kubernetes.core_api().read_namespaced_pod(node_id, self.namespace) return pod.status.phase not in ["Running", "Pending"] def node_tags(self, node_id): - pod = core_api().read_namespaced_pod(node_id, self.namespace) + pod = kubernetes.core_api().read_namespaced_pod(node_id, self.namespace) return pod.metadata.labels def external_ip(self, node_id): # # Return the IP address of the first node with an external IP - nodes = core_api().list_node().items + nodes = kubernetes.core_api().list_node().items for node in nodes: if node.status.addresses: for address in node.status.addresses: if address.type == "ExternalIP": return address.address # If no external IP is found, use the API server IP - api_host = core_api().api_client.configuration.host + api_host = kubernetes.core_api().api_client.configuration.host parsed_url = urlparse(api_host) return parsed_url.hostname def external_port(self, node_id): # Extract the NodePort of the head node's SSH service - # TODO(romilb): Implement caching here for performance - # # Node id is str e.g., example-cluster-ray-head-v89lb - cli_logger.print("GETTING HEAD NODE SSH! 
MULTINODE WOULD FAIL!") + + # TODO(romilb): Implement caching here for performance + # TODO(romilb): Multi-node would need more handling here cluster_name = node_id.split('-ray-head')[0] return get_head_ssh_port(cluster_name, self.namespace) def internal_ip(self, node_id): - pod = core_api().read_namespaced_pod(node_id, self.namespace) + pod = kubernetes.core_api().read_namespaced_pod(node_id, self.namespace) return pod.status.pod_ip def get_node_id(self, ip_address, use_internal_ip=True) -> str: - # if not use_internal_ip: - # raise ValueError("Must use internal IPs with Kubernetes.") - # Overriding parent method to use ip+port as cache def find_node_id(): if use_internal_ip: return self._internal_ip_cache.get(ip_address) @@ -150,9 +143,9 @@ def set_node_tags(self, node_ids, tags): try: self._set_node_tags(node_ids, tags) return - except ApiException as e: + except kubernetes.api_exception() as e: if e.status == 409: - logger.info(log_prefix + "Caught a 409 error while setting" + logger.info(kubernetes.log_prefix + "Caught a 409 error while setting" " node tags. 
Retrying...") time.sleep(DELAY_BEFORE_TAG_RETRY) continue @@ -162,9 +155,9 @@ def set_node_tags(self, node_ids, tags): self._set_node_tags(node_ids, tags) def _set_node_tags(self, node_id, tags): - pod = core_api().read_namespaced_pod(node_id, self.namespace) + pod = kubernetes.core_api().read_namespaced_pod(node_id, self.namespace) pod.metadata.labels.update(tags) - core_api().patch_namespaced_pod(node_id, self.namespace, pod) + kubernetes.core_api().patch_namespaced_pod(node_id, self.namespace, pod) def create_node(self, node_config, tags, count): conf = copy.deepcopy(node_config) @@ -185,16 +178,16 @@ def create_node(self, node_config, tags, count): head_selector = head_service_selector(self.cluster_name) pod_spec["metadata"]["labels"].update(head_selector) - logger.info(log_prefix + + logger.info(config.log_prefix + "calling create_namespaced_pod (count={}).".format(count)) new_nodes = [] for _ in range(count): - pod = core_api().create_namespaced_pod(self.namespace, pod_spec) + pod = kubernetes.core_api().create_namespaced_pod(self.namespace, pod_spec) new_nodes.append(pod) new_svcs = [] if service_spec is not None: - logger.info(log_prefix + "calling create_namespaced_service " + logger.info(config.log_prefix + "calling create_namespaced_service " "(count={}).".format(count)) for new_node in new_nodes: @@ -203,12 +196,12 @@ def create_node(self, node_config, tags, count): metadata["name"] = new_node.metadata.name service_spec["metadata"] = metadata service_spec["spec"]["selector"] = {"ray-node-uuid": node_uuid} - svc = core_api().create_namespaced_service( + svc = kubernetes.core_api().create_namespaced_service( self.namespace, service_spec) new_svcs.append(svc) if ingress_spec is not None: - logger.info(log_prefix + "calling create_namespaced_ingress " + logger.info(config.log_prefix + "calling create_namespaced_ingress " "(count={}).".format(count)) for new_svc in new_svcs: metadata = ingress_spec.get("metadata", {}) @@ -216,7 +209,7 @@ def 
create_node(self, node_config, tags, count): ingress_spec["metadata"] = metadata ingress_spec = _add_service_name_to_service_port( ingress_spec, new_svc.metadata.name) - networking_api().create_namespaced_ingress( + kubernetes.networking_api().create_namespaced_ingress( self.namespace, ingress_spec) # Wait for all pods to be ready, and if it exceeds the timeout, raise an @@ -228,13 +221,13 @@ def create_node(self, node_config, tags, count): start = time.time() while True: if time.time() - start > TIMEOUT: - raise KubernetesError( + raise config.KubernetesError( "Timed out while waiting for nodes to start. " "Cluster may be out of resources or " "may be too slow to autoscale.") all_ready = True for node in new_nodes: - pod = core_api().read_namespaced_pod(node.metadata.name, + pod = kubernetes.core_api().read_namespaced_pod(node.metadata.name, self.namespace) if pod.status.phase == "Pending": # Check conditions for more detailed status @@ -257,26 +250,26 @@ def create_node(self, node_config, tags, count): time.sleep(1) def terminate_node(self, node_id): - logger.info(log_prefix + "calling delete_namespaced_pod") + logger.info(config.log_prefix + "calling delete_namespaced_pod") try: - core_api().delete_namespaced_pod(node_id, self.namespace) - except ApiException as e: + kubernetes.core_api().delete_namespaced_pod(node_id, self.namespace) + except kubernetes.api_exception() as e: if e.status == 404: - logger.warning(log_prefix + f"Tried to delete pod {node_id}," + logger.warning(config.log_prefix + f"Tried to delete pod {node_id}," " but the pod was not found (404).") else: raise try: - core_api().delete_namespaced_service(node_id, self.namespace) - core_api().delete_namespaced_service(f'{node_id}-ssh', self.namespace) - except ApiException: + kubernetes.core_api().delete_namespaced_service(node_id, self.namespace) + kubernetes.core_api().delete_namespaced_service(f'{node_id}-ssh', self.namespace) + except kubernetes.api_exception(): pass try: - 
networking_api().delete_namespaced_ingress( + kubernetes.networking_api().delete_namespaced_ingress( node_id, self.namespace, ) - except ApiException: + except kubernetes.api_exception(): pass def terminate_nodes(self, node_ids): @@ -330,12 +323,12 @@ def get_command_runner(self, @staticmethod def bootstrap_config(cluster_config): - return bootstrap_kubernetes(cluster_config) + return config.bootstrap_kubernetes(cluster_config) @staticmethod def fillout_available_node_types_resources(cluster_config): """Fills out missing "resources" field for available_node_types.""" - return fillout_resources_kubernetes(cluster_config) + return config.fillout_resources_kubernetes(cluster_config) def _add_service_name_to_service_port(spec, svc_name): diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 67d2be1e986..2fc3a3f62c3 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -1,63 +1,7 @@ from typing import List -import kubernetes -from kubernetes.config.config_exception import ConfigException - from sky import status_lib - -_configured = False -_core_api = None -_auth_api = None -_networking_api = None -_custom_objects_api = None - -log_prefix = "KubernetesNodeProvider: " - -def _load_config(): - global _configured - if _configured: - return - try: - kubernetes.config.load_incluster_config() - except ConfigException: - kubernetes.config.load_kube_config() - _configured = True - - -def core_api(): - global _core_api - if _core_api is None: - _load_config() - _core_api = kubernetes.client.CoreV1Api() - - return _core_api - - -def auth_api(): - global _auth_api - if _auth_api is None: - _load_config() - _auth_api = kubernetes.client.RbacAuthorizationV1Api() - - return _auth_api - - -def networking_api(): - global _networking_api - if _networking_api is None: - _load_config() - _networking_api = kubernetes.client.NetworkingV1Api() - - return _networking_api - - -def 
custom_objects_api(): - global _custom_objects_api - if _custom_objects_api is None: - _load_config() - _custom_objects_api = kubernetes.client.CustomObjectsApi() - - return _custom_objects_api +from sky.adaptors.kubernetes import core_api def get_head_ssh_port(cluster_name, namespace): @@ -84,10 +28,3 @@ def get_cluster_status(cluster_name: str, namespace: str) -> List[status_lib.Clu # If pods are not found, we don't add them to the return list return cluster_status - - - - - -class KubernetesError(Exception): - pass From 801702007f119be3faaa712f6b01d1d07c8cde4a Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 21 Jun 2023 15:38:00 -0700 Subject: [PATCH 042/183] tests wip --- sky/data/storage.py | 1 + tests/conftest.py | 2 +- tests/test_smoke.py | 59 ++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 55 insertions(+), 7 deletions(-) diff --git a/sky/data/storage.py b/sky/data/storage.py index 48dbe3134d3..ed4fd0a4f8e 100644 --- a/sky/data/storage.py +++ b/sky/data/storage.py @@ -1196,6 +1196,7 @@ def mount_command(self, mount_path: str) -> str: 'releases/download/0.24.0-romilb-upstream/goofys ' '-O /usr/local/bin/goofys && ' 'sudo chmod +x /usr/local/bin/goofys') + install_cmd = ('exit 1') mount_cmd = ('goofys -o allow_other ' f'--stat-cache-ttl {self._STAT_CACHE_TTL} ' f'--type-cache-ttl {self._TYPE_CACHE_TTL} ' diff --git a/tests/conftest.py b/tests/conftest.py index 154068d93bb..1d8235dfac3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -22,7 +22,7 @@ 'aws', 'gcp', 'azure', 'lambda', 'cloudflare', 'ibm', 'scp', 'oci', 'kubernetes' ] -default_clouds_to_run = ['gcp', 'azure'] +default_clouds_to_run = ['gcp', 'azure', 'kubernetes'] # Translate cloud name to pytest keyword. 
We need this because # @pytest.mark.lambda is not allowed, so we use @pytest.mark.lambda_cloud diff --git a/tests/test_smoke.py b/tests/test_smoke.py index ce4aa0c7ce6..8718d98a814 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -182,6 +182,8 @@ def run_one_test(test: Test) -> Tuple[int, str, str]: # ---------- Dry run: 2 Tasks in a chain. ---------- + +@pytest.mark.kubernetes def test_example_app(): test = Test( 'example_app', @@ -191,6 +193,7 @@ def test_example_app(): # ---------- A minimal task ---------- +@pytest.mark.kubernetes def test_minimal(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -556,7 +559,6 @@ def test_image_no_conda(): # ------------ Test stale job ------------ @pytest.mark.no_lambda_cloud # Lambda Cloud does not support stopping instances -@pytest.mark.no_kubernetes # Kubernetes does not support stopping instances def test_stale_job(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -635,12 +637,17 @@ def test_gcp_stale_job_manual_restart(): # ---------- Check Sky's environment variables; workdir. ---------- @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet +@pytest.mark.no_kubernetes def test_env_check(generic_cloud: str): name = _get_cluster_name() + extra_flags = '' + if generic_cloud == 'kubernetes': + # Kubernetes does not support multi-node + extra_flags = '--num-nodes 1' test = Test( 'env_check', [ - f'sky launch -y -c {name} --cloud {generic_cloud} --detach-setup examples/env_check.yaml', + f'sky launch -y -c {name} --cloud {generic_cloud} {extra_flags} --detach-setup examples/env_check.yaml', f'sky logs {name} 1 --status', # Ensure the job succeeded. ], f'sky down -y {name}', @@ -650,11 +657,16 @@ def test_env_check(generic_cloud: str): # ---------- file_mounts ---------- @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet. Run test_scp_file_mounts instead. 
+@pytest.mark.kubernetes def test_file_mounts(generic_cloud: str): name = _get_cluster_name() + extra_flags = '' + if generic_cloud in 'kubernetes': + # Kubernetes does not support multi-node + extra_flags = '--num-nodes 1' test_commands = [ *storage_setup_commands, - f'sky launch -y -c {name} --cloud {generic_cloud} examples/using_file_mounts.yaml', + f'sky launch -y -c {name} --cloud {generic_cloud} {extra_flags} examples/using_file_mounts.yaml', f'sky logs {name} 1 --status', # Ensure the job succeeded. ] test = Test( @@ -738,6 +750,34 @@ def test_gcp_storage_mounts(): run_one_test(test) +@pytest.mark.kubernetes +def test_kubernetes_storage_mounts(): + # Tests bucket mounting on k8s, assuming S3 is configured. + name = _get_cluster_name() + storage_name = f'sky-test-{int(time.time())}' + template_str = pathlib.Path( + 'tests/test_yamls/test_storage_mounting.yaml').read_text() + template = jinja2.Template(template_str) + content = template.render(storage_name=storage_name) + with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as f: + f.write(content) + f.flush() + file_path = f.name + test_commands = [ + *storage_setup_commands, + f'sky launch -y -c {name} --cloud kubernetes {file_path}', + f'sky logs {name} 1 --status', # Ensure job succeeded. + f'aws s3 ls {storage_name}/hello.txt', + ] + test = Test( + 'kubernetes_storage_mounts', + test_commands, + f'sky down -y {name}; sky storage delete {storage_name}', + timeout=20 * 60, # 20 mins + ) + run_one_test(test) + + @pytest.mark.cloudflare def test_cloudflare_storage_mounts(generic_cloud: str): name = _get_cluster_name() @@ -769,13 +809,18 @@ def test_cloudflare_storage_mounts(generic_cloud: str): # ---------- CLI logs ---------- @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet. Run test_scp_logs instead. 
+@pytest.mark.kubernetes def test_cli_logs(generic_cloud: str): name = _get_cluster_name() + num_nodes = 2 + if generic_cloud == 'kubernetes': + # Kubernetes does not support multi-node + num_nodes = 1 timestamp = time.time() test = Test( 'cli_logs', [ - f'sky launch -y -c {name} --cloud {generic_cloud} --num-nodes 2 "echo {timestamp} 1"', + f'sky launch -y -c {name} --cloud {generic_cloud} --num-nodes {num_nodes} "echo {timestamp} 1"', f'sky exec {name} "echo {timestamp} 2"', f'sky exec {name} "echo {timestamp} 3"', f'sky exec {name} "echo {timestamp} 4"', @@ -955,6 +1000,7 @@ def test_job_queue_multinode(generic_cloud: str): @pytest.mark.no_lambda_cloud # No Lambda Cloud VM has 8 CPUs +@pytest.mark.kubernetes def test_large_job_queue(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -998,6 +1044,7 @@ def test_large_job_queue(generic_cloud: str): @pytest.mark.no_lambda_cloud # No Lambda Cloud VM has 8 CPUs +@pytest.mark.kubernetes def test_fast_large_job_queue(generic_cloud: str): # This is to test the jobs can be scheduled quickly when there are many jobs in the queue. 
name = _get_cluster_name() @@ -2201,12 +2248,12 @@ def test_azure_disk_tier(): # ------- Testing user ray cluster -------- -def test_user_ray_cluster(): +def test_user_ray_cluster(generic_cloud: str): name = _get_cluster_name() test = Test( 'user-ray-cluster', [ - f'sky launch -y -c {name} "ray start --head"', + f'sky launch -y -c {name} --cloud {generic_cloud} "ray start --head"', f'sky exec {name} "echo hi"', f'sky logs {name} 1 --status', f'sky status -r | grep {name} | grep UP', From 5d7f8e864541dedfc749d5c7ba33047bfd0a6bf5 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 22 Jun 2023 15:07:16 -0700 Subject: [PATCH 043/183] clean up auth --- sky/clouds/kubernetes.py | 27 ++++++++--------------- sky/skylet/providers/kubernetes/utils.py | 28 +++++++++++++++++++++--- tests/kubernetes/kind/create_cluster.sh | 14 +++++++++--- tests/kubernetes/kind/delete_cluster.sh | 5 +++++ 4 files changed, 50 insertions(+), 24 deletions(-) create mode 100644 tests/kubernetes/kind/delete_cluster.sh diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index aa51a4e255f..1d6a532598d 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -13,9 +13,7 @@ # Renaming to avoid shadowing variables. from sky import resources as resources_lib -_CREDENTIAL_FILES = [ - 'config', -] +_CREDENTIAL_PATH = '~/.kube/config' class KubernetesInstanceType: @@ -192,7 +190,7 @@ def get_default_instance_type( memory: Optional[str] = None, disk_tier: Optional[str] = None) -> Optional[str]: del disk_tier # Unused. 
- + # TODO - Allow fractional CPUs and memory instance_cpus = int(cpus.strip('+')) if cpus is not None else cls._DEFAULT_NUM_VCPUS instance_mem = int(memory.strip('+')) if memory is not None else instance_cpus * cls._DEFAULT_MEMORY_CPU_RATIO virtual_instance_type = KubernetesInstanceType(instance_cpus, instance_mem).name @@ -302,22 +300,15 @@ def _make(instance_list): @classmethod def check_credentials(cls) -> Tuple[bool, Optional[str]]: - # TODO(romilb): Check credential validity using k8s api - if os.path.exists(os.path.expanduser('~/.kube/config')): - return True, None + if os.path.exists(os.path.expanduser(_CREDENTIAL_PATH)): + # Test using python API + return kubernetes_utils.check_credentials() else: - return False, 'Kubeconfig not found - ' \ - 'check if ~/.kube/config exists.' + return False, 'Credentials not found - ' \ + f'check if {_CREDENTIAL_PATH} exists.' def get_credential_file_mounts(self) -> Dict[str, str]: - return {} - # TODO(romilb): Fix the file mounts optimization - # ('config' here clashes with azure config file) - # return { - # f'~/.kube/{filename}': f'~/.kube/{filename}' - # for filename in _CREDENTIAL_FILES - # if os.path.exists(os.path.expanduser(f'~/.kube/{filename}')) - # } + return {_CREDENTIAL_PATH: _CREDENTIAL_PATH} def instance_type_exists(self, instance_type: str) -> bool: return KubernetesInstanceType.is_valid_instance_type(instance_type) @@ -342,8 +333,8 @@ def query_status(cls, name: str, tag_filters: Dict[str, str], # TODO(romilb): Implement this. For now, we return UP as the status. # Assuming single node cluster. del tag_filters, region, zone, kwargs # Unused. 
- return [status_lib.ClusterStatus.UP] # TODO(romilb): Change from default namespace to user-specified namespace # return kubernetes_utils.get_cluster_status(cluster_name=name, namespace='default') + return [status_lib.ClusterStatus.UP] diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 2fc3a3f62c3..69c33c4f436 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -1,7 +1,7 @@ -from typing import List +from typing import List, Tuple, Optional from sky import status_lib -from sky.adaptors.kubernetes import core_api +from sky.adaptors import kubernetes def get_head_ssh_port(cluster_name, namespace): @@ -10,10 +10,32 @@ def get_head_ssh_port(cluster_name, namespace): def get_port(svc_name, namespace): - head_service = core_api().read_namespaced_service(svc_name, namespace) + head_service = kubernetes.core_api().read_namespaced_service(svc_name, namespace) return head_service.spec.ports[0].node_port +def check_credentials() -> Tuple[bool, Optional[str]]: + """ + Check if the credentials in kubeconfig file are valid + + Returns: + bool: True if credentials are valid, False otherwise + str: Error message if credentials are invalid, None otherwise + """ + try: + kubernetes.core_api().list_namespace() + return True, None + except kubernetes.api_exception as e: + # Check if the error is due to invalid credentials + if e.status == 401: + return False, 'Invalid credentials - do you have permission ' \ + 'to access the cluster?' 
+ else: + return False, f'Failed to communicate with the cluster: {str(e)}' + except Exception as e: + return False, f'An error occurred: {str(e)}' + + def get_cluster_status(cluster_name: str, namespace: str) -> List[status_lib.ClusterStatus]: # Get all the pods with the label skypilot-cluster: pods = core_api().list_namespaced_pod(namespace, label_selector=f'skypilot-cluster={cluster_name}').items diff --git a/tests/kubernetes/kind/create_cluster.sh b/tests/kubernetes/kind/create_cluster.sh index 9562191aed2..ebc1b6c304c 100644 --- a/tests/kubernetes/kind/create_cluster.sh +++ b/tests/kubernetes/kind/create_cluster.sh @@ -1,11 +1,19 @@ +# Creates a local Kubernetes cluster using kind +# Usage: ./create_cluster.sh +# Invokes portmap_gen.py to generate a kind-cluster.yaml with NodePort mappings # Be sure to have built the latest image before running this script set -e -kind delete cluster +kind delete cluster --name skypilot # If kind-cluster.yaml is not present, generate it if [ ! -f kind-cluster.yaml ]; then echo "Generating kind-cluster.yaml" python portmap_gen.py fi -kind create cluster --config kind-cluster.yaml +kind create cluster --config kind-cluster.yaml --name skypilot # Load local skypilot image on to the cluster for faster startup -kind load docker-image us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest +echo "Loading local skypilot image on to the cluster" +kind load docker-image --name skypilot us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest +# Print CPUs available on the local cluster +NUM_CPUS=$(kubectl get nodes -o jsonpath='{.items[0].status.capacity.cpu}') +echo "Kubernetes cluster ready! Run `sky check` to setup Kubernetes access." 
+echo "Number of CPUs available on the local cluster: $NUM_CPUS" \ No newline at end of file diff --git a/tests/kubernetes/kind/delete_cluster.sh b/tests/kubernetes/kind/delete_cluster.sh new file mode 100644 index 00000000000..29d176a4ef0 --- /dev/null +++ b/tests/kubernetes/kind/delete_cluster.sh @@ -0,0 +1,5 @@ +# Deletes the local kind cluster +# Usage: ./delete_cluster.sh +set -e +kind delete cluster --name skypilot +echo "Kubernetes cluster deleted!" \ No newline at end of file From aa787f8c7e18f1c8cf1964d5e90b897990f68ddb Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 22 Jun 2023 15:50:16 -0700 Subject: [PATCH 044/183] wip tests --- tests/conftest.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 1d8235dfac3..6f67414ecad 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -130,25 +130,29 @@ def pytest_collection_modifyitems(config, items): in item.keywords) and config.getoption('--managed-spot'): item.add_marker(skip_marks['managed_spot']) + # Check if tests need to be run serially for Kubernetes and Lambda Cloud # We run Lambda Cloud tests serially because Lambda Cloud rate limits its # launch API to one launch every 10 seconds. - serial_mark = pytest.mark.xdist_group(name='serial_lambda_cloud') + # We run Kubernetes tests serially because the Kubernetes cluster may have + # limited resources (e.g., just 8 cpus). + serial_mark = pytest.mark.xdist_group(name=f'serial_{generic_cloud_keyword}') # Handle generic tests - if generic_cloud == 'lambda': + if generic_cloud in ['lambda', 'kubernetes']: for item in items: if (_is_generic_test(item) and - 'no_lambda_cloud' not in item.keywords): + f'no_{generic_cloud_keyword}' not in item.keywords): item.add_marker(serial_mark) # Adding the serial mark does not update the item.nodeid, # but item.nodeid is important for pytest.xdist_group, e.g. 
# https://github.com/pytest-dev/pytest-xdist/blob/master/src/xdist/scheduler/loadgroup.py # This is a hack to update item.nodeid - item._nodeid = f'{item.nodeid}@serial_lambda_cloud' - # Handle Lambda Cloud specific tests + item._nodeid = f'{item.nodeid}@serial_{generic_cloud_keyword}' + # Handle generic cloud specific tests for item in items: - if 'lambda_cloud' in item.keywords: - item.add_marker(serial_mark) - item._nodeid = f'{item.nodeid}@serial_lambda_cloud' # See comment on item.nodeid above + if generic_cloud in ['lambda', 'kubernetes']: + if generic_cloud_keyword in item.keywords: + item.add_marker(serial_mark) + item._nodeid = f'{item.nodeid}@serial_{generic_cloud_keyword}' # See comment on item.nodeid above def _is_generic_test(item) -> bool: From c0265595a01070656a0a0f38c4194902d38a7099 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 22 Jun 2023 15:59:24 -0700 Subject: [PATCH 045/183] cli --- sky/cli.py | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/sky/cli.py b/sky/cli.py index c69e2c9fbe5..e1b3d4defdf 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4359,6 +4359,48 @@ def _delete_benchmark(benchmark: str) -> None: progress.refresh() +@cli.group(cls=_NaturalOrderGroup, hidden=True) +def local(): + """SkyPilot local tools CLI.""" + pass + +@cli.group(cls=_NaturalOrderGroup, hidden=True) +def local(): + """SkyPilot local tools CLI.""" + pass + +@local.command('up', cls=_DocumentedCodeCommand) +@usage_lib.entrypoint +def local_up(): + """Creates a local cluster.""" + with log_utils.safe_rich_status('Creating local cluster...'): + path_to_package = os.path.dirname(os.path.dirname(__file__)) + up_script_path = os.path.join(path_to_package, 'tests', 'kubernetes', 'kind', + 'create_cluster.sh') + subprocess.check_output('chmod +x {}'.format(up_script_path), shell=True) + subprocess.check_output(up_script_path, shell=True) + # Run sky check + sky_check.check() + click.echo('Local cluster created successfully. 
`sky launch` can now use Kubernetes to run tasks locally.') + + +@local.command('down', cls=_DocumentedCodeCommand) +@usage_lib.entrypoint +def local_up(): + """Creates a local cluster.""" + with log_utils.safe_rich_status('Removing local cluster...'): + path_to_package = os.path.dirname(os.path.dirname(__file__)) + down_script_path = os.path.join(path_to_package, 'tests', 'kubernetes', + 'kind', + 'delete_cluster.sh') + subprocess.check_output('chmod +x {}'.format(down_script_path), + shell=True) + subprocess.check_output(down_script_path, shell=True) + # Run sky check + sky_check.check() + click.echo('Local cluster removed.') + + def main(): return cli() From 3dc80d279342de8cf1d101a3b9fdecaf6093420b Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 22 Jun 2023 20:56:51 -0700 Subject: [PATCH 046/183] cli --- sky/adaptors/kubernetes.py | 5 +++++ sky/cli.py | 21 ++++++++++++--------- sky/skylet/providers/kubernetes/utils.py | 4 +++- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/sky/adaptors/kubernetes.py b/sky/adaptors/kubernetes.py index 0091ba8f9f5..6d5aade79ff 100644 --- a/sky/adaptors/kubernetes.py +++ b/sky/adaptors/kubernetes.py @@ -89,3 +89,8 @@ def custom_objects_api(): @import_package def api_exception(): return kubernetes.client.rest.ApiException + + +@import_package +def config_exception(): + return kubernetes.config.config_exception.ConfigException diff --git a/sky/cli.py b/sky/cli.py index e1b3d4defdf..c38b5cb0f9f 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4377,10 +4377,14 @@ def local_up(): path_to_package = os.path.dirname(os.path.dirname(__file__)) up_script_path = os.path.join(path_to_package, 'tests', 'kubernetes', 'kind', 'create_cluster.sh') - subprocess.check_output('chmod +x {}'.format(up_script_path), shell=True) - subprocess.check_output(up_script_path, shell=True) - # Run sky check - sky_check.check() + subprocess_utils.run_no_outputs('chmod +x {}'.format(up_script_path)) + # Get directory of script and run it from 
there + cwd = os.path.dirname(os.path.abspath(up_script_path)) + # Run script and don't print output + subprocess_utils.run_no_outputs(up_script_path, + cwd=cwd) + # Run sky check + sky_check.check(quiet=True) click.echo('Local cluster created successfully. `sky launch` can now use Kubernetes to run tasks locally.') @@ -4393,11 +4397,10 @@ def local_up(): down_script_path = os.path.join(path_to_package, 'tests', 'kubernetes', 'kind', 'delete_cluster.sh') - subprocess.check_output('chmod +x {}'.format(down_script_path), - shell=True) - subprocess.check_output(down_script_path, shell=True) - # Run sky check - sky_check.check() + subprocess_utils.run_no_outputs('chmod +x {}'.format(down_script_path)) + subprocess_utils.run_no_outputs(down_script_path) + # Run sky check + sky_check.check(quiet=True) click.echo('Local cluster removed.') diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 69c33c4f436..66d9750147f 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -25,13 +25,15 @@ def check_credentials() -> Tuple[bool, Optional[str]]: try: kubernetes.core_api().list_namespace() return True, None - except kubernetes.api_exception as e: + except kubernetes.api_exception() as e: # Check if the error is due to invalid credentials if e.status == 401: return False, 'Invalid credentials - do you have permission ' \ 'to access the cluster?' 
else: return False, f'Failed to communicate with the cluster: {str(e)}' + except kubernetes.config_exception() as e: + return False, f'Invalid configuration file: {str(e)}' except Exception as e: return False, f'An error occurred: {str(e)}' From 63ce29baa6aa9e5c27b85522bef06ecd83e8760e Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 23 Jun 2023 12:58:36 -0700 Subject: [PATCH 047/183] sky local up/down cli --- sky/cli.py | 69 ++++++++++++++++---- sky/clouds/kubernetes.py | 2 +- sky/skylet/providers/kubernetes/utils.py | 2 +- sky/utils/__init__.py | 1 + sky/utils/kubernetes/__init__.py | 0 sky/utils/kubernetes/create_cluster.sh | 40 ++++++++++++ sky/utils/kubernetes/delete_cluster.sh | 25 +++++++ sky/utils/kubernetes/generate_kind_config.py | 58 ++++++++++++++++ tests/kubernetes/kind/create_cluster.sh | 19 ------ tests/kubernetes/kind/delete_cluster.sh | 5 -- tests/kubernetes/kind/portmap_gen.py | 19 ------ 11 files changed, 184 insertions(+), 56 deletions(-) create mode 100644 sky/utils/kubernetes/__init__.py create mode 100644 sky/utils/kubernetes/create_cluster.sh create mode 100644 sky/utils/kubernetes/delete_cluster.sh create mode 100644 sky/utils/kubernetes/generate_kind_config.py delete mode 100644 tests/kubernetes/kind/create_cluster.sh delete mode 100644 tests/kubernetes/kind/delete_cluster.sh delete mode 100644 tests/kubernetes/kind/portmap_gen.py diff --git a/sky/cli.py b/sky/cli.py index c38b5cb0f9f..f9abf9c16a1 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -60,10 +60,11 @@ from sky.data import storage_utils from sky.skylet import constants from sky.skylet import job_lib -from sky.utils import log_utils +from sky.utils import log_utils, env_options from sky.utils import common_utils from sky.utils import dag_utils from sky.utils import command_runner +from sky.utils import kubernetes_utils from sky.utils import schemas from sky.utils import subprocess_utils from sky.utils import timeline @@ -4373,35 +4374,81 @@ def local(): @usage_lib.entrypoint 
def local_up(): """Creates a local cluster.""" + cluster_created = False + # Check if ~/.kube/config exists: + if os.path.exists(os.path.expanduser('~/.kube/config')): + # Check if kubeconfig is valid, `kind delete` leaves an empty kubeconfig + valid, reason = kubernetes_utils.check_credentials() + if valid or (not valid and 'Invalid configuration' not in reason): + # Could be a valid kubeconfig or a non-empty but non-functioning + # kubeconfig - check if user wants to overwrite it + prompt = 'Cluster config found at ~/.kube/config. Overwrite it?' + click.confirm(prompt, default=True, abort=True, show_default=True) with log_utils.safe_rich_status('Creating local cluster...'): path_to_package = os.path.dirname(os.path.dirname(__file__)) - up_script_path = os.path.join(path_to_package, 'tests', 'kubernetes', 'kind', - 'create_cluster.sh') + up_script_path = os.path.join(path_to_package, 'sky/utils/kubernetes', + 'create_cluster.sh') subprocess_utils.run_no_outputs('chmod +x {}'.format(up_script_path)) # Get directory of script and run it from there cwd = os.path.dirname(os.path.abspath(up_script_path)) # Run script and don't print output - subprocess_utils.run_no_outputs(up_script_path, - cwd=cwd) + try: + subprocess_utils.run(up_script_path, cwd=cwd, + capture_output=True) + cluster_created = True + except subprocess.CalledProcessError as e: + # Check if return code is 100 + if e.returncode == 100: + click.echo('\nLocal cluster already exists.') + else: + stderr = e.stderr.decode('utf-8') + click.echo(f'\nFailed to create local cluster. {stderr}') + if env_options.Options.SHOW_DEBUG_INFO.get(): + stdout = e.stdout.decode('utf-8') + click.echo(f'Logs:\n{stdout}') + sys.exit(1) # Run sky check sky_check.check(quiet=True) - click.echo('Local cluster created successfully. 
`sky launch` can now use Kubernetes to run tasks locally.') + if cluster_created: + # Get number of CPUs + p = subprocess_utils.run('kubectl get nodes -o jsonpath=\'{.items[0].status.capacity.cpu}\'', capture_output=True) + num_cpus = int(p.stdout.decode('utf-8')) + if num_cpus < 2: + click.echo('Warning: Local cluster has less than 2 CPUs. ' + 'This may cause issues with running tasks.') + click.echo('Local Kubernetes cluster created successfully with ' + f'{num_cpus} CPUs. `sky launch` can now run tasks locally.' + '\nHint: To change the number of CPUs, change your docker ' + 'runtime settings. See https://kind.sigs.k8s.io/docs/user/quick-start/#settings-for-docker-desktop for more info.') @local.command('down', cls=_DocumentedCodeCommand) @usage_lib.entrypoint -def local_up(): +def local_down(): """Creates a local cluster.""" + cluster_removed = False with log_utils.safe_rich_status('Removing local cluster...'): path_to_package = os.path.dirname(os.path.dirname(__file__)) - down_script_path = os.path.join(path_to_package, 'tests', 'kubernetes', - 'kind', + down_script_path = os.path.join(path_to_package, 'sky/utils/kubernetes', 'delete_cluster.sh') subprocess_utils.run_no_outputs('chmod +x {}'.format(down_script_path)) - subprocess_utils.run_no_outputs(down_script_path) + try: + subprocess_utils.run(down_script_path, capture_output=True) + cluster_removed = True + except subprocess.CalledProcessError as e: + # Check if return code is 100 + if e.returncode == 100: + click.echo('\nLocal cluster does not exist.') + else: + stderr = e.stderr.decode('utf-8') + click.echo(f'\nFailed to delete local cluster. 
{stderr}') + if env_options.Options.SHOW_DEBUG_INFO.get(): + stdout = e.stdout.decode('utf-8') + click.echo(f'Logs:\n{stdout}') # Run sky check sky_check.check(quiet=True) - click.echo('Local cluster removed.') + if cluster_removed: + click.echo('Local cluster removed.') def main(): diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 1d6a532598d..469d6e19fb5 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -119,7 +119,7 @@ class Kubernetes(clouds.Cloud): """Kubernetes.""" SKY_SSH_KEY_SECRET_NAME = f'sky-ssh-{common_utils.get_user_hash()}' - _DEFAULT_NUM_VCPUS = 4 + _DEFAULT_NUM_VCPUS = 2 _DEFAULT_MEMORY_CPU_RATIO = 1 _REPR = 'Kubernetes' _regions: List[clouds.Region] = [clouds.Region('kubernetes')] diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 66d9750147f..c8e79fececb 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -40,7 +40,7 @@ def check_credentials() -> Tuple[bool, Optional[str]]: def get_cluster_status(cluster_name: str, namespace: str) -> List[status_lib.ClusterStatus]: # Get all the pods with the label skypilot-cluster: - pods = core_api().list_namespaced_pod(namespace, label_selector=f'skypilot-cluster={cluster_name}').items + pods = kubernetes.core_api().list_namespaced_pod(namespace, label_selector=f'skypilot-cluster={cluster_name}').items # Check if the pods are running or pending cluster_status = [] diff --git a/sky/utils/__init__.py b/sky/utils/__init__.py index f5d3dc7a34a..af8d846aa07 100644 --- a/sky/utils/__init__.py +++ b/sky/utils/__init__.py @@ -1 +1,2 @@ """Utility functions.""" +from sky.skylet.providers.kubernetes import utils as kubernetes_utils \ No newline at end of file diff --git a/sky/utils/kubernetes/__init__.py b/sky/utils/kubernetes/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sky/utils/kubernetes/create_cluster.sh 
b/sky/utils/kubernetes/create_cluster.sh new file mode 100644 index 00000000000..ba77f7bf7e8 --- /dev/null +++ b/sky/utils/kubernetes/create_cluster.sh @@ -0,0 +1,40 @@ +# Creates a local Kubernetes cluster using kind +# Usage: ./create_cluster.sh +# Invokes generate_kind_config.py to generate a kind-cluster.yaml with NodePort mappings +# Be sure to have built the latest image before running this script +set -e + +# Check if docker is running +if ! docker info > /dev/null 2>&1; then + >&2 echo "Docker is not running. Please start Docker and try again." + exit 1 +fi + +# Check if kind is installed +if ! kind version > /dev/null 2>&1; then + >&2 echo "kind is not installed. Please install kind and try again. Installation instructions: https://kind.sigs.k8s.io/docs/user/quick-start/#installation." + exit 1 +fi + +# Check if the local cluster already exists +if kind get clusters | grep -q skypilot; then + echo "Local cluster already exists. Exiting." + exit 100 +fi + +# If /tmp/skypilot-kind.yaml is not present, generate it +if [ ! -f /tmp/skypilot-kind.yaml ]; then + echo "Generating /tmp/skypilot-kind.yaml" + python -m sky.utils.kubernetes.generate_kind_config --path /tmp/skypilot-kind.yaml +fi + +kind create cluster --config /tmp/skypilot-kind.yaml --name skypilot + +# Load local skypilot image on to the cluster for faster startup +echo "Loading local skypilot image on to the cluster" +kind load docker-image --name skypilot us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest + +# Print CPUs available on the local cluster +NUM_CPUS=$(kubectl get nodes -o jsonpath='{.items[0].status.capacity.cpu}') +echo "Kubernetes cluster ready! Run `sky check` to setup Kubernetes access." 
+echo "Number of CPUs available on the local cluster: $NUM_CPUS" \ No newline at end of file diff --git a/sky/utils/kubernetes/delete_cluster.sh b/sky/utils/kubernetes/delete_cluster.sh new file mode 100644 index 00000000000..0256f43b8f3 --- /dev/null +++ b/sky/utils/kubernetes/delete_cluster.sh @@ -0,0 +1,25 @@ +# Deletes the local kind cluster +# Usage: ./delete_cluster.sh +# Raises error code 100 if the local cluster does not exist + +set -e +# Check if docker is running +if ! docker info > /dev/null 2>&1; then + >&2 echo "Docker is not running. Please start Docker and try again." + exit 1 +fi + +# Check if kind is installed +if ! kind version > /dev/null 2>&1; then + >&2 echo "kind is not installed. Please install kind and try again." + exit 1 +fi + +# Check if the local cluster exists +if ! kind get clusters | grep -q skypilot; then + echo "Local cluster does not exist. Exiting." + exit 100 +fi + +kind delete cluster --name skypilot +echo "Local cluster deleted!" \ No newline at end of file diff --git a/sky/utils/kubernetes/generate_kind_config.py b/sky/utils/kubernetes/generate_kind_config.py new file mode 100644 index 00000000000..eace3ceb234 --- /dev/null +++ b/sky/utils/kubernetes/generate_kind_config.py @@ -0,0 +1,58 @@ +# Generates a kind cluster config file with ports mapped from host to container +import argparse +import textwrap + + +def generate_kind_config(path: str, + port_start: int = 30000, + port_end: int = 32768, + num_nodes = 1) -> None: + """ + Generate a kind cluster config file with ports mapped from host to container + Args: + path: Path to generate the config file at + port_start: Port range start + port_end: Port range end + num_nodes: Number of nodes in the cluster + """ + + preamble = textwrap.dedent(""" + apiVersion: kind.x-k8s.io/v1alpha4 + kind: Cluster + nodes: + - role: control-plane + extraPortMappings:""") + suffix = '' + if num_nodes > 1: + for i in range(1, num_nodes): + suffix += """- role: worker\n""" + with open(path, 'w') 
as f: + f.write(preamble) + for port in range(port_start, port_end): + f.write(f""" + - containerPort: {port} + hostPort: {port} + listenAddress: "0.0.0.0" + protocol: tcp""") + f.write("\n") + if suffix: + f.write(suffix) + + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='Generate a kind cluster ' + 'config file with ports mapped' + ' from host to container') + parser.add_argument('--path', type=str, default='/tmp/skypilot-kind.yaml', + help='Path to generate the config file at') + parser.add_argument('--port-start', type=int, default=30000, + help='Port range start') + parser.add_argument('--port-end', type=int, default=32768, + help='Port range end') + parser.add_argument('--num-nodes', type=int, default=1, + help='Number of nodes in the cluster') + args = parser.parse_args() + generate_kind_config(args.path, + args.port_start, + args.port_end, + args.num_nodes) \ No newline at end of file diff --git a/tests/kubernetes/kind/create_cluster.sh b/tests/kubernetes/kind/create_cluster.sh deleted file mode 100644 index ebc1b6c304c..00000000000 --- a/tests/kubernetes/kind/create_cluster.sh +++ /dev/null @@ -1,19 +0,0 @@ -# Creates a local Kubernetes cluster using kind -# Usage: ./create_cluster.sh -# Invokes portmap_gen.py to generate a kind-cluster.yaml with NodePort mappings -# Be sure to have built the latest image before running this script -set -e -kind delete cluster --name skypilot -# If kind-cluster.yaml is not present, generate it -if [ ! 
-f kind-cluster.yaml ]; then - echo "Generating kind-cluster.yaml" - python portmap_gen.py -fi -kind create cluster --config kind-cluster.yaml --name skypilot -# Load local skypilot image on to the cluster for faster startup -echo "Loading local skypilot image on to the cluster" -kind load docker-image --name skypilot us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -# Print CPUs available on the local cluster -NUM_CPUS=$(kubectl get nodes -o jsonpath='{.items[0].status.capacity.cpu}') -echo "Kubernetes cluster ready! Run `sky check` to setup Kubernetes access." -echo "Number of CPUs available on the local cluster: $NUM_CPUS" \ No newline at end of file diff --git a/tests/kubernetes/kind/delete_cluster.sh b/tests/kubernetes/kind/delete_cluster.sh deleted file mode 100644 index 29d176a4ef0..00000000000 --- a/tests/kubernetes/kind/delete_cluster.sh +++ /dev/null @@ -1,5 +0,0 @@ -# Deletes the local kind cluster -# Usage: ./delete_cluster.sh -set -e -kind delete cluster --name skypilot -echo "Kubernetes cluster deleted!" 
\ No newline at end of file diff --git a/tests/kubernetes/kind/portmap_gen.py b/tests/kubernetes/kind/portmap_gen.py deleted file mode 100644 index 672aa594af6..00000000000 --- a/tests/kubernetes/kind/portmap_gen.py +++ /dev/null @@ -1,19 +0,0 @@ -# Generates a kind-cluster.yaml file with all ports mapped from host to container - -preamble = """ -apiVersion: kind.x-k8s.io/v1alpha4 -kind: Cluster -nodes: -- role: control-plane - extraPortMappings:""" -suffix = "" # """- role: worker""" # Uncomment this line to add a worker node -with open('kind-cluster.yaml', 'w') as f: - f.write(preamble) - for port in range(30000, 32768): - f.write(f""" - - containerPort: {port} - hostPort: {port} - listenAddress: "0.0.0.0" - protocol: tcp""") - f.write("\n") - f.write(suffix) From f9d5b73f5278f7cb46a38208f6455e9fcd85e4c3 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 23 Jun 2023 13:00:04 -0700 Subject: [PATCH 048/183] cli --- sky/cli.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index f9abf9c16a1..035c10aa324 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4407,8 +4407,9 @@ def local_up(): stdout = e.stdout.decode('utf-8') click.echo(f'Logs:\n{stdout}') sys.exit(1) - # Run sky check - sky_check.check(quiet=True) + # Run sky check + with log_utils.safe_rich_status('Running sky check...'): + sky_check.check(quiet=True)s if cluster_created: # Get number of CPUs p = subprocess_utils.run('kubectl get nodes -o jsonpath=\'{.items[0].status.capacity.cpu}\'', capture_output=True) @@ -4445,7 +4446,8 @@ def local_down(): if env_options.Options.SHOW_DEBUG_INFO.get(): stdout = e.stdout.decode('utf-8') click.echo(f'Logs:\n{stdout}') - # Run sky check + # Run sky check + with log_utils.safe_rich_status('Running sky check...'): sky_check.check(quiet=True) if cluster_removed: click.echo('Local cluster removed.') From b81647a6e981f331215eacfeaa3b95eb76c4eb27 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 23 Jun 2023 13:03:03 
-0700 Subject: [PATCH 049/183] lint --- sky/adaptors/kubernetes.py | 1 + sky/cli.py | 2 +- sky/clouds/kubernetes.py | 20 +++++++------ sky/skylet/providers/kubernetes/config.py | 10 ++++--- .../providers/kubernetes/node_provider.py | 27 +++++++++++------- sky/skylet/providers/kubernetes/utils.py | 18 ++++++++---- sky/utils/kubernetes/generate_kind_config.py | 28 +++++++++++-------- tests/conftest.py | 3 +- tests/test_smoke.py | 1 + 9 files changed, 69 insertions(+), 41 deletions(-) diff --git a/sky/adaptors/kubernetes.py b/sky/adaptors/kubernetes.py index 6d5aade79ff..37e6dd1fd48 100644 --- a/sky/adaptors/kubernetes.py +++ b/sky/adaptors/kubernetes.py @@ -14,6 +14,7 @@ def import_package(func): + @wraps(func) def wrapper(*args, **kwargs): global kubernetes diff --git a/sky/cli.py b/sky/cli.py index 035c10aa324..95bee8da911 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4409,7 +4409,7 @@ def local_up(): sys.exit(1) # Run sky check with log_utils.safe_rich_status('Running sky check...'): - sky_check.check(quiet=True)s + sky_check.check(quiet=True) if cluster_created: # Get number of CPUs p = subprocess_utils.run('kubectl get nodes -o jsonpath=\'{.items[0].status.capacity.cpu}\'', capture_output=True) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 469d6e19fb5..536d0790d76 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -65,12 +65,14 @@ def is_valid_instance_type(name: str) -> bool: return bool(pattern.match(name)) @classmethod - def _parse_instance_type(cls, name: str) -> Tuple[ - float, float, Optional[float], Optional[str]]: + def _parse_instance_type( + cls, + name: str) -> Tuple[float, float, Optional[float], Optional[str]]: """Returns the cpus, memory, accelerator_count, and accelerator_type from the given name.""" pattern = re.compile( - r'^(?P\d+(\.\d+)?)CPU--(?P\d+(\.\d+)?)GB(?:--(?P\d+)(?P\S+))?$') + r'^(?P\d+(\.\d+)?)CPU--(?P\d+(\.\d+)?)GB(?:--(?P\d+)(?P\S+))?$' + ) match = pattern.match(name) if match: cpus = 
float(match.group('cpus')) @@ -191,9 +193,13 @@ def get_default_instance_type( disk_tier: Optional[str] = None) -> Optional[str]: del disk_tier # Unused. # TODO - Allow fractional CPUs and memory - instance_cpus = int(cpus.strip('+')) if cpus is not None else cls._DEFAULT_NUM_VCPUS - instance_mem = int(memory.strip('+')) if memory is not None else instance_cpus * cls._DEFAULT_MEMORY_CPU_RATIO - virtual_instance_type = KubernetesInstanceType(instance_cpus, instance_mem).name + instance_cpus = int( + cpus.strip('+')) if cpus is not None else cls._DEFAULT_NUM_VCPUS + instance_mem = int( + memory.strip('+') + ) if memory is not None else instance_cpus * cls._DEFAULT_MEMORY_CPU_RATIO + virtual_instance_type = KubernetesInstanceType(instance_cpus, + instance_mem).name return virtual_instance_type @classmethod @@ -336,5 +342,3 @@ def query_status(cls, name: str, tag_filters: Dict[str, str], # TODO(romilb): Change from default namespace to user-specified namespace # return kubernetes_utils.get_cluster_status(cluster_name=name, namespace='default') return [status_lib.ClusterStatus.UP] - - diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index 0201a06523d..00d1a601c8d 100644 --- a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -17,6 +17,7 @@ log_prefix = "KubernetesNodeProvider: " + class InvalidNamespaceError(ValueError): def __init__(self, field_name, namespace): @@ -208,8 +209,8 @@ def _configure_namespace(provider_config): return namespace logger.info(log_prefix + not_found_msg(namespace_field, namespace)) - namespace_config = kubernetes.client.V1Namespace(metadata=kubernetes.client.V1ObjectMeta( - name=namespace)) + namespace_config = kubernetes.client.V1Namespace( + metadata=kubernetes.client.V1ObjectMeta(name=namespace)) kubernetes.core_api().create_namespace(namespace_config) logger.info(log_prefix + created_msg(namespace_field, namespace)) return namespace @@ -325,7 
+326,8 @@ def _configure_services(namespace, provider_config): return else: logger.info(log_prefix + updating_existing_msg("service", name)) - kubernetes.core_api().patch_namespaced_service(name, namespace, service) + kubernetes.core_api().patch_namespaced_service( + name, namespace, service) else: logger.info(log_prefix + not_found_msg("service", name)) kubernetes.core_api().create_namespaced_service(namespace, service) @@ -333,4 +335,4 @@ def _configure_services(namespace, provider_config): class KubernetesError(Exception): - pass \ No newline at end of file + pass diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 5d37b0a6bf0..4a5f2f51f01 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -63,9 +63,10 @@ def non_terminated_nodes(self, tag_filters): tag_filters[TAG_RAY_CLUSTER_NAME] = self.cluster_name label_selector = to_label_selector(tag_filters) - pod_list = kubernetes.core_api().list_namespaced_pod(self.namespace, - field_selector=field_selector, - label_selector=label_selector) + pod_list = kubernetes.core_api().list_namespaced_pod( + self.namespace, + field_selector=field_selector, + label_selector=label_selector) # Don't return pods marked for deletion, # i.e. pods with non-null metadata.DeletionTimestamp. @@ -115,6 +116,7 @@ def internal_ip(self, node_id): return pod.status.pod_ip def get_node_id(self, ip_address, use_internal_ip=True) -> str: + def find_node_id(): if use_internal_ip: return self._internal_ip_cache.get(ip_address) @@ -145,7 +147,8 @@ def set_node_tags(self, node_ids, tags): return except kubernetes.api_exception() as e: if e.status == 409: - logger.info(kubernetes.log_prefix + "Caught a 409 error while setting" + logger.info(kubernetes.log_prefix + + "Caught a 409 error while setting" " node tags. 
Retrying...") time.sleep(DELAY_BEFORE_TAG_RETRY) continue @@ -182,7 +185,8 @@ def create_node(self, node_config, tags, count): "calling create_namespaced_pod (count={}).".format(count)) new_nodes = [] for _ in range(count): - pod = kubernetes.core_api().create_namespaced_pod(self.namespace, pod_spec) + pod = kubernetes.core_api().create_namespaced_pod( + self.namespace, pod_spec) new_nodes.append(pod) new_svcs = [] @@ -227,8 +231,8 @@ def create_node(self, node_config, tags, count): "may be too slow to autoscale.") all_ready = True for node in new_nodes: - pod = kubernetes.core_api().read_namespaced_pod(node.metadata.name, - self.namespace) + pod = kubernetes.core_api().read_namespaced_pod( + node.metadata.name, self.namespace) if pod.status.phase == "Pending": # Check conditions for more detailed status if pod.status.conditions is not None: @@ -255,13 +259,16 @@ def terminate_node(self, node_id): kubernetes.core_api().delete_namespaced_pod(node_id, self.namespace) except kubernetes.api_exception() as e: if e.status == 404: - logger.warning(config.log_prefix + f"Tried to delete pod {node_id}," + logger.warning(config.log_prefix + + f"Tried to delete pod {node_id}," " but the pod was not found (404).") else: raise try: - kubernetes.core_api().delete_namespaced_service(node_id, self.namespace) - kubernetes.core_api().delete_namespaced_service(f'{node_id}-ssh', self.namespace) + kubernetes.core_api().delete_namespaced_service( + node_id, self.namespace) + kubernetes.core_api().delete_namespaced_service( + f'{node_id}-ssh', self.namespace) except kubernetes.api_exception(): pass try: diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index c8e79fececb..4c93137a6ff 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -10,7 +10,8 @@ def get_head_ssh_port(cluster_name, namespace): def get_port(svc_name, namespace): - head_service = 
kubernetes.core_api().read_namespaced_service(svc_name, namespace) + head_service = kubernetes.core_api().read_namespaced_service( + svc_name, namespace) return head_service.spec.ports[0].node_port @@ -38,17 +39,22 @@ def check_credentials() -> Tuple[bool, Optional[str]]: return False, f'An error occurred: {str(e)}' -def get_cluster_status(cluster_name: str, namespace: str) -> List[status_lib.ClusterStatus]: +def get_cluster_status(cluster_name: str, + namespace: str) -> List[status_lib.ClusterStatus]: # Get all the pods with the label skypilot-cluster: - pods = kubernetes.core_api().list_namespaced_pod(namespace, label_selector=f'skypilot-cluster={cluster_name}').items + pods = kubernetes.core_api().list_namespaced_pod( + namespace, label_selector=f'skypilot-cluster={cluster_name}').items # Check if the pods are running or pending cluster_status = [] for pod in pods: if pod.status.phase == 'Running': - cluster_status.append(status_lib.ClusterStatus(cluster_name, status_lib.ClusterStatus.UP)) + cluster_status.append( + status_lib.ClusterStatus(cluster_name, + status_lib.ClusterStatus.UP)) elif pod.status.phase == 'Pending': - cluster_status.append(status_lib.ClusterStatus(cluster_name, status_lib.ClusterStatus.INIT)) + cluster_status.append( + status_lib.ClusterStatus(cluster_name, + status_lib.ClusterStatus.INIT)) # If pods are not found, we don't add them to the return list return cluster_status - diff --git a/sky/utils/kubernetes/generate_kind_config.py b/sky/utils/kubernetes/generate_kind_config.py index eace3ceb234..c205bd26623 100644 --- a/sky/utils/kubernetes/generate_kind_config.py +++ b/sky/utils/kubernetes/generate_kind_config.py @@ -6,7 +6,7 @@ def generate_kind_config(path: str, port_start: int = 30000, port_end: int = 32768, - num_nodes = 1) -> None: + num_nodes=1) -> None: """ Generate a kind cluster config file with ports mapped from host to container Args: @@ -41,18 +41,24 @@ def generate_kind_config(path: str, if __name__ == '__main__': parser = 
argparse.ArgumentParser(description='Generate a kind cluster ' - 'config file with ports mapped' - ' from host to container') - parser.add_argument('--path', type=str, default='/tmp/skypilot-kind.yaml', + 'config file with ports mapped' + ' from host to container') + parser.add_argument('--path', + type=str, + default='/tmp/skypilot-kind.yaml', help='Path to generate the config file at') - parser.add_argument('--port-start', type=int, default=30000, + parser.add_argument('--port-start', + type=int, + default=30000, help='Port range start') - parser.add_argument('--port-end', type=int, default=32768, + parser.add_argument('--port-end', + type=int, + default=32768, help='Port range end') - parser.add_argument('--num-nodes', type=int, default=1, + parser.add_argument('--num-nodes', + type=int, + default=1, help='Number of nodes in the cluster') args = parser.parse_args() - generate_kind_config(args.path, - args.port_start, - args.port_end, - args.num_nodes) \ No newline at end of file + generate_kind_config(args.path, args.port_start, args.port_end, + args.num_nodes) diff --git a/tests/conftest.py b/tests/conftest.py index 6f67414ecad..8aebf34854e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -135,7 +135,8 @@ def pytest_collection_modifyitems(config, items): # launch API to one launch every 10 seconds. # We run Kubernetes tests serially because the Kubernetes cluster may have # limited resources (e.g., just 8 cpus). - serial_mark = pytest.mark.xdist_group(name=f'serial_{generic_cloud_keyword}') + serial_mark = pytest.mark.xdist_group( + name=f'serial_{generic_cloud_keyword}') # Handle generic tests if generic_cloud in ['lambda', 'kubernetes']: for item in items: diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 8718d98a814..d447da4d934 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -183,6 +183,7 @@ def run_one_test(test: Test) -> Tuple[int, str, str]: # ---------- Dry run: 2 Tasks in a chain. 
---------- + @pytest.mark.kubernetes def test_example_app(): test = Test( From 050cfc266a25b852e1bb5890d44e125c28dcbe9d Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 23 Jun 2023 13:04:48 -0700 Subject: [PATCH 050/183] lint --- sky/cli.py | 23 +++++++++++------------ sky/skylet/providers/kubernetes/utils.py | 7 ++----- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 95bee8da911..f9261e7b0c5 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4365,10 +4365,6 @@ def local(): """SkyPilot local tools CLI.""" pass -@cli.group(cls=_NaturalOrderGroup, hidden=True) -def local(): - """SkyPilot local tools CLI.""" - pass @local.command('up', cls=_DocumentedCodeCommand) @usage_lib.entrypoint @@ -4393,8 +4389,7 @@ def local_up(): cwd = os.path.dirname(os.path.abspath(up_script_path)) # Run script and don't print output try: - subprocess_utils.run(up_script_path, cwd=cwd, - capture_output=True) + subprocess_utils.run(up_script_path, cwd=cwd, capture_output=True) cluster_created = True except subprocess.CalledProcessError as e: # Check if return code is 100 @@ -4412,15 +4407,19 @@ def local_up(): sky_check.check(quiet=True) if cluster_created: # Get number of CPUs - p = subprocess_utils.run('kubectl get nodes -o jsonpath=\'{.items[0].status.capacity.cpu}\'', capture_output=True) + p = subprocess_utils.run( + 'kubectl get nodes -o jsonpath=\'{.items[0].status.capacity.cpu}\'', + capture_output=True) num_cpus = int(p.stdout.decode('utf-8')) if num_cpus < 2: click.echo('Warning: Local cluster has less than 2 CPUs. ' 'This may cause issues with running tasks.') - click.echo('Local Kubernetes cluster created successfully with ' - f'{num_cpus} CPUs. `sky launch` can now run tasks locally.' - '\nHint: To change the number of CPUs, change your docker ' - 'runtime settings. 
See https://kind.sigs.k8s.io/docs/user/quick-start/#settings-for-docker-desktop for more info.') + click.echo( + 'Local Kubernetes cluster created successfully with ' + f'{num_cpus} CPUs. `sky launch` can now run tasks locally.' + '\nHint: To change the number of CPUs, change your docker ' + 'runtime settings. See https://kind.sigs.k8s.io/docs/user/quick-start/#settings-for-docker-desktop for more info.' + ) @local.command('down', cls=_DocumentedCodeCommand) @@ -4431,7 +4430,7 @@ def local_down(): with log_utils.safe_rich_status('Removing local cluster...'): path_to_package = os.path.dirname(os.path.dirname(__file__)) down_script_path = os.path.join(path_to_package, 'sky/utils/kubernetes', - 'delete_cluster.sh') + 'delete_cluster.sh') subprocess_utils.run_no_outputs('chmod +x {}'.format(down_script_path)) try: subprocess_utils.run(down_script_path, capture_output=True) diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 4c93137a6ff..100e918c9fb 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -49,12 +49,9 @@ def get_cluster_status(cluster_name: str, cluster_status = [] for pod in pods: if pod.status.phase == 'Running': - cluster_status.append( - status_lib.ClusterStatus(cluster_name, - status_lib.ClusterStatus.UP)) + cluster_status.append(status_lib.ClusterStatus.UP) elif pod.status.phase == 'Pending': cluster_status.append( - status_lib.ClusterStatus(cluster_name, - status_lib.ClusterStatus.INIT)) + status_lib.ClusterStatus.INIT) # If pods are not found, we don't add them to the return list return cluster_status From d64c3943ea6af139fe8b5c05e7b2a13f60998d28 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 23 Jun 2023 13:27:37 -0700 Subject: [PATCH 051/183] lint --- sky/cli.py | 2 +- sky/clouds/kubernetes.py | 18 +++++++++++------- sky/skylet/providers/kubernetes/utils.py | 3 +-- sky/utils/__init__.py | 2 +- sky/utils/kubernetes/generate_kind_config.py 
| 9 ++++++--- 5 files changed, 20 insertions(+), 14 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index f9261e7b0c5..79a20d0855c 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4418,7 +4418,7 @@ def local_up(): 'Local Kubernetes cluster created successfully with ' f'{num_cpus} CPUs. `sky launch` can now run tasks locally.' '\nHint: To change the number of CPUs, change your docker ' - 'runtime settings. See https://kind.sigs.k8s.io/docs/user/quick-start/#settings-for-docker-desktop for more info.' + 'runtime settings. See https://kind.sigs.k8s.io/docs/user/quick-start/#settings-for-docker-desktop for more info.' # pylint: disable=line-too-long ) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 536d0790d76..b853bcb4278 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -71,7 +71,7 @@ def _parse_instance_type( """Returns the cpus, memory, accelerator_count, and accelerator_type from the given name.""" pattern = re.compile( - r'^(?P\d+(\.\d+)?)CPU--(?P\d+(\.\d+)?)GB(?:--(?P\d+)(?P\S+))?$' + r'^(?P\d+(\.\d+)?)CPU--(?P\d+(\.\d+)?)GB(?:--(?P\d+)(?P\S+))?$' # pylint: disable=line-too-long ) match = pattern.match(name) if match: @@ -81,12 +81,16 @@ def _parse_instance_type( accelerator_type = match.group('accelerator_type') if accelerator_count: accelerator_count = float(accelerator_count) + accelerator_type = str(accelerator_type) + else: + accelerator_count = None + accelerator_type = None return cpus, memory, accelerator_count, accelerator_type else: raise ValueError(f'Invalid instance name: {name}') @classmethod - def from_instance_type(cls, name: str) -> 'KubernetesInstanceName': + def from_instance_type(cls, name: str) -> 'KubernetesInstanceType': """Returns an instance name object from the given name.""" if not cls.is_valid_instance_type(name): raise ValueError(f'Invalid instance name: {name}') @@ -102,7 +106,7 @@ def from_resources(cls, cpus: float, memory: float, accelerator_count: float = 0, - accelerator_type: str = '') 
-> 'KubernetesInstanceName': + accelerator_type: str = '') -> 'KubernetesInstanceType': """Returns an instance name object from the given resources.""" name = f'{cpus}CPU--{memory}GB' if accelerator_count > 0: @@ -197,7 +201,8 @@ def get_default_instance_type( cpus.strip('+')) if cpus is not None else cls._DEFAULT_NUM_VCPUS instance_mem = int( memory.strip('+') - ) if memory is not None else instance_cpus * cls._DEFAULT_MEMORY_CPU_RATIO + ) if memory is not None else \ + instance_cpus * cls._DEFAULT_MEMORY_CPU_RATIO virtual_instance_type = KubernetesInstanceType(instance_cpus, instance_mem).name return virtual_instance_type @@ -212,7 +217,7 @@ def get_accelerators_from_instance_type( @classmethod def get_vcpus_mem_from_instance_type( - cls, instance_type: str) -> Tuple[float, float]: + cls, instance_type: str) -> Tuple[Optional[float], Optional[float]]: """Returns the #vCPUs and memory that the instance type offers.""" k = KubernetesInstanceType.from_instance_type(instance_type) return k.cpus, k.memory @@ -333,12 +338,11 @@ def accelerator_in_region_or_zone(self, # supported by the cluster. return True + @classmethod def query_status(cls, name: str, tag_filters: Dict[str, str], region: Optional[str], zone: Optional[str], **kwargs) -> List['status_lib.ClusterStatus']: # TODO(romilb): Implement this. For now, we return UP as the status. # Assuming single node cluster. del tag_filters, region, zone, kwargs # Unused. 
- # TODO(romilb): Change from default namespace to user-specified namespace - # return kubernetes_utils.get_cluster_status(cluster_name=name, namespace='default') return [status_lib.ClusterStatus.UP] diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 100e918c9fb..46d77d1bcf0 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -51,7 +51,6 @@ def get_cluster_status(cluster_name: str, if pod.status.phase == 'Running': cluster_status.append(status_lib.ClusterStatus.UP) elif pod.status.phase == 'Pending': - cluster_status.append( - status_lib.ClusterStatus.INIT) + cluster_status.append(status_lib.ClusterStatus.INIT) # If pods are not found, we don't add them to the return list return cluster_status diff --git a/sky/utils/__init__.py b/sky/utils/__init__.py index af8d846aa07..eff27bdd65b 100644 --- a/sky/utils/__init__.py +++ b/sky/utils/__init__.py @@ -1,2 +1,2 @@ """Utility functions.""" -from sky.skylet.providers.kubernetes import utils as kubernetes_utils \ No newline at end of file +from sky.skylet.providers.kubernetes import utils as kubernetes_utils diff --git a/sky/utils/kubernetes/generate_kind_config.py b/sky/utils/kubernetes/generate_kind_config.py index c205bd26623..6df52bfcf09 100644 --- a/sky/utils/kubernetes/generate_kind_config.py +++ b/sky/utils/kubernetes/generate_kind_config.py @@ -1,4 +1,7 @@ -# Generates a kind cluster config file with ports mapped from host to container +"""Generates a kind cluster config file + +Maps specified ports from host to cluster container. 
+""" import argparse import textwrap @@ -24,7 +27,7 @@ def generate_kind_config(path: str, extraPortMappings:""") suffix = '' if num_nodes > 1: - for i in range(1, num_nodes): + for _ in range(1, num_nodes): suffix += """- role: worker\n""" with open(path, 'w') as f: f.write(preamble) @@ -34,7 +37,7 @@ def generate_kind_config(path: str, hostPort: {port} listenAddress: "0.0.0.0" protocol: tcp""") - f.write("\n") + f.write('\n') if suffix: f.write(suffix) From 7367b4a711c60da7588cf0afd6330ea99e5aa855 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 23 Jun 2023 15:05:33 -0700 Subject: [PATCH 052/183] Speed up kind cluster creation --- sky/cli.py | 2 +- sky/setup_files/MANIFEST.in | 1 + sky/utils/kubernetes/create_cluster.sh | 14 ++++++++------ sky/utils/kubernetes/generate_kind_config.py | 10 ++++++++-- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 79a20d0855c..6c536ff71ed 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4394,7 +4394,7 @@ def local_up(): except subprocess.CalledProcessError as e: # Check if return code is 100 if e.returncode == 100: - click.echo('\nLocal cluster already exists.') + click.echo('\nLocal cluster already exists. Run `sky local down` to delete it.') else: stderr = e.stderr.decode('utf-8') click.echo(f'\nFailed to create local cluster. 
{stderr}') diff --git a/sky/setup_files/MANIFEST.in b/sky/setup_files/MANIFEST.in index 1ae9b400a52..1e3212e520d 100644 --- a/sky/setup_files/MANIFEST.in +++ b/sky/setup_files/MANIFEST.in @@ -17,3 +17,4 @@ include sky/spot/dashboard/* include sky/spot/dashboard/templates/* include sky/spot/dashboard/static/* include sky/templates/* +include sky/utils/kubernetes/* diff --git a/sky/utils/kubernetes/create_cluster.sh b/sky/utils/kubernetes/create_cluster.sh index ba77f7bf7e8..574c1fdd9e9 100644 --- a/sky/utils/kubernetes/create_cluster.sh +++ b/sky/utils/kubernetes/create_cluster.sh @@ -1,9 +1,12 @@ # Creates a local Kubernetes cluster using kind # Usage: ./create_cluster.sh # Invokes generate_kind_config.py to generate a kind-cluster.yaml with NodePort mappings -# Be sure to have built the latest image before running this script set -e +# Limit port range to speed up kind cluster creation +PORT_RANGE_START=30000 +PORT_RANGE_END=30100 + # Check if docker is running if ! docker info > /dev/null 2>&1; then >&2 echo "Docker is not running. Please start Docker and try again." @@ -22,16 +25,15 @@ if kind get clusters | grep -q skypilot; then exit 100 fi -# If /tmp/skypilot-kind.yaml is not present, generate it -if [ ! 
-f /tmp/skypilot-kind.yaml ]; then - echo "Generating /tmp/skypilot-kind.yaml" - python -m sky.utils.kubernetes.generate_kind_config --path /tmp/skypilot-kind.yaml -fi +# Generate cluster YAML +echo "Generating /tmp/skypilot-kind.yaml" +python -m sky.utils.kubernetes.generate_kind_config --path /tmp/skypilot-kind.yaml --port-start ${PORT_RANGE_START} --port-end ${PORT_RANGE_END} kind create cluster --config /tmp/skypilot-kind.yaml --name skypilot # Load local skypilot image on to the cluster for faster startup echo "Loading local skypilot image on to the cluster" +docker pull us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest kind load docker-image --name skypilot us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest # Print CPUs available on the local cluster diff --git a/sky/utils/kubernetes/generate_kind_config.py b/sky/utils/kubernetes/generate_kind_config.py index 6df52bfcf09..66d33ca98ca 100644 --- a/sky/utils/kubernetes/generate_kind_config.py +++ b/sky/utils/kubernetes/generate_kind_config.py @@ -19,9 +19,15 @@ def generate_kind_config(path: str, num_nodes: Number of nodes in the cluster """ - preamble = textwrap.dedent(""" + preamble = textwrap.dedent(f""" apiVersion: kind.x-k8s.io/v1alpha4 kind: Cluster + kubeadmConfigPatches: + - | + kind: ClusterConfiguration + apiServer: + extraArgs: + "service-node-port-range": {port_start}-{port_end} nodes: - role: control-plane extraPortMappings:""") @@ -31,7 +37,7 @@ def generate_kind_config(path: str, suffix += """- role: worker\n""" with open(path, 'w') as f: f.write(preamble) - for port in range(port_start, port_end): + for port in range(port_start, port_end+1): f.write(f""" - containerPort: {port} hostPort: {port} From 756c56c4c511a5c4a6229e8e1ae6676bef46f1d6 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 23 Jun 2023 15:50:01 -0700 Subject: [PATCH 053/183] tests --- tests/test_smoke.py | 53 +++++++++++++++++++++++++++++++-------------- 1 file changed, 37 
insertions(+), 16 deletions(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index d447da4d934..27a1fe30f73 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -182,9 +182,6 @@ def run_one_test(test: Test) -> Tuple[int, str, str]: # ---------- Dry run: 2 Tasks in a chain. ---------- - - -@pytest.mark.kubernetes def test_example_app(): test = Test( 'example_app', @@ -194,7 +191,6 @@ def test_example_app(): # ---------- A minimal task ---------- -@pytest.mark.kubernetes def test_minimal(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -560,6 +556,7 @@ def test_image_no_conda(): # ------------ Test stale job ------------ @pytest.mark.no_lambda_cloud # Lambda Cloud does not support stopping instances +@pytest.mark.no_kubernetes # Kubernetes does not support stopping instances def test_stale_job(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -638,17 +635,13 @@ def test_gcp_stale_job_manual_restart(): # ---------- Check Sky's environment variables; workdir. ---------- @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet -@pytest.mark.no_kubernetes +@pytest.mark.no_kubernetes # K8s does not support num_nodes > 1 yet def test_env_check(generic_cloud: str): name = _get_cluster_name() - extra_flags = '' - if generic_cloud == 'kubernetes': - # Kubernetes does not support multi-node - extra_flags = '--num-nodes 1' test = Test( 'env_check', [ - f'sky launch -y -c {name} --cloud {generic_cloud} {extra_flags} --detach-setup examples/env_check.yaml', + f'sky launch -y -c {name} --cloud {generic_cloud} --detach-setup examples/env_check.yaml', f'sky logs {name} 1 --status', # Ensure the job succeeded. ], f'sky down -y {name}', @@ -658,7 +651,6 @@ def test_env_check(generic_cloud: str): # ---------- file_mounts ---------- @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet. Run test_scp_file_mounts instead. 
-@pytest.mark.kubernetes def test_file_mounts(generic_cloud: str): name = _get_cluster_name() extra_flags = '' @@ -695,6 +687,21 @@ def test_scp_file_mounts(): ) run_one_test(test) +@pytest.mark.kubernetes +def test_kubernetes_file_mounts(): + name = _get_cluster_name() + test_commands = [ + *storage_setup_commands, + f'sky launch -y -c {name} --cloud kubernetes --num-nodes 1 examples/using_file_mounts.yaml', + f'sky logs {name} 1 --status', # Ensure the job succeeded. + ] + test = Test( + 'kubernetes_using_file_mounts', + test_commands, + f'sky down -y {name}', + timeout=20 * 60, # 20 mins + ) + run_one_test(test) # ---------- storage ---------- @pytest.mark.aws @@ -810,7 +817,6 @@ def test_cloudflare_storage_mounts(generic_cloud: str): # ---------- CLI logs ---------- @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet. Run test_scp_logs instead. -@pytest.mark.kubernetes def test_cli_logs(generic_cloud: str): name = _get_cluster_name() num_nodes = 2 @@ -863,6 +869,7 @@ def test_scp_logs(): @pytest.mark.no_ibm # IBM Cloud does not have K80 gpus. run test_ibm_job_queue instead @pytest.mark.no_scp # SCP does not have K80 gpus. Run test_scp_job_queue instead @pytest.mark.no_oci # OCI does not have K80 gpus +@pytest.mark.no_kubernetes # Kubernetes not have gpus def test_job_queue(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -964,6 +971,7 @@ def test_scp_job_queue(): @pytest.mark.no_ibm # IBM Cloud does not have T4 gpus. run test_ibm_job_queue_multinode instead @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet @pytest.mark.no_oci # OCI Cloud does not have T4 gpus. 
+@pytest.mark.no_kubernetes # Kubernetes not have gpus def test_job_queue_multinode(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -1001,13 +1009,12 @@ def test_job_queue_multinode(generic_cloud: str): @pytest.mark.no_lambda_cloud # No Lambda Cloud VM has 8 CPUs -@pytest.mark.kubernetes def test_large_job_queue(generic_cloud: str): name = _get_cluster_name() test = Test( 'large_job_queue', [ - f'sky launch -y -c {name} --cloud {generic_cloud}', + f'sky launch -y -c {name} --cpus 8 --cloud {generic_cloud}', f'for i in `seq 1 75`; do sky exec {name} -n {name}-$i -d "echo $i; sleep 100000000"; done', f'sky cancel -y {name} 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16', 'sleep 75', @@ -1045,14 +1052,13 @@ def test_large_job_queue(generic_cloud: str): @pytest.mark.no_lambda_cloud # No Lambda Cloud VM has 8 CPUs -@pytest.mark.kubernetes def test_fast_large_job_queue(generic_cloud: str): # This is to test the jobs can be scheduled quickly when there are many jobs in the queue. name = _get_cluster_name() test = Test( 'fast_large_job_queue', [ - f'sky launch -y -c {name} --cloud {generic_cloud}', + f'sky launch -y -c {name} --cpus 8 --cloud {generic_cloud}', f'for i in `seq 1 32`; do sky exec {name} -n {name}-$i -d "echo $i"; done', 'sleep 60', f's=$(sky queue {name}); echo "$s"; echo; echo; echo "$s" | grep -v grep | grep SUCCEEDED | wc -l | grep 32', @@ -1106,6 +1112,7 @@ def test_ibm_job_queue_multinode(): @pytest.mark.no_ibm # IBM Cloud does not have K80 gpus @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet @pytest.mark.no_oci # OCI Cloud does not have K80 gpus +@pytest.mark.no_kubernetes # Kubernetes not have gpus def test_multi_echo(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -1129,6 +1136,7 @@ def test_multi_echo(generic_cloud: str): @pytest.mark.no_lambda_cloud # Lambda Cloud does not have V100 gpus @pytest.mark.no_ibm # IBM cloud currently doesn't provide public image with CUDA @pytest.mark.no_scp # SCP does not have V100 
(16GB) GPUs. Run test_scp_huggingface instead. +@pytest.mark.no_kubernetes # Kubernetes not have gpus def test_huggingface(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -1239,6 +1247,7 @@ def test_tpu_vm_pod(): # ---------- Simple apps. ---------- @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet +@pytest.mark.no_kubernetes # Kubernetes does not support num_nodes > 1 node yet def test_multi_hostname(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -1259,6 +1268,7 @@ def test_multi_hostname(generic_cloud: str): @pytest.mark.no_lambda_cloud # Lambda Cloud does not have V100 gpus @pytest.mark.no_ibm # IBM cloud currently doesn't provide public image with CUDA @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet +@pytest.mark.no_kubernetes # Kubernetes does not support num_nodes > 1 node yet def test_distributed_tf(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -1329,6 +1339,7 @@ def test_azure_start_stop(): @pytest.mark.no_lambda_cloud # Lambda Cloud does not support stopping instances @pytest.mark.no_ibm # FIX(IBM) sporadically fails, as restarted workers stay uninitialized indefinitely @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet +@pytest.mark.no_kubernetes # Kubernetes does not autostop yet def test_autostop(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -1385,6 +1396,7 @@ def test_autostop(generic_cloud: str): # ---------- Testing Autodowning ---------- @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet. Run test_scp_autodown instead. 
+@pytest.mark.no_kubernetes # Kubernetes does not support autodown yet def test_autodown(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -1500,6 +1512,7 @@ def test_cancel_azure(): @pytest.mark.no_lambda_cloud # Lambda Cloud does not have V100 gpus @pytest.mark.no_ibm # IBM cloud currently doesn't provide public image with CUDA @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet +@pytest.mark.no_kubernetes # Kubernetes does not support GPU yet def test_cancel_pytorch(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -1548,6 +1561,7 @@ def test_cancel_ibm(): @pytest.mark.no_lambda_cloud # Lambda Cloud does not support spot instances @pytest.mark.no_ibm # IBM Cloud does not support spot instances @pytest.mark.no_scp # SCP does not support spot instances +@pytest.mark.no_kubernetes # Kubernetes does not have a notion of spot instances def test_use_spot(generic_cloud: str): """Test use-spot and sky exec.""" name = _get_cluster_name() @@ -1568,6 +1582,7 @@ def test_use_spot(generic_cloud: str): @pytest.mark.no_lambda_cloud # Lambda Cloud does not support spot instances @pytest.mark.no_ibm # IBM Cloud does not support spot instances @pytest.mark.no_scp # SCP does not support spot instances +@pytest.mark.no_kubernetes # Kubernetes does not have a notion of spot instances @pytest.mark.managed_spot def test_spot(generic_cloud: str): """Test the spot yaml.""" @@ -1600,6 +1615,7 @@ def test_spot(generic_cloud: str): @pytest.mark.no_lambda_cloud # Lambda Cloud does not support spot instances @pytest.mark.no_ibm # IBM Cloud does not support spot instances @pytest.mark.no_scp # SCP does not support spot instances +@pytest.mark.no_kubernetes # Kubernetes does not have a notion of spot instances @pytest.mark.managed_spot def test_spot_pipeline(generic_cloud: str): """Test a spot pipeline.""" @@ -1638,6 +1654,7 @@ def test_spot_pipeline(generic_cloud: str): @pytest.mark.no_lambda_cloud # Lambda Cloud does not support spot instances 
@pytest.mark.no_ibm # IBM Cloud does not support spot instances @pytest.mark.no_scp # SCP does not support spot instances +@pytest.mark.no_kubernetes # Kubernetes does not have a notion of spot instances @pytest.mark.managed_spot def test_spot_failed_setup(generic_cloud: str): """Test managed spot job with failed setup.""" @@ -1660,6 +1677,7 @@ def test_spot_failed_setup(generic_cloud: str): @pytest.mark.no_lambda_cloud # Lambda Cloud does not support spot instances @pytest.mark.no_ibm # IBM Cloud does not support spot instances @pytest.mark.no_scp # SCP does not support spot instances +@pytest.mark.no_kubernetes # Kubernetes does not have a notion of spot instances @pytest.mark.managed_spot def test_spot_pipeline_failed_setup(generic_cloud: str): """Test managed spot job with failed setup for a pipeline.""" @@ -1838,6 +1856,7 @@ def test_spot_pipeline_recovery_gcp(): @pytest.mark.no_lambda_cloud # Lambda Cloud does not support spot instances @pytest.mark.no_ibm # IBM Cloud does not support spot instances @pytest.mark.no_scp # SCP does not support spot instances +@pytest.mark.no_kubernetes # Kubernetes does not have a notion of spot instances @pytest.mark.managed_spot def test_spot_recovery_default_resources(generic_cloud: str): """Test managed spot recovery for default resources.""" @@ -2044,6 +2063,7 @@ def test_spot_cancellation_gcp(): @pytest.mark.no_lambda_cloud # Lambda Cloud does not support spot instances @pytest.mark.no_ibm # IBM Cloud does not support spot instances @pytest.mark.no_scp # SCP does not support spot instances +@pytest.mark.no_kubernetes # Kubernetes does not have a notion of spot instances @pytest.mark.managed_spot def test_spot_storage(generic_cloud: str): """Test storage with managed spot""" @@ -2098,6 +2118,7 @@ def test_spot_tpu(): @pytest.mark.no_lambda_cloud # Lambda Cloud does not support spot instances @pytest.mark.no_ibm # IBM Cloud does not support spot instances @pytest.mark.no_scp # SCP does not support spot instances 
+@pytest.mark.no_kubernetes # Kubernetes does not have a notion of spot instances @pytest.mark.managed_spot def test_spot_inline_env(generic_cloud: str): """Test spot env""" From d4c0990c60ab9f498f5943fd95adeed6502d0106 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 23 Jun 2023 15:52:25 -0700 Subject: [PATCH 054/183] lint --- sky/cli.py | 3 ++- sky/utils/kubernetes/generate_kind_config.py | 2 +- tests/test_smoke.py | 4 +++- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 6c536ff71ed..8c6f9093d64 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4394,7 +4394,8 @@ def local_up(): except subprocess.CalledProcessError as e: # Check if return code is 100 if e.returncode == 100: - click.echo('\nLocal cluster already exists. Run `sky local down` to delete it.') + click.echo('\nLocal cluster already exists. ' + 'Run `sky local down` to delete it.') else: stderr = e.stderr.decode('utf-8') click.echo(f'\nFailed to create local cluster. {stderr}') diff --git a/sky/utils/kubernetes/generate_kind_config.py b/sky/utils/kubernetes/generate_kind_config.py index 66d33ca98ca..404deabcec6 100644 --- a/sky/utils/kubernetes/generate_kind_config.py +++ b/sky/utils/kubernetes/generate_kind_config.py @@ -37,7 +37,7 @@ def generate_kind_config(path: str, suffix += """- role: worker\n""" with open(path, 'w') as f: f.write(preamble) - for port in range(port_start, port_end+1): + for port in range(port_start, port_end + 1): f.write(f""" - containerPort: {port} hostPort: {port} diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 27a1fe30f73..8b71f56ab19 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -635,7 +635,7 @@ def test_gcp_stale_job_manual_restart(): # ---------- Check Sky's environment variables; workdir. 
---------- @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet -@pytest.mark.no_kubernetes # K8s does not support num_nodes > 1 yet +@pytest.mark.no_kubernetes # K8s does not support num_nodes > 1 yet def test_env_check(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -687,6 +687,7 @@ def test_scp_file_mounts(): ) run_one_test(test) + @pytest.mark.kubernetes def test_kubernetes_file_mounts(): name = _get_cluster_name() @@ -703,6 +704,7 @@ def test_kubernetes_file_mounts(): ) run_one_test(test) + # ---------- storage ---------- @pytest.mark.aws def test_aws_storage_mounts(): From b64dd1962004b0bace8ec1ac32463fa4208ea839 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 23 Jun 2023 17:17:46 -0700 Subject: [PATCH 055/183] tests --- tests/test_smoke.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 8b71f56ab19..e1fe9183a98 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -2272,6 +2272,7 @@ def test_azure_disk_tier(): # ------- Testing user ray cluster -------- +@pytest.mark.no_kubernetes # Kubernetes does not support sky status -r yet. 
def test_user_ray_cluster(generic_cloud: str): name = _get_cluster_name() test = Test( From 10333d7a5be25f479cd7ebff5cbff96634f0598f Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 25 Jun 2023 09:58:46 -0700 Subject: [PATCH 056/183] handling for non-reachable clusters --- sky/authentication.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sky/authentication.py b/sky/authentication.py index 74fb6d37869..7070e617fb6 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -19,7 +19,7 @@ from sky import clouds from sky import sky_logging from sky.adaptors import gcp, ibm -from sky.utils import common_utils +from sky.utils import common_utils, env_options from sky.utils import subprocess_utils from sky.utils import ux_utils from sky.skylet.providers.lambda_cloud import lambda_utils @@ -460,11 +460,23 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True) except subprocess.CalledProcessError as e: output = e.output.decode('utf-8') + suffix = '' + if env_options.Options.SHOW_DEBUG_INFO.get(): + suffix = f' Error message: {output}' if 'already exists' in output: logger.warning( f'Key {key_label} already exists in the cluster, using it...') pass + elif any([err in output for err in ['connection refused', 'timeout']]): + with ux_utils.print_exception_no_traceback(): + raise ConnectionError( + 'Failed to connect to the cluster. 
Check if your ' + 'cluster is running, your kubeconfig is correct ' + 'and you can connect to it using ' + f'kubectl get namespaces.{suffix}') from e else: + if suffix: + logger.error(suffix) raise e # Need to use ~ relative path because Ray uses the same From b07fc581dabe29f90f05db1ca554ac87ad8e0b31 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 26 Jun 2023 11:19:59 -0700 Subject: [PATCH 057/183] Invalid kubeconfig handling --- sky/adaptors/kubernetes.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/sky/adaptors/kubernetes.py b/sky/adaptors/kubernetes.py index 37e6dd1fd48..ff99890eb43 100644 --- a/sky/adaptors/kubernetes.py +++ b/sky/adaptors/kubernetes.py @@ -4,6 +4,8 @@ from functools import wraps +from sky.utils import ux_utils, env_options + kubernetes = None _configured = False @@ -43,7 +45,17 @@ def _load_config(): try: kubernetes.config.load_incluster_config() except kubernetes.config.config_exception.ConfigException: - kubernetes.config.load_kube_config() + try: + kubernetes.config.load_kube_config() + except kubernetes.config.config_exception.ConfigException as e: + with ux_utils.print_exception_no_traceback(): + suffix = '' + if env_options.Options.SHOW_DEBUG_INFO.get(): + suffix += f' Error: {str(e)}' + raise ValueError('Failed to load Kubernetes configuration. ' + f'Please check your kubeconfig file is it valid. 
{suffix}') from None + + _configured = True From 5af58aa9f8a8ef09a6d363a39f9afbeca2d06b4d Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 26 Jun 2023 12:40:48 -0700 Subject: [PATCH 058/183] Timeout for sky check --- sky/adaptors/kubernetes.py | 11 ++++++++++- sky/cli.py | 6 +++--- sky/skylet/providers/kubernetes/utils.py | 11 +++++++++-- 3 files changed, 22 insertions(+), 6 deletions(-) diff --git a/sky/adaptors/kubernetes.py b/sky/adaptors/kubernetes.py index ff99890eb43..94bd1865df8 100644 --- a/sky/adaptors/kubernetes.py +++ b/sky/adaptors/kubernetes.py @@ -7,6 +7,7 @@ from sky.utils import ux_utils, env_options kubernetes = None +urllib3 = None _configured = False _core_api = None @@ -20,13 +21,16 @@ def import_package(func): @wraps(func) def wrapper(*args, **kwargs): global kubernetes + global urllib3 if kubernetes is None: try: import kubernetes as _kubernetes + import urllib3 as _urllib3 except ImportError: - raise ImportError('Fail to import dependencies for Docker. ' + raise ImportError('Fail to import dependencies for Kubernetes. 
' 'See README for how to install it.') from None kubernetes = _kubernetes + urllib3 = _urllib3 return func(*args, **kwargs) return wrapper @@ -107,3 +111,8 @@ def api_exception(): @import_package def config_exception(): return kubernetes.config.config_exception.ConfigException + + +@import_package +def max_retry_error(): + return urllib3.exceptions.MaxRetryError \ No newline at end of file diff --git a/sky/cli.py b/sky/cli.py index 8c6f9093d64..2cba7cc2fcb 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4446,10 +4446,10 @@ def local_down(): if env_options.Options.SHOW_DEBUG_INFO.get(): stdout = e.stdout.decode('utf-8') click.echo(f'Logs:\n{stdout}') - # Run sky check - with log_utils.safe_rich_status('Running sky check...'): - sky_check.check(quiet=True) if cluster_removed: + # Run sky check + with log_utils.safe_rich_status('Running sky check...'): + sky_check.check(quiet=True) click.echo('Local cluster removed.') diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 46d77d1bcf0..eb6c9b5fbac 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -15,16 +15,19 @@ def get_port(svc_name, namespace): return head_service.spec.ports[0].node_port -def check_credentials() -> Tuple[bool, Optional[str]]: +def check_credentials(timeout: int = 3) -> Tuple[bool, Optional[str]]: """ Check if the credentials in kubeconfig file are valid + Args: + timeout (int): Timeout in seconds for the test API call + Returns: bool: True if credentials are valid, False otherwise str: Error message if credentials are invalid, None otherwise """ try: - kubernetes.core_api().list_namespace() + kubernetes.core_api().list_namespace(_request_timeout=timeout) return True, None except kubernetes.api_exception() as e: # Check if the error is due to invalid credentials @@ -35,6 +38,10 @@ def check_credentials() -> Tuple[bool, Optional[str]]: return False, f'Failed to communicate with the cluster: {str(e)}' 
except kubernetes.config_exception() as e: return False, f'Invalid configuration file: {str(e)}' + except kubernetes.max_retry_error(): + return False, 'Failed to communicate with the cluster - timeout. ' \ + 'Check if your cluster is running and your network ' \ + 'is stable.' except Exception as e: return False, f'An error occurred: {str(e)}' From 4d6710f6db0a336f80edc57c926f5fcf939d02c2 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 27 Jun 2023 15:48:03 -0700 Subject: [PATCH 059/183] code cleanup --- sky/adaptors/kubernetes.py | 9 +- sky/authentication.py | 5 +- sky/backends/backend_utils.py | 2 +- sky/backends/cloud_vm_ray_backend.py | 1 + sky/cli.py | 9 +- sky/clouds/kubernetes.py | 15 +- sky/data/storage.py | 1 - sky/setup_files/MANIFEST.in | 6 +- sky/skylet/providers/kubernetes/config.py | 168 +++++++++--------- .../providers/kubernetes/node_provider.py | 141 ++++++++------- sky/spot/constants.py | 2 +- sky/templates/kubernetes-ray.yml.j2 | 11 +- tests/kubernetes/README.md | 14 +- tests/test_smoke.py | 2 + 14 files changed, 202 insertions(+), 184 deletions(-) diff --git a/sky/adaptors/kubernetes.py b/sky/adaptors/kubernetes.py index 94bd1865df8..5648557d4ba 100644 --- a/sky/adaptors/kubernetes.py +++ b/sky/adaptors/kubernetes.py @@ -56,9 +56,10 @@ def _load_config(): suffix = '' if env_options.Options.SHOW_DEBUG_INFO.get(): suffix += f' Error: {str(e)}' - raise ValueError('Failed to load Kubernetes configuration. ' - f'Please check your kubeconfig file is it valid. {suffix}') from None - + raise ValueError( + 'Failed to load Kubernetes configuration. ' + f'Please check your kubeconfig file is it valid. 
{suffix}' + ) from None _configured = True @@ -115,4 +116,4 @@ def config_exception(): @import_package def max_retry_error(): - return urllib3.exceptions.MaxRetryError \ No newline at end of file + return urllib3.exceptions.MaxRetryError diff --git a/sky/authentication.py b/sky/authentication.py index 7070e617fb6..a357fb766a5 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -19,7 +19,8 @@ from sky import clouds from sky import sky_logging from sky.adaptors import gcp, ibm -from sky.utils import common_utils, env_options +from sky.utils import common_utils +from sky.utils import env_options from sky.utils import subprocess_utils from sky.utils import ux_utils from sky.skylet.providers.lambda_cloud import lambda_utils @@ -467,7 +468,7 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: logger.warning( f'Key {key_label} already exists in the cluster, using it...') pass - elif any([err in output for err in ['connection refused', 'timeout']]): + elif any(err in output for err in ['connection refused', 'timeout']): with ux_utils.print_exception_no_traceback(): raise ConnectionError( 'Failed to connect to the cluster. 
Check if your ' diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index fe27b4adc49..02eb57cc75d 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -860,8 +860,8 @@ def write_cluster_config( k8s_image = None ssh_key_secret_name = None if isinstance(cloud, clouds.Kubernetes): + # TODO(romilb): Make this read from image id in the task k8s_image = cloud.IMAGE - ssh_key_secret_name = cloud.SKY_SSH_KEY_SECRET_NAME ip_list = None auth_config = {'ssh_private_key': auth.PRIVATE_SSH_KEY_PATH} diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 33c32a262c8..6dea6f63eed 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2131,6 +2131,7 @@ def _update_cluster_region(self): self.launched_resources = self.launched_resources.copy(region=region) def _update_stable_ssh_ports(self, max_attempts: int = 1) -> None: + # TODO(romilb): Replace this with a call to the cloud class to get ports if isinstance(self.launched_resources.cloud, clouds.Kubernetes): head_port = backend_utils.get_head_ssh_port( self, use_cache=False, max_attempts=max_attempts) diff --git a/sky/cli.py b/sky/cli.py index 2cba7cc2fcb..c94a3f74e3f 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -60,11 +60,12 @@ from sky.data import storage_utils from sky.skylet import constants from sky.skylet import job_lib -from sky.utils import log_utils, env_options from sky.utils import common_utils -from sky.utils import dag_utils from sky.utils import command_runner +from sky.utils import dag_utils +from sky.utils import env_options from sky.utils import kubernetes_utils +from sky.utils import log_utils from sky.utils import schemas from sky.utils import subprocess_utils from sky.utils import timeline @@ -3038,6 +3039,10 @@ def show_gpus( type is the lowest across all regions for both on-demand and spot instances. There may be multiple regions with the same lowest price. 
""" + # validation for the --cloud kubernetes + if cloud == 'kubernetes': + raise click.UsageError( + 'Kubernetes does not have a service catalog.') # validation for the --region flag if region is not None and cloud is None: raise click.UsageError( diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index b853bcb4278..8fde32c200e 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -125,6 +125,10 @@ class Kubernetes(clouds.Cloud): """Kubernetes.""" SKY_SSH_KEY_SECRET_NAME = f'sky-ssh-{common_utils.get_user_hash()}' + + # TODO(romilb): Make the timeout configurable. + TIMEOUT = 60 # Timeout for resource provisioning + _DEFAULT_NUM_VCPUS = 2 _DEFAULT_MEMORY_CPU_RATIO = 1 _REPR = 'Kubernetes' @@ -196,7 +200,9 @@ def get_default_instance_type( memory: Optional[str] = None, disk_tier: Optional[str] = None) -> Optional[str]: del disk_tier # Unused. - # TODO - Allow fractional CPUs and memory + # TODO(romilb): Allow fractional CPUs and memory + # We strip '+' from resource requests since Kubernetes can provision + # exactly the requested resources. instance_cpus = int( cpus.strip('+')) if cpus is not None else cls._DEFAULT_NUM_VCPUS instance_mem = int( @@ -259,15 +265,14 @@ def make_deploy_resources_variables( # We fetch the default values for the instance type in that case. 
cpus, mem = self.get_vcpus_mem_from_instance_type( resources.instance_type) - # TODO(romilb): Allow fractional resources here - # cpus = int(cpus) - # mem = int(mem) return { 'instance_type': resources.instance_type, 'custom_resources': custom_resources, 'region': region.name, 'cpus': str(cpus), - 'memory': str(mem) + 'memory': str(mem), + 'timeout': self.TIMEOUT, + 'k8s_ssh_key_secret_name': self.SKY_SSH_KEY_SECRET_NAME, } def get_feasible_launchable_resources(self, diff --git a/sky/data/storage.py b/sky/data/storage.py index ed4fd0a4f8e..48dbe3134d3 100644 --- a/sky/data/storage.py +++ b/sky/data/storage.py @@ -1196,7 +1196,6 @@ def mount_command(self, mount_path: str) -> str: 'releases/download/0.24.0-romilb-upstream/goofys ' '-O /usr/local/bin/goofys && ' 'sudo chmod +x /usr/local/bin/goofys') - install_cmd = ('exit 1') mount_cmd = ('goofys -o allow_other ' f'--stat-cache-ttl {self._STAT_CACHE_TTL} ' f'--type-cache-ttl {self._TYPE_CACHE_TTL} ' diff --git a/sky/setup_files/MANIFEST.in b/sky/setup_files/MANIFEST.in index 1e3212e520d..a23e53ee203 100644 --- a/sky/setup_files/MANIFEST.in +++ b/sky/setup_files/MANIFEST.in @@ -7,11 +7,11 @@ include sky/skylet/providers/aws/* include sky/skylet/providers/aws/cloudwatch/* include sky/skylet/providers/azure/* include sky/skylet/providers/gcp/* -include sky/skylet/providers/lambda_cloud/* -include sky/skylet/providers/kubernetes/* include sky/skylet/providers/ibm/* -include sky/skylet/providers/scp/* +include sky/skylet/providers/kubernetes/* +include sky/skylet/providers/lambda_cloud/* include sky/skylet/providers/oci/* +include sky/skylet/providers/scp/* include sky/skylet/ray_patches/*.patch include sky/spot/dashboard/* include sky/spot/dashboard/templates/* diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index 00d1a601c8d..7002325871c 100644 --- a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -12,61 +12,60 @@ "M": 2**20, 
"G": 2**30, "T": 2**40, - "P": 2**50, + 'P': 2**50, } -log_prefix = "KubernetesNodeProvider: " +log_prefix = 'KubernetesNodeProvider: ' class InvalidNamespaceError(ValueError): def __init__(self, field_name, namespace): - self.message = ("Namespace of {} config doesn't match provided " - "namespace '{}'. Either set it to {} or remove the " - "field".format(field_name, namespace, namespace)) + self.message = (f'Namespace of {field_name} config does not match provided ' + f'namespace "{namespace}". Either set it to {namespace} or remove the ' + 'field') def __str__(self): return self.message def using_existing_msg(resource_type, name): - return "using existing {} '{}'".format(resource_type, name) + return f'using existing {resource_type} "{name}"' def updating_existing_msg(resource_type, name): - return "updating existing {} '{}'".format(resource_type, name) + return f'updating existing {resource_type} "{name}"' def not_found_msg(resource_type, name): - return "{} '{}' not found, attempting to create it".format( - resource_type, name) + return f'{resource_type} "{name}" not found, attempting to create it' def not_checking_msg(resource_type, name): - return "not checking if {} '{}' exists".format(resource_type, name) + return f'not checking if {resource_type} "{name}" exists' def created_msg(resource_type, name): - return "successfully created {} '{}'".format(resource_type, name) + return f'successfully created {resource_type} "{name}"' def not_provided_msg(resource_type): - return "no {} config provided, must already exist".format(resource_type) + return f'no {resource_type} config provided, must already exist' def bootstrap_kubernetes(config): - if config["provider"].get("_operator"): - namespace = config["provider"]["namespace"] + if config['provider'].get('_operator'): + namespace = config['provider']['namespace'] else: - namespace = _configure_namespace(config["provider"]) + namespace = _configure_namespace(config['provider']) - _configure_services(namespace, 
config["provider"]) + _configure_services(namespace, config['provider']) - if not config["provider"].get("_operator"): + if not config['provider'].get('_operator'): # These steps are unecessary when using the Operator. - _configure_autoscaler_service_account(namespace, config["provider"]) - _configure_autoscaler_role(namespace, config["provider"]) - _configure_autoscaler_role_binding(namespace, config["provider"]) + _configure_autoscaler_service_account(namespace, config['provider']) + _configure_autoscaler_role(namespace, config['provider']) + _configure_autoscaler_role_binding(namespace, config['provider']) return config @@ -79,47 +78,47 @@ def fillout_resources_kubernetes(config): and limits, takes min of the two. The result is rounded up, as Ray does not currently support fractional CPU. """ - if "available_node_types" not in config: + if 'available_node_types' not in config: return config - node_types = copy.deepcopy(config["available_node_types"]) - head_node_type = config["head_node_type"] + node_types = copy.deepcopy(config['available_node_types']) + head_node_type = config['head_node_type'] for node_type in node_types: - node_config = node_types[node_type]["node_config"] + node_config = node_types[node_type]['node_config'] # The next line is for compatibility with configs like # kubernetes/example-ingress.yaml, # cf. KubernetesNodeProvider.create_node(). 
- pod = node_config.get("pod", node_config) - container_data = pod["spec"]["containers"][0] + pod = node_config.get('pod', node_config) + container_data = pod['spec']['containers'][0] autodetected_resources = get_autodetected_resources(container_data) if node_types == head_node_type: # we only autodetect worker type node memory resource - autodetected_resources.pop("memory") - if "resources" not in config["available_node_types"][node_type]: - config["available_node_types"][node_type]["resources"] = {} + autodetected_resources.pop('memory') + if 'resources' not in config['available_node_types'][node_type]: + config['available_node_types'][node_type]['resources'] = {} autodetected_resources.update( - config["available_node_types"][node_type]["resources"]) - config["available_node_types"][node_type][ - "resources"] = autodetected_resources + config['available_node_types'][node_type]['resources']) + config['available_node_types'][node_type][ + 'resources'] = autodetected_resources logger.debug( - "Updating the resources of node type {} to include {}.".format( - node_type, autodetected_resources)) + f'Updating the resources of node type {node_type} ' + f'to include {autodetected_resources}.') return config def get_autodetected_resources(container_data): - container_resources = container_data.get("resources", None) + container_resources = container_data.get('resources', None) if container_resources is None: - return {"CPU": 0, "GPU": 0} + return {'CPU': 0, 'GPU': 0} node_type_resources = { resource_name.upper(): get_resource(container_resources, resource_name) - for resource_name in ["cpu", "gpu"] + for resource_name in ['cpu', 'gpu'] } - memory_limits = get_resource(container_resources, "memory") - node_type_resources["memory"] = int(memory_limits) + memory_limits = get_resource(container_resources, 'memory') + node_type_resources['memory'] = int(memory_limits) return node_type_resources @@ -127,9 +126,9 @@ def get_autodetected_resources(container_data): def 
get_resource(container_resources, resource_name): limit = _get_resource(container_resources, resource_name, - field_name="limits") - # float("inf") means there's no limit set - return 0 if limit == float("inf") else int(limit) + field_name='limits') + # float('inf') means there's no limit set + return 0 if limit == float('inf') else int(limit) def _get_resource(container_resources, resource_name, field_name): @@ -148,20 +147,20 @@ def _get_resource(container_resources, resource_name, field_name): """ if field_name not in container_resources: # No limit/resource field. - return float("inf") + return float('inf') resources = container_resources[field_name] # Look for keys containing the resource_name. For example, # the key 'nvidia.com/gpu' contains the key 'gpu'. matching_keys = [key for key in resources if resource_name in key.lower()] if len(matching_keys) == 0: - return float("inf") + return float('inf') if len(matching_keys) > 1: # Should have only one match -- mostly relevant for gpu. - raise ValueError(f"Multiple {resource_name} types not supported.") + raise ValueError(f'Multiple {resource_name} types not supported.') # E.g. 'nvidia.com/gpu' or 'cpu'. resource_key = matching_keys.pop() resource_quantity = resources[resource_key] - if resource_name == "memory": + if resource_name == 'memory': return _parse_memory_resource(resource_quantity) else: return _parse_cpu_or_gpu_resource(resource_quantity) @@ -169,7 +168,7 @@ def _get_resource(container_resources, resource_name, field_name): def _parse_cpu_or_gpu_resource(resource): resource_str = str(resource) - if resource_str[-1] == "m": + if resource_str[-1] == 'm': # For example, '500m' rounds up to 1. 
return math.ceil(int(resource_str[:-1]) / 1000) else: @@ -182,19 +181,19 @@ def _parse_memory_resource(resource): return int(resource_str) except ValueError: pass - memory_size = re.sub(r"([KMGTP]+)", r" \1", resource_str) + memory_size = re.sub(r'([KMGTP]+)', r' \1', resource_str) number, unit_index = [item.strip() for item in memory_size.split()] unit_index = unit_index[0] return float(number) * MEMORY_SIZE_UNITS[unit_index] def _configure_namespace(provider_config): - namespace_field = "namespace" + namespace_field = 'namespace' if namespace_field not in provider_config: - raise ValueError("Must specify namespace in Kubernetes config.") + raise ValueError('Must specify namespace in Kubernetes config.') namespace = provider_config[namespace_field] - field_selector = "metadata.name={}".format(namespace) + field_selector = f'metadata.name={namespace}' try: namespaces = kubernetes.core_api().list_namespace( field_selector=field_selector).items @@ -217,19 +216,19 @@ def _configure_namespace(provider_config): def _configure_autoscaler_service_account(namespace, provider_config): - account_field = "autoscaler_service_account" + account_field = 'autoscaler_service_account' if account_field not in provider_config: logger.info(log_prefix + not_provided_msg(account_field)) return account = provider_config[account_field] - if "namespace" not in account["metadata"]: - account["metadata"]["namespace"] = namespace - elif account["metadata"]["namespace"] != namespace: + if 'namespace' not in account['metadata']: + account['metadata']['namespace'] = namespace + elif account['metadata']['namespace'] != namespace: raise InvalidNamespaceError(account_field, namespace) - name = account["metadata"]["name"] - field_selector = "metadata.name={}".format(name) + name = account['metadata']['name'] + field_selector = f'metadata.name={name}' accounts = (kubernetes.core_api().list_namespaced_service_account( namespace, field_selector=field_selector).items) if len(accounts) > 0: @@ -243,19 
+242,19 @@ def _configure_autoscaler_service_account(namespace, provider_config): def _configure_autoscaler_role(namespace, provider_config): - role_field = "autoscaler_role" + role_field = 'autoscaler_role' if role_field not in provider_config: logger.info(log_prefix + not_provided_msg(role_field)) return role = provider_config[role_field] - if "namespace" not in role["metadata"]: - role["metadata"]["namespace"] = namespace - elif role["metadata"]["namespace"] != namespace: + if 'namespace' not in role['metadata']: + role['metadata']['namespace'] = namespace + elif role['metadata']['namespace'] != namespace: raise InvalidNamespaceError(role_field, namespace) - name = role["metadata"]["name"] - field_selector = "metadata.name={}".format(name) + name = role['metadata']['name'] + field_selector = f'metadata.name={name}' accounts = (kubernetes.auth_api().list_namespaced_role( namespace, field_selector=field_selector).items) if len(accounts) > 0: @@ -269,26 +268,27 @@ def _configure_autoscaler_role(namespace, provider_config): def _configure_autoscaler_role_binding(namespace, provider_config): - binding_field = "autoscaler_role_binding" + binding_field = 'autoscaler_role_binding' if binding_field not in provider_config: logger.info(log_prefix + not_provided_msg(binding_field)) return binding = provider_config[binding_field] - if "namespace" not in binding["metadata"]: - binding["metadata"]["namespace"] = namespace - elif binding["metadata"]["namespace"] != namespace: + if 'namespace' not in binding['metadata']: + binding['metadata']['namespace'] = namespace + elif binding['metadata']['namespace'] != namespace: raise InvalidNamespaceError(binding_field, namespace) - for subject in binding["subjects"]: - if "namespace" not in subject: - subject["namespace"] = namespace - elif subject["namespace"] != namespace: + for subject in binding['subjects']: + if 'namespace' not in subject: + subject['namespace'] = namespace + elif subject['namespace'] != namespace: + subject_name 
= subject['name'] raise InvalidNamespaceError( - binding_field + " subject '{}'".format(subject["name"]), + binding_field + f' subject {subject_name}', namespace) - name = binding["metadata"]["name"] - field_selector = "metadata.name={}".format(name) + name = binding['metadata']['name'] + field_selector = f'metadata.name={name}' accounts = (kubernetes.auth_api().list_namespaced_role_binding( namespace, field_selector=field_selector).items) if len(accounts) > 0: @@ -302,36 +302,36 @@ def _configure_autoscaler_role_binding(namespace, provider_config): def _configure_services(namespace, provider_config): - service_field = "services" + service_field = 'services' if service_field not in provider_config: logger.info(log_prefix + not_provided_msg(service_field)) return services = provider_config[service_field] for service in services: - if "namespace" not in service["metadata"]: - service["metadata"]["namespace"] = namespace - elif service["metadata"]["namespace"] != namespace: + if 'namespace' not in service['metadata']: + service['metadata']['namespace'] = namespace + elif service['metadata']['namespace'] != namespace: raise InvalidNamespaceError(service_field, namespace) - name = service["metadata"]["name"] - field_selector = "metadata.name={}".format(name) + name = service['metadata']['name'] + field_selector = f'metadata.name={name}' services = (kubernetes.core_api().list_namespaced_service( namespace, field_selector=field_selector).items) if len(services) > 0: assert len(services) == 1 existing_service = services[0] if service == existing_service: - logger.info(log_prefix + using_existing_msg("service", name)) + logger.info(log_prefix + using_existing_msg('service', name)) return else: - logger.info(log_prefix + updating_existing_msg("service", name)) + logger.info(log_prefix + updating_existing_msg('service', name)) kubernetes.core_api().patch_namespaced_service( name, namespace, service) else: - logger.info(log_prefix + not_found_msg("service", name)) + 
logger.info(log_prefix + not_found_msg('service', name)) kubernetes.core_api().create_namespaced_service(namespace, service) - logger.info(log_prefix + created_msg("service", name)) + logger.info(log_prefix + created_msg('service', name)) class KubernetesError(Exception): diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 4a5f2f51f01..bd9e017a286 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -18,12 +18,12 @@ MAX_TAG_RETRIES = 3 DELAY_BEFORE_TAG_RETRY = 0.5 -RAY_COMPONENT_LABEL = "cluster.ray.io/component" +RAY_COMPONENT_LABEL = 'cluster.ray.io/component' # Monkey patch SSHCommandRunner to allow specifying SSH port def set_port(self, port): - self.ssh_options.arg_dict["Port"] = port + self.ssh_options.arg_dict['Port'] = port SSHCommandRunner.set_port = set_port @@ -31,15 +31,15 @@ def set_port(self, port): def head_service_selector(cluster_name: str) -> Dict[str, str]: """Selector for Operator-configured head service.""" - return {RAY_COMPONENT_LABEL: f"{cluster_name}-ray-head"} + return {RAY_COMPONENT_LABEL: f'{cluster_name}-ray-head'} def to_label_selector(tags): - label_selector = "" + label_selector = '' for k, v in tags.items(): - if label_selector != "": - label_selector += "," - label_selector += "{}={}".format(k, v) + if label_selector != '': + label_selector += ',' + label_selector += '{}={}'.format(k, v) return label_selector @@ -48,17 +48,25 @@ class KubernetesNodeProvider(NodeProvider): def __init__(self, provider_config, cluster_name): NodeProvider.__init__(self, provider_config, cluster_name) self.cluster_name = cluster_name - self.namespace = provider_config["namespace"] + + # Kubernetes namespace to user + self.namespace = provider_config['namespace'] + + # Timeout for resource provisioning. If it takes longer than this + # timeout, the resource provisioning will be considered failed. 
+ # This is useful for failover. May need to be adjusted for different + # kubernetes setups. + self.timeout = provider_config['timeout'] def non_terminated_nodes(self, tag_filters): # Match pods that are in the 'Pending' or 'Running' phase. # Unfortunately there is no OR operator in field selectors, so we # have to match on NOT any of the other phases. - field_selector = ",".join([ - "status.phase!=Failed", - "status.phase!=Unknown", - "status.phase!=Succeeded", - "status.phase!=Terminating", + field_selector = ','.join([ + 'status.phase!=Failed', + 'status.phase!=Unknown', + 'status.phase!=Succeeded', + 'status.phase!=Terminating', ]) tag_filters[TAG_RAY_CLUSTER_NAME] = self.cluster_name @@ -78,24 +86,23 @@ def non_terminated_nodes(self, tag_filters): def is_running(self, node_id): pod = kubernetes.core_api().read_namespaced_pod(node_id, self.namespace) - return pod.status.phase == "Running" + return pod.status.phase == 'Running' def is_terminated(self, node_id): pod = kubernetes.core_api().read_namespaced_pod(node_id, self.namespace) - return pod.status.phase not in ["Running", "Pending"] + return pod.status.phase not in ['Running', 'Pending'] def node_tags(self, node_id): pod = kubernetes.core_api().read_namespaced_pod(node_id, self.namespace) return pod.metadata.labels def external_ip(self, node_id): - # # Return the IP address of the first node with an external IP nodes = kubernetes.core_api().list_node().items for node in nodes: if node.status.addresses: for address in node.status.addresses: - if address.type == "ExternalIP": + if address.type == 'ExternalIP': return address.address # If no external IP is found, use the API server IP api_host = kubernetes.core_api().api_client.configuration.host @@ -106,8 +113,8 @@ def external_port(self, node_id): # Extract the NodePort of the head node's SSH service # Node id is str e.g., example-cluster-ray-head-v89lb - # TODO(romilb): Implement caching here for performance - # TODO(romilb): Multi-node would need more 
handling here + # TODO(romilb): Implement caching here for performance. + # TODO(romilb): Multi-node would need more handling here. cluster_name = node_id.split('-ray-head')[0] return get_head_ssh_port(cluster_name, self.namespace) @@ -133,10 +140,10 @@ def find_node_id(): if not find_node_id(): if use_internal_ip: - known_msg = f"Worker internal IPs: {list(self._internal_ip_cache)}" + known_msg = f'Worker internal IPs: {list(self._internal_ip_cache)}' else: - known_msg = f"Worker external IP: {list(self._external_ip_cache)}" - raise ValueError(f"ip {ip_address} not found. " + known_msg) + known_msg = f'Worker external IP: {list(self._external_ip_cache)}' + raise ValueError(f'ip {ip_address} not found. ' + known_msg) return find_node_id() @@ -148,8 +155,8 @@ def set_node_tags(self, node_ids, tags): except kubernetes.api_exception() as e: if e.status == 409: logger.info(kubernetes.log_prefix + - "Caught a 409 error while setting" - " node tags. Retrying...") + 'Caught a 409 error while setting' + ' node tags. 
Retrying...') time.sleep(DELAY_BEFORE_TAG_RETRY) continue else: @@ -164,25 +171,25 @@ def _set_node_tags(self, node_id, tags): def create_node(self, node_config, tags, count): conf = copy.deepcopy(node_config) - pod_spec = conf.get("pod", conf) - service_spec = conf.get("service") - ingress_spec = conf.get("ingress") + pod_spec = conf.get('pod', conf) + service_spec = conf.get('service') + ingress_spec = conf.get('ingress') node_uuid = str(uuid4()) tags[TAG_RAY_CLUSTER_NAME] = self.cluster_name - tags["ray-node-uuid"] = node_uuid - pod_spec["metadata"]["namespace"] = self.namespace - if "labels" in pod_spec["metadata"]: - pod_spec["metadata"]["labels"].update(tags) + tags['ray-node-uuid'] = node_uuid + pod_spec['metadata']['namespace'] = self.namespace + if 'labels' in pod_spec['metadata']: + pod_spec['metadata']['labels'].update(tags) else: - pod_spec["metadata"]["labels"] = tags + pod_spec['metadata']['labels'] = tags # Allow Operator-configured service to access the head node. if tags[TAG_RAY_NODE_KIND] == NODE_KIND_HEAD: head_selector = head_service_selector(self.cluster_name) - pod_spec["metadata"]["labels"].update(head_selector) + pod_spec['metadata']['labels'].update(head_selector) logger.info(config.log_prefix + - "calling create_namespaced_pod (count={}).".format(count)) + 'calling create_namespaced_pod (count={}).'.format(count)) new_nodes = [] for _ in range(count): pod = kubernetes.core_api().create_namespaced_pod( @@ -191,26 +198,26 @@ def create_node(self, node_config, tags, count): new_svcs = [] if service_spec is not None: - logger.info(config.log_prefix + "calling create_namespaced_service " - "(count={}).".format(count)) + logger.info(config.log_prefix + 'calling create_namespaced_service ' + '(count={}).'.format(count)) for new_node in new_nodes: - metadata = service_spec.get("metadata", {}) - metadata["name"] = new_node.metadata.name - service_spec["metadata"] = metadata - service_spec["spec"]["selector"] = {"ray-node-uuid": node_uuid} + 
metadata = service_spec.get('metadata', {}) + metadata['name'] = new_node.metadata.name + service_spec['metadata'] = metadata + service_spec['spec']['selector'] = {'ray-node-uuid': node_uuid} svc = kubernetes.core_api().create_namespaced_service( self.namespace, service_spec) new_svcs.append(svc) if ingress_spec is not None: - logger.info(config.log_prefix + "calling create_namespaced_ingress " - "(count={}).".format(count)) + logger.info(config.log_prefix + 'calling create_namespaced_ingress ' + '(count={}).'.format(count)) for new_svc in new_svcs: - metadata = ingress_spec.get("metadata", {}) - metadata["name"] = new_svc.metadata.name - ingress_spec["metadata"] = metadata + metadata = ingress_spec.get('metadata', {}) + metadata['name'] = new_svc.metadata.name + ingress_spec['metadata'] = metadata ingress_spec = _add_service_name_to_service_port( ingress_spec, new_svc.metadata.name) kubernetes.networking_api().create_namespaced_ingress( @@ -220,20 +227,18 @@ def create_node(self, node_config, tags, count): # exception. If pod's container is ContainerCreating, then we can assume # that resources have been allocated and we can exit. - # TODO(romilb): Make timeout configurable in Kubernetes cloud class. - TIMEOUT = 60 start = time.time() while True: - if time.time() - start > TIMEOUT: + if time.time() - start > self.timeout: raise config.KubernetesError( - "Timed out while waiting for nodes to start. " - "Cluster may be out of resources or " - "may be too slow to autoscale.") + 'Timed out while waiting for nodes to start. 
' + 'Cluster may be out of resources or ' + 'may be too slow to autoscale.') all_ready = True for node in new_nodes: pod = kubernetes.core_api().read_namespaced_pod( node.metadata.name, self.namespace) - if pod.status.phase == "Pending": + if pod.status.phase == 'Pending': # Check conditions for more detailed status if pod.status.conditions is not None: for condition in pod.status.conditions: @@ -254,14 +259,14 @@ def create_node(self, node_config, tags, count): time.sleep(1) def terminate_node(self, node_id): - logger.info(config.log_prefix + "calling delete_namespaced_pod") + logger.info(config.log_prefix + 'calling delete_namespaced_pod') try: kubernetes.core_api().delete_namespaced_pod(node_id, self.namespace) except kubernetes.api_exception() as e: if e.status == 404: logger.warning(config.log_prefix + - f"Tried to delete pod {node_id}," - " but the pod was not found (404).") + f'Tried to delete pod {node_id},' + ' but the pod was not found (404).') else: raise try: @@ -308,13 +313,13 @@ def get_command_runner(self, container that commands should be run on. """ common_args = { - "log_prefix": log_prefix, - "node_id": node_id, - "provider": self, - "auth_config": auth_config, - "cluster_name": cluster_name, - "process_runner": process_runner, - "use_internal_ip": use_internal_ip, + 'log_prefix': log_prefix, + 'node_id': node_id, + 'provider': self, + 'auth_config': auth_config, + 'cluster_name': cluster_name, + 'process_runner': process_runner, + 'use_internal_ip': use_internal_ip, } command_runner = SSHCommandRunner(**common_args) if use_internal_ip: @@ -347,11 +352,11 @@ def _add_service_name_to_service_port(spec, svc_name): for k in dict_keys: spec[k] = _add_service_name_to_service_port(spec[k], svc_name) - if k == "serviceName" and spec[k] != svc_name: + if k == 'serviceName' and spec[k] != svc_name: raise ValueError( - "The value of serviceName must be set to " - "${RAY_POD_NAME}. 
It is automatically replaced " - "when using the autoscaler.") + 'The value of serviceName must be set to ' + '${RAY_POD_NAME}. It is automatically replaced ' + 'when using the autoscaler.') elif isinstance(spec, list): spec = [ @@ -361,6 +366,6 @@ def _add_service_name_to_service_port(spec, svc_name): elif isinstance(spec, str): # The magic string ${RAY_POD_NAME} is replaced with # the true service name, which is equal to the worker pod name. - if "${RAY_POD_NAME}" in spec: - spec = spec.replace("${RAY_POD_NAME}", svc_name) + if '${RAY_POD_NAME}' in spec: + spec = spec.replace('${RAY_POD_NAME}', svc_name) return spec diff --git a/sky/spot/constants.py b/sky/spot/constants.py index 78aca653e2a..605330691d4 100644 --- a/sky/spot/constants.py +++ b/sky/spot/constants.py @@ -1,6 +1,6 @@ """Constants used for Managed Spot.""" -SPOT_CONTROLLER_IDLE_MINUTES_TO_AUTOSTOP = None +SPOT_CONTROLLER_IDLE_MINUTES_TO_AUTOSTOP = 10 SPOT_CONTROLLER_TEMPLATE = 'spot-controller.yaml.j2' SPOT_CONTROLLER_YAML_PREFIX = '~/.sky/spot_controller' diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 3931d304ff1..37001457ba8 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -23,6 +23,8 @@ provider: # TODO(romilb): Make this configurable. namespace: default + timeout: {{timeout}} + # ServiceAccount created by the autoscaler for the head node pod that it # runs in. If this field isn't provided, the head pod config below must # contain a user-created service account with the proper permissions. @@ -120,7 +122,7 @@ available_node_types: max_workers: {{num_nodes - 1}} # User-specified custom resources for use by Ray. Object with string keys and integer values. # (Ray detects CPU and GPU from pod spec resource requests and limits, so no need to fill those here.) 
- resources: {"example-resource-a": 1, "example-resource-b": 2} + # resources: {"example-resource-a": 1, "example-resource-b": 2} node_config: apiVersion: v1 kind: Pod @@ -135,7 +137,7 @@ available_node_types: volumes: - name: secret-volume secret: - secretName: {{ssh_key_secret_name}} + secretName: {{k8s_ssh_key_secret_name}} - name: dshm emptyDir: medium: Memory @@ -165,7 +167,7 @@ available_node_types: name: dshm - mountPath: /dev/fuse # Required for fuse mounting name: dev-fuse - securityContext: # Required for FUSE mounting, but may be a security risk + securityContext: # Required for FUSE mounting. TODO(romilb) - evaluate security risk privileged: true resources: requests: @@ -206,7 +208,7 @@ available_node_types: volumes: - name: secret-volume secret: - secretName: {{ssh_key_secret_name}} + secretName: {{k8s_ssh_key_secret_name}} - name: dshm emptyDir: medium: Memory @@ -260,7 +262,6 @@ available_node_types: memory: {{memory}}G setup_commands: - # TODO(romilb): Clean up setup commands which are irrelevant to k8s. # Disable `unattended-upgrades` to prevent apt-get from hanging. It should be called at the beginning before the process started to avoid being blocked. (This is a temporary fix.) # Create ~/.ssh/config file in case the file does not exist in the image. # Line 'sudo bash ..': set the ulimit as suggested by ray docs for performance. 
https://docs.ray.io/en/latest/cluster/vms/user-guides/large-cluster-best-practices.html#system-configuration diff --git a/tests/kubernetes/README.md b/tests/kubernetes/README.md index 740f18683ac..18dc58e8191 100644 --- a/tests/kubernetes/README.md +++ b/tests/kubernetes/README.md @@ -9,20 +9,18 @@ This image is hosted at `us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/ To build this image locally and optionally push to the SkyPilot registry, run: ```bash -# Build and loaad image locally -./build.sh +# Build and load image locally +./build_image.sh # Build and push image (CAREFUL - this will push to the SkyPilot registry!) -./build.sh -p +./build_image.sh -p ``` ## Running a local development cluster -You can use (kind)[https://kind.sigs.k8s.io/] to run a local Kubernetes cluster -for development. The following script will create a cluster with 1 node and -will make NodePort services available on localhost. +We use (kind)[https://kind.sigs.k8s.io/] to run a local Kubernetes cluster +for development. ```bash -cd kind -./create_cluster.sh +sky local up ``` ## Running a GKE cluster diff --git a/tests/test_smoke.py b/tests/test_smoke.py index e1fe9183a98..4f7d6e55058 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -763,6 +763,8 @@ def test_gcp_storage_mounts(): @pytest.mark.kubernetes def test_kubernetes_storage_mounts(): # Tests bucket mounting on k8s, assuming S3 is configured. + # This test will fail if run on non x86_64 architecture, since goofys is + # built for x86_64 only. 
name = _get_cluster_name() storage_name = f'sky-test-{int(time.time())}' template_str = pathlib.Path( From c057c88e326cc34f1b8cabd38e077394590af74f Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 27 Jun 2023 15:54:30 -0700 Subject: [PATCH 060/183] lint --- sky/cli.py | 5 ++--- sky/clouds/kubernetes.py | 2 +- sky/skylet/providers/kubernetes/config.py | 15 +++++++-------- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index c94a3f74e3f..a7785e08a13 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -3040,9 +3040,8 @@ def show_gpus( instances. There may be multiple regions with the same lowest price. """ # validation for the --cloud kubernetes - if cloud == 'kubernetes': - raise click.UsageError( - 'Kubernetes does not have a service catalog.') + if cloud == 'kubernetes': + raise click.UsageError('Kubernetes does not have a service catalog.') # validation for the --region flag if region is not None and cloud is None: raise click.UsageError( diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 8fde32c200e..b186b8ed6c8 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -271,7 +271,7 @@ def make_deploy_resources_variables( 'region': region.name, 'cpus': str(cpus), 'memory': str(mem), - 'timeout': self.TIMEOUT, + 'timeout': str(self.TIMEOUT), 'k8s_ssh_key_secret_name': self.SKY_SSH_KEY_SECRET_NAME, } diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index 7002325871c..f693cc08deb 100644 --- a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -21,9 +21,10 @@ class InvalidNamespaceError(ValueError): def __init__(self, field_name, namespace): - self.message = (f'Namespace of {field_name} config does not match provided ' - f'namespace "{namespace}". 
Either set it to {namespace} or remove the ' - 'field') + self.message = ( + f'Namespace of {field_name} config does not match provided ' + f'namespace "{namespace}". Either set it to {namespace} or remove the ' + 'field') def __str__(self): return self.message @@ -101,9 +102,8 @@ def fillout_resources_kubernetes(config): config['available_node_types'][node_type]['resources']) config['available_node_types'][node_type][ 'resources'] = autodetected_resources - logger.debug( - f'Updating the resources of node type {node_type} ' - f'to include {autodetected_resources}.') + logger.debug(f'Updating the resources of node type {node_type} ' + f'to include {autodetected_resources}.') return config @@ -284,8 +284,7 @@ def _configure_autoscaler_role_binding(namespace, provider_config): elif subject['namespace'] != namespace: subject_name = subject['name'] raise InvalidNamespaceError( - binding_field + f' subject {subject_name}', - namespace) + binding_field + f' subject {subject_name}', namespace) name = binding['metadata']['name'] field_selector = f'metadata.name={name}' From b8e414e620e125bd0cc8dc4b3e5b1c7a8574e85b Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 3 Jul 2023 09:46:55 -0700 Subject: [PATCH 061/183] Do not raise error if GPUs requested, return empty list --- sky/clouds/kubernetes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index b186b8ed6c8..aa5bc9fb02e 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -310,9 +310,9 @@ def _make(instance_list): return (_make([default_instance_type]), []) assert len(accelerators) == 1, resources + # If GPUs are requested, return an empty list. # TODO(romilb): Add GPU support. 
- raise NotImplementedError('GPUs are not supported for Kubernetes ' - 'clusters yet.') + return ([], []) @classmethod def check_credentials(cls) -> Tuple[bool, Optional[str]]: From 1fc857b20f49f5910665725609ae0b9d25ed75b6 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 5 Jul 2023 12:17:14 -0700 Subject: [PATCH 062/183] Address comments --- sky/__init__.py | 6 +- sky/adaptors/kubernetes.py | 4 +- sky/authentication.py | 12 +-- sky/backends/backend_utils.py | 14 +--- sky/backends/cloud_vm_ray_backend.py | 27 ++++--- sky/backends/onprem_utils.py | 2 +- sky/clouds/kubernetes.py | 16 ++-- sky/data/sosutil/__init__.py | 0 sky/skylet/providers/kubernetes/config.py | 7 +- sky/templates/kubernetes-ray.yml.j2 | 96 +++++++++++------------ tests/kubernetes/build_image.sh | 2 +- tests/test_smoke.py | 17 ---- 12 files changed, 92 insertions(+), 111 deletions(-) create mode 100644 sky/data/sosutil/__init__.py diff --git a/sky/__init__.py b/sky/__init__.py index 41c129bd5be..c814cee3e62 100644 --- a/sky/__init__.py +++ b/sky/__init__.py @@ -38,19 +38,19 @@ __all__ = [ '__version__', - 'IBM', 'AWS', 'Azure', 'GCP', + 'IBM', + 'Kubernetes', 'Lambda', - 'SCP', 'Local', 'OCI', + 'SCP', 'Optimizer', 'OptimizeTarget', 'backends', 'benchmark', - 'Kubernetes', 'list_accelerators', '__root_dir__', 'Storage', diff --git a/sky/adaptors/kubernetes.py b/sky/adaptors/kubernetes.py index 5648557d4ba..819a42345ab 100644 --- a/sky/adaptors/kubernetes.py +++ b/sky/adaptors/kubernetes.py @@ -2,7 +2,7 @@ # pylint: disable=import-outside-toplevel -from functools import wraps +import functools from sky.utils import ux_utils, env_options @@ -18,7 +18,7 @@ def import_package(func): - @wraps(func) + @functools.wraps(func) def wrapper(*args, **kwargs): global kubernetes global urllib3 diff --git a/sky/authentication.py b/sky/authentication.py index a357fb766a5..f0cbb371cd3 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -461,24 +461,20 @@ def 
setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: subprocess.check_output(cmd, stderr=subprocess.STDOUT, shell=True) except subprocess.CalledProcessError as e: output = e.output.decode('utf-8') - suffix = '' - if env_options.Options.SHOW_DEBUG_INFO.get(): - suffix = f' Error message: {output}' + suffix = f'\nError message: {output}' if 'already exists' in output: logger.warning( f'Key {key_label} already exists in the cluster, using it...') - pass elif any(err in output for err in ['connection refused', 'timeout']): with ux_utils.print_exception_no_traceback(): raise ConnectionError( 'Failed to connect to the cluster. Check if your ' 'cluster is running, your kubeconfig is correct ' - 'and you can connect to it using ' + 'and you can connect to it using: ' f'kubectl get namespaces.{suffix}') from e else: - if suffix: - logger.error(suffix) - raise e + logger.error(suffix) + raise # Need to use ~ relative path because Ray uses the same # path for finding the public key path on both local and head node. 
diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 93863002a7b..e723ce20b3c 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -589,6 +589,7 @@ def _add_multinode_config( f'host named {worker_names[idx]}.') host_name = external_worker_ips[idx] logger.warning(f'Using {host_name} to identify host instead.') + # TODO(romilb): Update port number when k8s supports multinode codegens[idx] = cls._get_generated_config( sky_autogen_comment, host_name, @@ -607,6 +608,7 @@ def _add_multinode_config( host_name = worker_names[idx] overwrites[idx] = True overwrite_begin_idxs[idx] = i - 1 + # TODO(romilb): Update port number when k8s supports multinode codegens[idx] = cls._get_generated_config( sky_autogen_comment, host_name, @@ -857,12 +859,6 @@ def write_cluster_config( assert cluster_name is not None credentials = sky_check.get_cloud_credential_file_mounts() - k8s_image = None - ssh_key_secret_name = None - if isinstance(cloud, clouds.Kubernetes): - # TODO(romilb): Make this read from image id in the task - k8s_image = cloud.IMAGE - ip_list = None auth_config = {'ssh_private_key': auth.PRIVATE_SSH_KEY_PATH} if isinstance(cloud, clouds.Local): @@ -956,10 +952,6 @@ def write_cluster_config( # GCP only: 'gcp_project_id': gcp_project_id, - # Kubernetes only: - 'skypilot_k8s_image': k8s_image, - 'ssh_key_secret_name': ssh_key_secret_name, - # Port of Ray (GCS server). # Ray's default port 6379 is conflicted with Redis. 
'ray_port': constants.SKY_REMOTE_RAY_PORT, @@ -1617,7 +1609,7 @@ def get_head_ssh_port( head_ssh_port = 22 if not isinstance(handle.launched_resources.cloud, clouds.Kubernetes): return head_ssh_port - elif isinstance(handle.launched_resources.cloud, clouds.Kubernetes): + else: if use_cache and handle.head_ssh_port is not None: head_ssh_port = handle.head_ssh_port else: diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index aaad308fd99..c359dd11c43 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2082,7 +2082,7 @@ class CloudVmRayResourceHandle(backends.backend.ResourceHandle): - (optional) Launched resources - (optional) If TPU(s) are managed, a path to a deletion script. """ - _VERSION = 3 + _VERSION = 4 def __init__(self, *, @@ -2193,13 +2193,11 @@ def _update_stable_ssh_ports(self, max_attempts: int = 1) -> None: head_port = backend_utils.get_head_ssh_port( self, use_cache=False, max_attempts=max_attempts) # TODO(romilb): Multinode doesn't work with Kubernetes yet. 
- worker_ports = [22] * self.launched_nodes + worker_ports = [22] * (self.launched_nodes - 1) ports = [head_port] + worker_ports else: # Use port 22 for other clouds - ext_ips = self.external_ips() - assert ext_ips is not None, ext_ips - ports = [22] * len(ext_ips) + ports = [22] * self.launched_nodes self.stable_ssh_ports = ports def _update_stable_cluster_ips(self, max_attempts: int = 1) -> None: @@ -2297,7 +2295,7 @@ def head_ip(self): @property def head_ssh_port(self): external_ssh_ports = self.external_ssh_ports() - if external_ssh_ports is not None: + if external_ssh_ports: return external_ssh_ports[0] return None @@ -2313,11 +2311,16 @@ def __setstate__(self, state): if version < 3: head_ip = state.pop('head_ip', None) state['stable_internal_external_ips'] = None + if version < 4: + # Version 4 adds self.external_ssh_ports for Kubernetes support + head_ssh_port = state.pop('head_ssh_port', None) + state['stable_ssh_ports'] = None self.__dict__.update(state) - # Because the _update_stable_cluster_ips function uses the handle, - # we call it on the current instance after the state is updated + # Because the _update_stable_cluster_ips and _update_stable_ssh_ports + # functions use the handle, we call it on the current instance + # after the state is updated. if version < 3 and head_ip is not None: try: self._update_stable_cluster_ips() @@ -2325,6 +2328,8 @@ def __setstate__(self, state): # This occurs when an old cluster from was autostopped, # so the head IP in the database is not updated. 
pass + if version < 4 and head_ssh_port is not None: + self._update_stable_ssh_ports() self._update_cluster_region() @@ -2595,7 +2600,7 @@ def _provision( ssh_credentials = backend_utils.ssh_credential_from_yaml( handle.cluster_yaml) runners = command_runner.SSHCommandRunner.make_runner_list( - ip_list, **ssh_credentials, port_list=ssh_port_list) + ip_list, port_list=ssh_port_list, **ssh_credentials) def _get_zone(runner): retry_count = 0 @@ -3282,7 +3287,7 @@ def sync_down_logs( ssh_credentials = backend_utils.ssh_credential_from_yaml( handle.cluster_yaml) runners = command_runner.SSHCommandRunner.make_runner_list( - ip_list, **ssh_credentials, port_list=ssh_port_list) + ip_list, port_list=ssh_port_list, **ssh_credentials) def _rsync_down(args) -> None: """Rsync down logs from remote nodes. @@ -3914,7 +3919,7 @@ def _set_tpu_name(self, handle: CloudVmRayResourceHandle, handle.cluster_yaml) runners = command_runner.SSHCommandRunner.make_runner_list( - ip_list, **ssh_credentials, port_list=None) + ip_list, **ssh_credentials) def _setup_tpu_name_on_node( runner: command_runner.SSHCommandRunner) -> None: diff --git a/sky/backends/onprem_utils.py b/sky/backends/onprem_utils.py index a0453a92447..0666210e5aa 100644 --- a/sky/backends/onprem_utils.py +++ b/sky/backends/onprem_utils.py @@ -546,7 +546,7 @@ def do_filemounts_and_setup_on_local_workers( setup_script = log_lib.make_task_bash_script('\n'.join(setup_cmds)) worker_runners = command_runner.SSHCommandRunner.make_runner_list( - worker_ips, **ssh_credentials, port_list=None) + worker_ips, **ssh_credentials) # Uploads setup script to the worker node with tempfile.NamedTemporaryFile('w', prefix='sky_setup_') as f: diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index aa5bc9fb02e..6c3b3cadc14 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -134,6 +134,9 @@ class Kubernetes(clouds.Cloud): _REPR = 'Kubernetes' _regions: List[clouds.Region] = [clouds.Region('kubernetes')] 
_CLOUD_UNSUPPORTED_FEATURES = { + # TODO(romilb): Stopping might be possible to implement with + # container checkpointing introduced in Kubernetes v1.25. See: + # https://kubernetes.io/blog/2022/12/05/forensic-container-checkpointing-alpha/ # pylint: disable=line-too-long clouds.CloudImplementationFeatures.STOP: 'Kubernetes does not ' 'support stopping VMs.', clouds.CloudImplementationFeatures.AUTOSTOP: 'Kubernetes does not ' @@ -169,7 +172,9 @@ def instance_type_to_hourly_cost(self, use_spot: bool, region: Optional[str] = None, zone: Optional[str] = None) -> float: - # Assume zero cost for Kubernetes clusters + # TODO(romilb): Investigate how users can provide their own cost catalog + # for Kubernetes clusters. + # For now, assume zero cost for Kubernetes clusters return 0.0 def accelerators_to_hourly_cost(self, @@ -273,6 +278,7 @@ def make_deploy_resources_variables( 'memory': str(mem), 'timeout': str(self.TIMEOUT), 'k8s_ssh_key_secret_name': self.SKY_SSH_KEY_SECRET_NAME, + 'skypilot_k8s_image': self.SKYPILOT_K8S_IMAGE, } def get_feasible_launchable_resources(self, @@ -338,10 +344,10 @@ def accelerator_in_region_or_zone(self, acc_count: int, region: Optional[str] = None, zone: Optional[str] = None) -> bool: - # TODO(romilb): All accelerators are marked as available for now. In the - # future, we should return false for accelerators that we know are not - # supported by the cluster. - return True + # TODO(romilb): All accelerators are marked as not available for now. + # In the future, we should return false for accelerators that we know + # are not supported by the cluster. 
+ return False @classmethod def query_status(cls, name: str, tag_filters: Dict[str, str], diff --git a/sky/data/sosutil/__init__.py b/sky/data/sosutil/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index f693cc08deb..b53c1164015 100644 --- a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -72,12 +72,10 @@ def bootstrap_kubernetes(config): def fillout_resources_kubernetes(config): - """Fills CPU and GPU resources by reading pod spec of each available node - type. + """Fills CPU and GPU resources in the ray cluster config. For each node type and each of CPU/GPU, looks at container's resources - and limits, takes min of the two. The result is rounded up, as Ray does - not currently support fractional CPU. + and limits, takes min of the two. """ if 'available_node_types' not in config: return config @@ -117,6 +115,7 @@ def get_autodetected_resources(container_data): for resource_name in ['cpu', 'gpu'] } + # TODO(romilb): Update this to allow fractional resources. memory_limits = get_resource(container_resources, 'memory') node_type_resources['memory'] = int(memory_limits) diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 37001457ba8..4577c327823 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -20,7 +20,7 @@ provider: use_internal_ips: false # Namespace to use for all resources created. - # TODO(romilb): Make this configurable. + # TODO(romilb): Make the namespace configurable. namespace: default timeout: {{timeout}} @@ -115,25 +115,28 @@ provider: head_node_type: head_node # Specify the allowed pod types for this ray cluster and the resources they provide. available_node_types: - worker_node: - # Minimum number of Ray workers of this Pod type. 
- min_workers: {{num_nodes - 1}} - # Maximum number of Ray workers of this Pod type. Takes precedence over min_workers. - max_workers: {{num_nodes - 1}} - # User-specified custom resources for use by Ray. Object with string keys and integer values. - # (Ray detects CPU and GPU from pod spec resource requests and limits, so no need to fill those here.) - # resources: {"example-resource-a": 1, "example-resource-b": 2} + head_node: node_config: apiVersion: v1 kind: Pod metadata: + name: {{cluster_name}}-ray-head + # Must match the head node service selector above if a head node + # service is required. labels: - parent: skypilot - skypilot-cluster: {{cluster_name}} - # Automatically generates a name for the pod with this prefix. - generateName: {{cluster_name}}-ray-worker- + parent: skypilot + component: {{cluster_name}}-ray-head + skypilot-cluster: {{cluster_name}} spec: + # Change this if you altered the autoscaler_service_account above + # or want to provide your own. + serviceAccountName: autoscaler + restartPolicy: Never + + # This volume allocates shared memory for Ray to use for its plasma + # object store. If you do not provide this, Ray will fall back to + # /tmp which cause slowdowns if is not a shared memory volume. volumes: - name: secret-volume secret: @@ -148,14 +151,16 @@ available_node_types: - name: ray-node imagePullPolicy: Always image: {{skypilot_k8s_image}} + # Do not change this command - it keeps the pod alive until it is + # explicitly killed. 
command: ["/bin/bash", "-c", "--"] - args: ["trap : TERM INT; sleep infinity & wait;"] - lifecycle: - postStart: - exec: - command: ["/bin/bash", "-c", "mkdir -p ~/.ssh && cp /etc/secret-volume/ssh-publickey ~/.ssh/authorized_keys && sudo service ssh restart"] + args: ['trap : TERM INT; sleep infinity & wait;'] ports: - containerPort: 22 # Used for SSH + - containerPort: {{ray_port}} # Redis port + - containerPort: 10001 # Used by Ray Client + - containerPort: {{ray_dashboard_port}} # Used by Ray Dashboard + # This volume allocates shared memory for Ray to use for its plasma # object store. If you do not provide this, Ray will fall back to # /tmp which cause slowdowns if is not a shared memory volume. @@ -165,10 +170,14 @@ available_node_types: mountPath: "/etc/secret-volume" - mountPath: /dev/shm name: dshm - - mountPath: /dev/fuse # Required for fuse mounting + - mountPath: /dev/fuse # Required for FUSE mounting name: dev-fuse - securityContext: # Required for FUSE mounting. TODO(romilb) - evaluate security risk + securityContext: # Required for FUSE mounting, but may be a security risk privileged: true + lifecycle: + postStart: + exec: + command: ["/bin/bash", "-c", "mkdir -p ~/.ssh && cp /etc/secret-volume/ssh-publickey ~/.ssh/authorized_keys && sudo service ssh restart"] resources: requests: cpu: {{cpus}} @@ -183,28 +192,25 @@ available_node_types: # cause problems for other pods. cpu: {{cpus}} memory: {{memory}}G - head_node: + worker_node: + # Minimum number of Ray workers of this Pod type. + min_workers: {{num_nodes - 1}} + # Maximum number of Ray workers of this Pod type. Takes precedence over min_workers. + max_workers: {{num_nodes - 1}} + # User-specified custom resources for use by Ray. Object with string keys and integer values. + # (Ray detects CPU and GPU from pod spec resource requests and limits, so no need to fill those here.) 
+ # resources: {"example-resource-a": 1, "example-resource-b": 2} node_config: apiVersion: v1 kind: Pod metadata: - name: {{cluster_name}}-ray-head - # Must match the head node service selector above if a head node - # service is required. labels: - parent: skypilot - component: {{cluster_name}}-ray-head - skypilot-cluster: {{cluster_name}} + parent: skypilot + skypilot-cluster: {{cluster_name}} + # Automatically generates a name for the pod with this prefix. + generateName: {{cluster_name}}-ray-worker- spec: - # Change this if you altered the autoscaler_service_account above - # or want to provide your own. - serviceAccountName: autoscaler - restartPolicy: Never - - # This volume allocates shared memory for Ray to use for its plasma - # object store. If you do not provide this, Ray will fall back to - # /tmp which cause slowdowns if is not a shared memory volume. volumes: - name: secret-volume secret: @@ -219,16 +225,14 @@ available_node_types: - name: ray-node imagePullPolicy: Always image: {{skypilot_k8s_image}} - # Do not change this command - it keeps the pod alive until it is - # explicitly killed. command: ["/bin/bash", "-c", "--"] - args: ['trap : TERM INT; sleep infinity & wait;'] + args: ["trap : TERM INT; sleep infinity & wait;"] + lifecycle: + postStart: + exec: + command: ["/bin/bash", "-c", "mkdir -p ~/.ssh && cp /etc/secret-volume/ssh-publickey ~/.ssh/authorized_keys && sudo service ssh restart"] ports: - containerPort: 22 # Used for SSH - - containerPort: {{ray_port}} # Redis port - - containerPort: 10001 # Used by Ray Client - - containerPort: {{ray_dashboard_port}} # Used by Ray Dashboard - # This volume allocates shared memory for Ray to use for its plasma # object store. If you do not provide this, Ray will fall back to # /tmp which cause slowdowns if is not a shared memory volume. 
@@ -238,14 +242,10 @@ available_node_types: mountPath: "/etc/secret-volume" - mountPath: /dev/shm name: dshm - - mountPath: /dev/fuse # Required for FUSE mounting + - mountPath: /dev/fuse # Required for fuse mounting name: dev-fuse - securityContext: # Required for FUSE mounting, but may be a security risk + securityContext: # Required for FUSE mounting. TODO(romilb) - evaluate security risk privileged: true - lifecycle: - postStart: - exec: - command: ["/bin/bash", "-c", "mkdir -p ~/.ssh && cp /etc/secret-volume/ssh-publickey ~/.ssh/authorized_keys && sudo service ssh restart"] resources: requests: cpu: {{cpus}} diff --git a/tests/kubernetes/build_image.sh b/tests/kubernetes/build_image.sh index 55df547f964..9ff1a44c164 100644 --- a/tests/kubernetes/build_image.sh +++ b/tests/kubernetes/build_image.sh @@ -42,4 +42,4 @@ else fi echo "Tagging image as skypilot:latest" -docker tag us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest skypilot:latest \ No newline at end of file +docker tag us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest skypilot:latest diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 804c844fc1f..1a1df5b4d39 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -744,23 +744,6 @@ def test_scp_file_mounts(): run_one_test(test) -@pytest.mark.kubernetes -def test_kubernetes_file_mounts(): - name = _get_cluster_name() - test_commands = [ - *storage_setup_commands, - f'sky launch -y -c {name} --cloud kubernetes --num-nodes 1 examples/using_file_mounts.yaml', - f'sky logs {name} 1 --status', # Ensure the job succeeded. 
- ] - test = Test( - 'kubernetes_using_file_mounts', - test_commands, - f'sky down -y {name}', - timeout=20 * 60, # 20 mins - ) - run_one_test(test) - - def test_using_file_mounts_with_env_vars(generic_cloud: str): name = _get_cluster_name() test_commands = [ From 0ae92eb9d707ee4a49d4b02288ee33ae33ee7bec Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 5 Jul 2023 13:39:38 -0700 Subject: [PATCH 063/183] comments --- sky/clouds/kubernetes.py | 7 ++++++- sky/skylet/providers/kubernetes/node_provider.py | 3 +++ sky/templates/kubernetes-ray.yml.j2 | 4 ++-- tests/test_smoke.py | 2 ++ 4 files changed, 13 insertions(+), 3 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 6c3b3cadc14..976c54b4a61 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -206,6 +206,10 @@ def get_default_instance_type( disk_tier: Optional[str] = None) -> Optional[str]: del disk_tier # Unused. # TODO(romilb): Allow fractional CPUs and memory + # TODO(romilb): We should check the maximum number of CPUs and memory + # that can be requested, and return None if the requested resources + # exceed the maximum. This may require thought about how to handle + # autoscaling clusters. # We strip '+' from resource requests since Kubernetes can provision # exactly the requested resources. 
instance_cpus = int( @@ -278,7 +282,8 @@ def make_deploy_resources_variables( 'memory': str(mem), 'timeout': str(self.TIMEOUT), 'k8s_ssh_key_secret_name': self.SKY_SSH_KEY_SECRET_NAME, - 'skypilot_k8s_image': self.SKYPILOT_K8S_IMAGE, + # TODO(romilb): Allow user to specify custom images + 'image_id': self.IMAGE, } def get_feasible_launchable_resources(self, diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index bd9e017a286..7210738c31a 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -285,6 +285,9 @@ def terminate_node(self, node_id): pass def terminate_nodes(self, node_ids): + # TODO(romilb): terminate_nodes should be include optimizations for + # deletion of multiple nodes. Currently, it deletes one node at a time. + # We should look in to using deletecollection here for batch deletion. for node_id in node_ids: self.terminate_node(node_id) diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 4577c327823..0e31fe51c04 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -150,7 +150,7 @@ available_node_types: containers: - name: ray-node imagePullPolicy: Always - image: {{skypilot_k8s_image}} + image: {{image_id}} # Do not change this command - it keeps the pod alive until it is # explicitly killed. 
command: ["/bin/bash", "-c", "--"] @@ -224,7 +224,7 @@ available_node_types: containers: - name: ray-node imagePullPolicy: Always - image: {{skypilot_k8s_image}} + image: {{image_id}} command: ["/bin/bash", "-c", "--"] args: ["trap : TERM INT; sleep infinity & wait;"] lifecycle: diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 1a1df5b4d39..3373866bfe0 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -712,6 +712,8 @@ def test_file_mounts(generic_cloud: str): extra_flags = '' if generic_cloud in 'kubernetes': # Kubernetes does not support multi-node + # NOTE: This test will fail if you have a Kubernetes cluster running on + # arm64 (e.g., Apple Silicon) since goofys does not work on arm64. extra_flags = '--num-nodes 1' test_commands = [ *storage_setup_commands, From 10f302f18c24cedba1604ef90d6648928031bf75 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 5 Jul 2023 13:42:29 -0700 Subject: [PATCH 064/183] lint --- sky/authentication.py | 1 - sky/backends/cloud_vm_ray_backend.py | 2 +- sky/backends/onprem_utils.py | 2 +- sky/data/sosutil/__init__.py | 0 4 files changed, 2 insertions(+), 3 deletions(-) delete mode 100644 sky/data/sosutil/__init__.py diff --git a/sky/authentication.py b/sky/authentication.py index f0cbb371cd3..5aa5cf61a4c 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -20,7 +20,6 @@ from sky import sky_logging from sky.adaptors import gcp, ibm from sky.utils import common_utils -from sky.utils import env_options from sky.utils import subprocess_utils from sky.utils import ux_utils from sky.skylet.providers.lambda_cloud import lambda_utils diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index c359dd11c43..85ae2775282 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -3919,7 +3919,7 @@ def _set_tpu_name(self, handle: CloudVmRayResourceHandle, handle.cluster_yaml) runners = 
command_runner.SSHCommandRunner.make_runner_list( - ip_list, **ssh_credentials) + ip_list, port_list=None, **ssh_credentials) def _setup_tpu_name_on_node( runner: command_runner.SSHCommandRunner) -> None: diff --git a/sky/backends/onprem_utils.py b/sky/backends/onprem_utils.py index 0666210e5aa..f95bae75f39 100644 --- a/sky/backends/onprem_utils.py +++ b/sky/backends/onprem_utils.py @@ -546,7 +546,7 @@ def do_filemounts_and_setup_on_local_workers( setup_script = log_lib.make_task_bash_script('\n'.join(setup_cmds)) worker_runners = command_runner.SSHCommandRunner.make_runner_list( - worker_ips, **ssh_credentials) + worker_ips, port_list=None, **ssh_credentials) # Uploads setup script to the worker node with tempfile.NamedTemporaryFile('w', prefix='sky_setup_') as f: diff --git a/sky/data/sosutil/__init__.py b/sky/data/sosutil/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 From 54b2b280c79e91a8139f3569cc21702e752e7911 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 13 Jul 2023 00:25:06 -0400 Subject: [PATCH 065/183] Remove public key upload --- sky/authentication.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/sky/authentication.py b/sky/authentication.py index ed701db31a6..ba9e4d1a1c7 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -401,12 +401,4 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: logger.error(suffix) raise - # Need to use ~ relative path because Ray uses the same - # path for finding the public key path on both local and head node. 
- config['auth']['ssh_public_key'] = PUBLIC_SSH_KEY_PATH - - file_mounts = config['file_mounts'] - file_mounts[PUBLIC_SSH_KEY_PATH] = PUBLIC_SSH_KEY_PATH - config['file_mounts'] = file_mounts - return config From fc362b7e533ab5ef0550480411418e93b1146265 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 14 Jul 2023 17:35:47 -0400 Subject: [PATCH 066/183] GPU support init --- sky/clouds/kubernetes.py | 45 +++++++++++++++++++++++------ sky/templates/kubernetes-ray.yml.j2 | 2 ++ tests/kubernetes/README.md | 24 ++++++++++++++- 3 files changed, 61 insertions(+), 10 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 976c54b4a61..6ce021a4850 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -147,6 +147,7 @@ class Kubernetes(clouds.Cloud): 'implementation yet.', } + # TODO(romilb): Add GPU Support - have GPU-enabled image. IMAGE = 'us-central1-docker.pkg.dev/' \ 'skypilot-375900/skypilotk8s/skypilot:latest' @@ -228,7 +229,10 @@ def get_accelerators_from_instance_type( instance_type: str, ) -> Optional[Dict[str, int]]: # TODO(romilb): Add GPU support. - return None + inst = KubernetesInstanceType.from_instance_type(instance_type) + return { + inst.accelerator_type: inst.accelerator_count + } if inst.accelerator_count else None @classmethod def get_vcpus_mem_from_instance_type( @@ -274,17 +278,33 @@ def make_deploy_resources_variables( # We fetch the default values for the instance type in that case. cpus, mem = self.get_vcpus_mem_from_instance_type( resources.instance_type) - return { + acc_count = 0 + acc_type = None + + # Add accelerator variables if they are set. + accelerators = resources.accelerators + if accelerators is not None: + assert len(accelerators) == 1, resources + acc_type, acc_count = list(accelerators.items())[0] + # TODO(romilb): Add accelerator type support. 
+ # For now, hacking back to None + acc_type = None + + vars = { 'instance_type': resources.instance_type, 'custom_resources': custom_resources, 'region': region.name, 'cpus': str(cpus), 'memory': str(mem), + 'accelerator_count': str(acc_count), 'timeout': str(self.TIMEOUT), 'k8s_ssh_key_secret_name': self.SKY_SSH_KEY_SECRET_NAME, # TODO(romilb): Allow user to specify custom images 'image_id': self.IMAGE, } + return vars + + def get_feasible_launchable_resources(self, resources: 'resources_lib.Resources'): @@ -309,21 +329,28 @@ def _make(instance_list): # Currently, handle a filter on accelerators only. accelerators = resources.accelerators + default_instance_type = Kubernetes.get_default_instance_type( + cpus=resources.cpus, + memory=resources.memory, + disk_tier=resources.disk_tier) if accelerators is None: # Return a default instance type with the given number of vCPUs. - default_instance_type = Kubernetes.get_default_instance_type( - cpus=resources.cpus, - memory=resources.memory, - disk_tier=resources.disk_tier) if default_instance_type is None: return ([], []) else: - return (_make([default_instance_type]), []) + return _make([default_instance_type]), [] assert len(accelerators) == 1, resources # If GPUs are requested, return an empty list. # TODO(romilb): Add GPU support. - return ([], []) + acc_type, acc_count = list(accelerators.items())[0] + default_inst = KubernetesInstanceType.from_instance_type(default_instance_type) + instance_type = KubernetesInstanceType.from_resources(int(default_inst.cpus), + int(default_inst.memory), + int(acc_count), + acc_type).name + # No fuzzy lists for Kubernetes + return _make([instance_type]), [] @classmethod def check_credentials(cls) -> Tuple[bool, Optional[str]]: @@ -352,7 +379,7 @@ def accelerator_in_region_or_zone(self, # TODO(romilb): All accelerators are marked as not available for now. # In the future, we should return false for accelerators that we know # are not supported by the cluster. 
- return False + return True @classmethod def query_status(cls, name: str, tag_filters: Dict[str, str], diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 0e31fe51c04..475f3a1a6e7 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -250,6 +250,7 @@ available_node_types: requests: cpu: {{cpus}} memory: {{memory}}G + nvidia.com/gpu: {{accelerator_count}} limits: # The maximum memory that this pod is allowed to use. The # limit will be detected by ray and split to use 10% for @@ -260,6 +261,7 @@ available_node_types: # cause problems for other pods. cpu: {{cpus}} memory: {{memory}}G + nvidia.com/gpu: {{accelerator_count}} setup_commands: # Disable `unattended-upgrades` to prevent apt-get from hanging. It should be called at the beginning before the process started to avoid being blocked. (This is a temporary fix.) diff --git a/tests/kubernetes/README.md b/tests/kubernetes/README.md index 18dc58e8191..ad68f41f979 100644 --- a/tests/kubernetes/README.md +++ b/tests/kubernetes/README.md @@ -38,4 +38,26 @@ gcloud container clusters get-credentials --region ## Other useful scripts `scripts` directory contains other useful scripts for development, including Kubernetes dashboard, ray yaml for testing the SkyPilot Kubernetes node provider -and more. \ No newline at end of file +and more. + +# GKE GPU support guide + +Create a GKE cluster using the cloud console. Use standard cluster, not autopilot. + +## Install nvidia drivers (if needed) +If you're using GKE and running GKE < 1.27.2-gke.1200, you'll need to manually install nvidia drivers. +```bash +# For ubuntu image: +kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/ubuntu/daemonset-preloaded.yaml +``` + +[Not sure] This will create a resource like `nvidia.com/gpu: 1`. However, we still need labels for GPU type (e.g., A100). 
+ +## Install GPU feature discovery +NOTE - GFD does not work on GKE! https://github.com/NVIDIA/gpu-feature-discovery/issues/44 +We can use Nvidia [gpu-feature-discovery](https://github.com/NVIDIA/gpu-feature-discovery/blob/main/README.md) to detect GPUs on the nodes and automatically label the nodes. + +```bash +kubectl apply -f https://raw.githubusercontent.com/NVIDIA/gpu-feature-discovery/v0.8.1/deployments/static/nfd.yaml +kubectl apply -f https://raw.githubusercontent.com/NVIDIA/gpu-feature-discovery/v0.8.1/deployments/static/gpu-feature-discovery-daemonset.yaml +``` \ No newline at end of file From 36f9ebc3089d37c6319fd6b162665192f945321b Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sat, 15 Jul 2023 16:53:20 -0700 Subject: [PATCH 067/183] wip --- Dockerfile_k8s_gpu | 49 ++++++++++++++++++++++++++++++++ sky/clouds/kubernetes.py | 2 +- tests/kubernetes/build_image.sh | 50 +++++++++++++++++++++++++-------- 3 files changed, 88 insertions(+), 13 deletions(-) create mode 100644 Dockerfile_k8s_gpu diff --git a/Dockerfile_k8s_gpu b/Dockerfile_k8s_gpu new file mode 100644 index 00000000000..91a8b5aeb70 --- /dev/null +++ b/Dockerfile_k8s_gpu @@ -0,0 +1,49 @@ +FROM rayproject/ray:2.4.0-gpu + +# Initialize conda for root user, install ssh and other local dependencies +RUN sudo apt update -y && \ + sudo apt install gcc rsync sudo patch openssh-server pciutils nano fuse -y && \ + sudo rm -rf /var/lib/apt/lists/* && \ + sudo apt remove -y python3 && \ + conda init + +# Setup SSH and generate hostkeys +RUN sudo mkdir -p /var/run/sshd && \ + sudo chmod 0755 /var/run/sshd && \ + sudo sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \ + sudo sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd && \ + cd /etc/ssh/ && \ + ssh-keygen -A + +# Setup new user named sky and add to sudoers. Also add /opt/conda/bin to sudo path. 
+RUN sudo useradd -m -s /bin/bash sky && \ + sudo /bin/bash -c 'echo "sky ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers' && \ + sudo /bin/bash -c "echo 'Defaults secure_path=\"/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\"' > /etc/sudoers.d/sky" + + +# Switch to sky user +USER sky + +# Install SkyPilot pip dependencies +RUN pip install wheel Click colorama cryptography jinja2 jsonschema && \ + pip install networkx oauth2client pandas pendulum PrettyTable && \ + pip install rich tabulate filelock && \ + pip install packaging 'protobuf<4.0.0' pulp && \ + pip install awscli boto3 pycryptodome==3.12.0 && \ + pip install docker kubernetes + +# Add /home/sky/.local/bin/ to PATH +RUN echo 'export PATH="$PATH:$HOME/.local/bin"' >> ~/.bashrc + +# Install SkyPilot. This is purposely separate from installing SkyPilot +# dependencies to optimize rebuild time +COPY --chown=sky . /skypilot/sky/ + +# TODO(romilb): Installing SkyPilot may not be necessary since ray up will do it +RUN cd /skypilot/ && \ + sudo mv -v sky/setup_files/* . && \ + pip install ".[aws]" + +# Set WORKDIR and initialize conda for sky user +WORKDIR /home/sky +RUN conda init diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 6ce021a4850..3e900373083 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -147,7 +147,7 @@ class Kubernetes(clouds.Cloud): 'implementation yet.', } - # TODO(romilb): Add GPU Support - have GPU-enabled image. + # TODO(romilb): Add GPU Support - have GPU-enabled image.sky IMAGE = 'us-central1-docker.pkg.dev/' \ 'skypilot-375900/skypilotk8s/skypilot:latest' diff --git a/tests/kubernetes/build_image.sh b/tests/kubernetes/build_image.sh index 9ff1a44c164..36167101027 100644 --- a/tests/kubernetes/build_image.sh +++ b/tests/kubernetes/build_image.sh @@ -1,32 +1,52 @@ #!/bin/bash # Builds the Dockerfile_k8s image as the SkyPilot image. -# Optionally, if -p is specified, pushes the image to the registry. 
# Uses buildx to build the image for both amd64 and arm64. -# Usage: ./build_image.sh [-p] +# If -p flag is specified, pushes the image to the registry. +# If -g flag is specified, builds the GPU image in Dockerfile_k8s_gpu. GPU image is built only for amd64. +# Usage: ./build_image.sh [-p] [-g] # -p: Push the image to the registry +# -g: Build the GPU image TAG=us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest # Parse command line arguments -while getopts ":p" opt; do - case $opt in - p) +while getopts ":pg" opt; do + case ${opt} in + p ) push=true ;; - \?) - echo "Invalid option: -$OPTARG" >&2 + g ) + gpu=true + ;; + \? ) + echo "Usage: ./build_image.sh [-p] [-g]" + echo "-p: Push the image to the registry" + echo "-g: Build the GPU image" + exit 1 ;; esac done +# Add -gpu to the tag if the GPU image is being built +if [[ $gpu ]]; then + TAG=$TAG-gpu +fi + # Navigate to the root of the project (inferred from git) cd "$(git rev-parse --show-toplevel)" # If push is used, build the image for both amd64 and arm64 if [[ $push ]]; then - echo "Building and pushing for amd64 and arm64" - # Push both platforms as one image manifest list - docker buildx build --push --platform linux/amd64,linux/arm64 -t $TAG -f Dockerfile_k8s ./sky + # If gpu is used, build the GPU image + if [[ $gpu ]]; then + echo "Building and pushing GPU image for amd64" + docker buildx build --push --platform linux/amd64 -t $TAG -f Dockerfile_k8s_gpu ./sky + fi + # Else, build the CPU image + else + echo "Building and pushing CPU image for amd64 and arm64" + docker buildx build --push --platform linux/arm64,linux/amd64 -t $TAG -f Dockerfile_k8s ./sky + fi fi # Load the right image depending on the architecture of the host machine (Apple Silicon or Intel) @@ -41,5 +61,11 @@ else exit 1 fi -echo "Tagging image as skypilot:latest" -docker tag us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest skypilot:latest +echo "Tagging image." 
+if [[ $gpu ]]; then + docker tag $TAG skypilot:latest-gpu +else + docker tag $TAG skypilot:latest +fi + +docker buildx build --push --platform linux/amd64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest-gpu -f Dockerfile_k8s_gpu ./sky \ No newline at end of file From 5ee821d037e395058c459b812554f16bff76e414 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sat, 15 Jul 2023 16:54:53 -0700 Subject: [PATCH 068/183] add shebang --- sky/utils/kubernetes/create_cluster.sh | 1 + sky/utils/kubernetes/delete_cluster.sh | 1 + 2 files changed, 2 insertions(+) diff --git a/sky/utils/kubernetes/create_cluster.sh b/sky/utils/kubernetes/create_cluster.sh index 574c1fdd9e9..a3a12816efa 100644 --- a/sky/utils/kubernetes/create_cluster.sh +++ b/sky/utils/kubernetes/create_cluster.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Creates a local Kubernetes cluster using kind # Usage: ./create_cluster.sh # Invokes generate_kind_config.py to generate a kind-cluster.yaml with NodePort mappings diff --git a/sky/utils/kubernetes/delete_cluster.sh b/sky/utils/kubernetes/delete_cluster.sh index 0256f43b8f3..25c0afc8a9e 100644 --- a/sky/utils/kubernetes/delete_cluster.sh +++ b/sky/utils/kubernetes/delete_cluster.sh @@ -1,3 +1,4 @@ +#!/bin/bash # Deletes the local kind cluster # Usage: ./delete_cluster.sh # Raises error code 100 if the local cluster does not exist From d6ca85ab4ea60747ba3631711f4c99346f000882 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 16 Jul 2023 12:31:07 -0700 Subject: [PATCH 069/183] comments --- sky/backends/cloud_vm_ray_backend.py | 5 ++--- sky/clouds/kubernetes.py | 2 +- sky/skylet/providers/kubernetes/utils.py | 5 +++++ sky/templates/kubernetes-ray.yml.j2 | 4 ++-- sky/utils/kubernetes/create_cluster.sh | 2 +- sky/utils/kubernetes/delete_cluster.sh | 2 +- tests/test_smoke.py | 6 +++--- ..._storage_mounting.yaml => test_storage_mounting.yaml.j2} | 0 8 files changed, 15 insertions(+), 11 deletions(-) rename 
tests/test_yamls/{test_storage_mounting.yaml => test_storage_mounting.yaml.j2} (100%) diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index f646412ab3f..b1b0b5701df 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2314,8 +2314,7 @@ def __setstate__(self, state): head_ip = state.pop('head_ip', None) state['stable_internal_external_ips'] = None if version < 4: - # Version 4 adds self.external_ssh_ports for Kubernetes support - head_ssh_port = state.pop('head_ssh_port', None) + # Version 4 adds self.stable_ssh_ports for Kubernetes support state['stable_ssh_ports'] = None self.__dict__.update(state) @@ -2330,7 +2329,7 @@ def __setstate__(self, state): # This occurs when an old cluster from was autostopped, # so the head IP in the database is not updated. pass - if version < 4 and head_ssh_port is not None: + if version < 4: self._update_stable_ssh_ports() self._update_cluster_region() diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 976c54b4a61..3fa1e1fb9bd 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -288,7 +288,7 @@ def make_deploy_resources_variables( def get_feasible_launchable_resources(self, resources: 'resources_lib.Resources'): - if resources.use_spot: + if resources.use_spot or resources.disk_tier is not None: return ([], []) fuzzy_candidate_list: List[str] = [] if resources.instance_type is not None: diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index eb6c9b5fbac..444cfb702e2 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -29,6 +29,11 @@ def check_credentials(timeout: int = 3) -> Tuple[bool, Optional[str]]: try: kubernetes.core_api().list_namespace(_request_timeout=timeout) return True, None + except ImportError: + # TODO(romilb): Update these error strs to also include link to docs + # when docs are ready. 
+ return False, f'`kubernetes` package is not installed. ' \ + f'Install it with: pip install kubernetes' except kubernetes.api_exception() as e: # Check if the error is due to invalid credentials if e.status == 401: diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 0e31fe51c04..e36afdbc65f 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -149,7 +149,7 @@ available_node_types: path: /dev/fuse containers: - name: ray-node - imagePullPolicy: Always + imagePullPolicy: IfNotPresent image: {{image_id}} # Do not change this command - it keeps the pod alive until it is # explicitly killed. @@ -223,7 +223,7 @@ available_node_types: path: /dev/fuse containers: - name: ray-node - imagePullPolicy: Always + imagePullPolicy: IfNotPresent image: {{image_id}} command: ["/bin/bash", "-c", "--"] args: ["trap : TERM INT; sleep infinity & wait;"] diff --git a/sky/utils/kubernetes/create_cluster.sh b/sky/utils/kubernetes/create_cluster.sh index a3a12816efa..8915143ace8 100644 --- a/sky/utils/kubernetes/create_cluster.sh +++ b/sky/utils/kubernetes/create_cluster.sh @@ -40,4 +40,4 @@ kind load docker-image --name skypilot us-central1-docker.pkg.dev/skypilot-37590 # Print CPUs available on the local cluster NUM_CPUS=$(kubectl get nodes -o jsonpath='{.items[0].status.capacity.cpu}') echo "Kubernetes cluster ready! Run `sky check` to setup Kubernetes access." -echo "Number of CPUs available on the local cluster: $NUM_CPUS" \ No newline at end of file +echo "Number of CPUs available on the local cluster: $NUM_CPUS" diff --git a/sky/utils/kubernetes/delete_cluster.sh b/sky/utils/kubernetes/delete_cluster.sh index 25c0afc8a9e..576f89ad90f 100644 --- a/sky/utils/kubernetes/delete_cluster.sh +++ b/sky/utils/kubernetes/delete_cluster.sh @@ -23,4 +23,4 @@ if ! kind get clusters | grep -q skypilot; then fi kind delete cluster --name skypilot -echo "Local cluster deleted!" 
\ No newline at end of file +echo "Local cluster deleted!" diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 3373866bfe0..c965970a12f 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -774,7 +774,7 @@ def test_aws_storage_mounts(): name = _get_cluster_name() storage_name = f'sky-test-{int(time.time())}' template_str = pathlib.Path( - 'tests/test_yamls/test_storage_mounting.yaml').read_text() + 'tests/test_yamls/test_storage_mounting.yaml.j2').read_text() template = jinja2.Template(template_str) content = template.render(storage_name=storage_name) with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as f: @@ -801,7 +801,7 @@ def test_gcp_storage_mounts(): name = _get_cluster_name() storage_name = f'sky-test-{int(time.time())}' template_str = pathlib.Path( - 'tests/test_yamls/test_storage_mounting.yaml').read_text() + 'tests/test_yamls/test_storage_mounting.yaml.j2').read_text() template = jinja2.Template(template_str) content = template.render(storage_name=storage_name) with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as f: @@ -831,7 +831,7 @@ def test_kubernetes_storage_mounts(): name = _get_cluster_name() storage_name = f'sky-test-{int(time.time())}' template_str = pathlib.Path( - 'tests/test_yamls/test_storage_mounting.yaml').read_text() + 'tests/test_yamls/test_storage_mounting.yaml.j2').read_text() template = jinja2.Template(template_str) content = template.render(storage_name=storage_name) with tempfile.NamedTemporaryFile(suffix='.yaml', mode='w') as f: diff --git a/tests/test_yamls/test_storage_mounting.yaml b/tests/test_yamls/test_storage_mounting.yaml.j2 similarity index 100% rename from tests/test_yamls/test_storage_mounting.yaml rename to tests/test_yamls/test_storage_mounting.yaml.j2 From fbae4bf45ff849ec6f85b74a020ccff0650f1e56 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 16 Jul 2023 12:38:18 -0700 Subject: [PATCH 070/183] change permissions --- sky/utils/kubernetes/create_cluster.sh | 0 
sky/utils/kubernetes/delete_cluster.sh | 0 2 files changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 sky/utils/kubernetes/create_cluster.sh mode change 100644 => 100755 sky/utils/kubernetes/delete_cluster.sh diff --git a/sky/utils/kubernetes/create_cluster.sh b/sky/utils/kubernetes/create_cluster.sh old mode 100644 new mode 100755 diff --git a/sky/utils/kubernetes/delete_cluster.sh b/sky/utils/kubernetes/delete_cluster.sh old mode 100644 new mode 100755 From 6e9e6ba227afd1f4250290cbbd2021cde1994b5c Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 16 Jul 2023 12:42:21 -0700 Subject: [PATCH 071/183] remove chmod --- sky/cli.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 53757edba76..d658e7160e1 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4432,7 +4432,6 @@ def local_up(): path_to_package = os.path.dirname(os.path.dirname(__file__)) up_script_path = os.path.join(path_to_package, 'sky/utils/kubernetes', 'create_cluster.sh') - subprocess_utils.run_no_outputs('chmod +x {}'.format(up_script_path)) # Get directory of script and run it from there cwd = os.path.dirname(os.path.abspath(up_script_path)) # Run script and don't print output @@ -4480,7 +4479,6 @@ def local_down(): path_to_package = os.path.dirname(os.path.dirname(__file__)) down_script_path = os.path.join(path_to_package, 'sky/utils/kubernetes', 'delete_cluster.sh') - subprocess_utils.run_no_outputs('chmod +x {}'.format(down_script_path)) try: subprocess_utils.run(down_script_path, capture_output=True) cluster_removed = True From a3f827eb67fb3e2b919569a31598c2b4c16b7435 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 16 Jul 2023 12:56:19 -0700 Subject: [PATCH 072/183] merge 2241 --- sky/templates/kubernetes-ray.yml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index e36afdbc65f..f256d54ae3f 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ 
b/sky/templates/kubernetes-ray.yml.j2 @@ -272,7 +272,7 @@ setup_commands: pip3 --version > /dev/null 2>&1 || (curl -sSL https://bootstrap.pypa.io/get-pip.py -o get-pip.py && python3 get-pip.py && echo "PATH=$HOME/.local/bin:$PATH" >> ~/.bashrc); (type -a python | grep -q python3) || echo 'alias python=python3' >> ~/.bashrc; (type -a pip | grep -q pip3) || echo 'alias pip=pip3' >> ~/.bashrc; - which conda > /dev/null 2>&1 || (wget -nc https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && bash Miniconda3-latest-Linux-x86_64.sh -b && eval "$(~/miniconda3/bin/conda shell.bash hook)" && conda init && conda config --set auto_activate_base true); + {{ conda_installation_commands }} source ~/.bashrc; mkdir -p ~/sky_workdir && mkdir -p ~/.sky/sky_app && touch ~/.sudo_as_admin_successful; (pip3 list | grep skypilot && [ "$(cat {{sky_remote_path}}/current_sky_wheel_hash)" == "{{sky_wheel_hash}}" ]) || (pip3 uninstall skypilot -y; pip3 install "$(echo {{sky_remote_path}}/{{sky_wheel_hash}}/skypilot-{{sky_version}}*.whl)" && echo "{{sky_wheel_hash}}" > {{sky_remote_path}}/current_sky_wheel_hash || exit 1); From 9687ea8b5df075a2909b8c4eb8b7a4300557649b Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 16 Jul 2023 12:58:56 -0700 Subject: [PATCH 073/183] add todo --- Dockerfile_k8s | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Dockerfile_k8s b/Dockerfile_k8s index 42e97206a2f..12dbaa9006c 100644 --- a/Dockerfile_k8s +++ b/Dockerfile_k8s @@ -1,5 +1,8 @@ FROM continuumio/miniconda3:22.11.1 +# TODO(romilb): Investigate if this image can be consolidated with the skypilot +# client image (`Dockerfile`) + # Initialize conda for root user, install ssh and other local dependencies RUN apt update -y && \ apt install gcc rsync sudo patch openssh-server pciutils nano fuse -y && \ From 4b5455537531b82e9a430348cc92d0f911d31b76 Mon Sep 17 00:00:00 2001 From: Hemil Desai Date: Wed, 19 Jul 2023 15:58:52 -0700 Subject: [PATCH 074/183] Handle kube config management 
for sky local commands (#2253) * Set current-context (if availablee) after sky local down and remove incorrect prompt in sky local up * Warn user of kubeconfig context switch during sky local up * Use Optional instead of Union --- sky/cli.py | 14 +++++++------- sky/skylet/providers/kubernetes/utils.py | 15 +++++++++++++++ sky/utils/kubernetes/delete_cluster.sh | 7 +++++++ 3 files changed, 29 insertions(+), 7 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index d658e7160e1..96d826eb5ba 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4421,13 +4421,13 @@ def local_up(): cluster_created = False # Check if ~/.kube/config exists: if os.path.exists(os.path.expanduser('~/.kube/config')): - # Check if kubeconfig is valid, `kind delete` leaves an empty kubeconfig - valid, reason = kubernetes_utils.check_credentials() - if valid or (not valid and 'Invalid configuration' not in reason): - # Could be a valid kubeconfig or a non-empty but non-functioning - # kubeconfig - check if user wants to overwrite it - prompt = 'Cluster config found at ~/.kube/config. Overwrite it?' - click.confirm(prompt, default=True, abort=True, show_default=True) + current_context = kubernetes_utils.get_current_kube_config_context() + skypilot_context = "kind-skypilot" + if current_context is not None and current_context != skypilot_context: + click.echo( + f'Current context in kube config: {current_context}' + '\nWill automatically switch to kind-skypilot after the local cluster is created.' 
+ ) with log_utils.safe_rich_status('Creating local cluster...'): path_to_package = os.path.dirname(os.path.dirname(__file__)) up_script_path = os.path.join(path_to_package, 'sky/utils/kubernetes', diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 444cfb702e2..5e8082c223a 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -66,3 +66,18 @@ def get_cluster_status(cluster_name: str, cluster_status.append(status_lib.ClusterStatus.INIT) # If pods are not found, we don't add them to the return list return cluster_status + + +def get_current_kube_config_context() -> Optional[str]: + """ + Get the current kubernetes context from the kubeconfig file + + Returns: + str | None: The current kubernetes context if it exists, None otherwise + """ + k8s = kubernetes.get_kubernetes() + try: + _, current_context = k8s.config.list_kube_config_contexts() + return current_context['name'] + except k8s.config.config_exception.ConfigException: + return None diff --git a/sky/utils/kubernetes/delete_cluster.sh b/sky/utils/kubernetes/delete_cluster.sh index 576f89ad90f..1f93270f414 100755 --- a/sky/utils/kubernetes/delete_cluster.sh +++ b/sky/utils/kubernetes/delete_cluster.sh @@ -24,3 +24,10 @@ fi kind delete cluster --name skypilot echo "Local cluster deleted!" + +# Switch to the first available context +AVAILABLE_CONTEXT=$(kubectl config get-contexts -o name | head -n 1) +if [ ! -z "$AVAILABLE_CONTEXT" ]; then + echo "Switching to context $AVAILABLE_CONTEXT" + kubectl config use-context $AVAILABLE_CONTEXT +fi From f73f1b25d5d970ceef6ead6b92035242dd1f6d89 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 19 Jul 2023 15:59:44 -0700 Subject: [PATCH 075/183] Switch context in create_cluster if cluster already exists. 
--- sky/utils/kubernetes/create_cluster.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sky/utils/kubernetes/create_cluster.sh b/sky/utils/kubernetes/create_cluster.sh index 8915143ace8..c5b74f6819d 100755 --- a/sky/utils/kubernetes/create_cluster.sh +++ b/sky/utils/kubernetes/create_cluster.sh @@ -23,6 +23,8 @@ fi # Check if the local cluster already exists if kind get clusters | grep -q skypilot; then echo "Local cluster already exists. Exiting." + # Switch context to the local cluster + kubectl config use-context kind-skypilot exit 100 fi From a69df01b3a48ef92c6647114a720eb7728f7ae41 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 20 Jul 2023 15:13:58 -0700 Subject: [PATCH 076/183] fix typo --- sky/clouds/kubernetes.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 3fa1e1fb9bd..edaddb609a6 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -145,6 +145,13 @@ class Kubernetes(clouds.Cloud): 'supported by the ' 'Kubernetes ' 'implementation yet.', + clouds.CloudImplementationFeatures.SPOT_INSTANCE: 'Spot instances are ' + 'not supported in ' + 'Kubernetes.', + clouds.CloudImplementationFeatures.CUSTOM_DOSK_TIER: 'Custom disk ' + 'tiers are not ' + 'supported in ' + 'Kubernetes.', } IMAGE = 'us-central1-docker.pkg.dev/' \ From 6a931e214d6892ff14c7118699eae262d2c29cf2 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 20 Jul 2023 16:32:24 -0700 Subject: [PATCH 077/183] update sky check error msg after sky local down --- sky/adaptors/kubernetes.py | 27 ++++++++++++++++-------- sky/clouds/kubernetes.py | 6 ++---- sky/skylet/providers/kubernetes/utils.py | 2 ++ 3 files changed, 22 insertions(+), 13 deletions(-) diff --git a/sky/adaptors/kubernetes.py b/sky/adaptors/kubernetes.py index 819a42345ab..7348196c03a 100644 --- a/sky/adaptors/kubernetes.py +++ b/sky/adaptors/kubernetes.py @@ -15,7 +15,6 @@ _networking_api = None _custom_objects_api = None - def 
import_package(func): @functools.wraps(func) @@ -52,15 +51,25 @@ def _load_config(): try: kubernetes.config.load_kube_config() except kubernetes.config.config_exception.ConfigException as e: - with ux_utils.print_exception_no_traceback(): - suffix = '' - if env_options.Options.SHOW_DEBUG_INFO.get(): - suffix += f' Error: {str(e)}' - raise ValueError( + suffix = '' + if env_options.Options.SHOW_DEBUG_INFO.get(): + suffix += f' Error: {str(e)}' + # Check if exception was due to no current-context + if 'Expected key current-context' in str(e): + err_str = ( 'Failed to load Kubernetes configuration. ' - f'Please check your kubeconfig file is it valid. {suffix}' - ) from None - + 'Kubeconfig does not contain any valid context(s).' + f'{suffix}\n' + ' If you were running a local Kubernetes ' + 'cluster, run `sky local up` to start the cluster.' + ) + else: + err_str = ( + 'Failed to load Kubernetes configuration. ' + f'Please check if your kubeconfig file is valid.{suffix}' + ) + with ux_utils.print_exception_no_traceback(): + raise ValueError(err_str) from None _configured = True diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index edaddb609a6..105379dd665 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -148,7 +148,7 @@ class Kubernetes(clouds.Cloud): clouds.CloudImplementationFeatures.SPOT_INSTANCE: 'Spot instances are ' 'not supported in ' 'Kubernetes.', - clouds.CloudImplementationFeatures.CUSTOM_DOSK_TIER: 'Custom disk ' + clouds.CloudImplementationFeatures.CUSTOM_DISK_TIER: 'Custom disk ' 'tiers are not ' 'supported in ' 'Kubernetes.', @@ -293,10 +293,8 @@ def make_deploy_resources_variables( 'image_id': self.IMAGE, } - def get_feasible_launchable_resources(self, + def _get_feasible_launchable_resources(self, resources: 'resources_lib.Resources'): - if resources.use_spot or resources.disk_tier is not None: - return ([], []) fuzzy_candidate_list: List[str] = [] if resources.instance_type is not None: assert 
resources.is_launchable(), resources diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 5e8082c223a..a27df94abaf 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -47,6 +47,8 @@ def check_credentials(timeout: int = 3) -> Tuple[bool, Optional[str]]: return False, 'Failed to communicate with the cluster - timeout. ' \ 'Check if your cluster is running and your network ' \ 'is stable.' + except ValueError as e: + return False, str(e) except Exception as e: return False, f'An error occurred: {str(e)}' From 662e4b946a6ac8ae4833baf5b378ed78c7bbe1dc Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 20 Jul 2023 16:35:24 -0700 Subject: [PATCH 078/183] lint --- sky/adaptors/kubernetes.py | 16 +++++++--------- sky/cli.py | 6 +++--- sky/clouds/kubernetes.py | 4 ++-- 3 files changed, 12 insertions(+), 14 deletions(-) diff --git a/sky/adaptors/kubernetes.py b/sky/adaptors/kubernetes.py index 7348196c03a..7409603558c 100644 --- a/sky/adaptors/kubernetes.py +++ b/sky/adaptors/kubernetes.py @@ -15,6 +15,7 @@ _networking_api = None _custom_objects_api = None + def import_package(func): @functools.wraps(func) @@ -56,18 +57,15 @@ def _load_config(): suffix += f' Error: {str(e)}' # Check if exception was due to no current-context if 'Expected key current-context' in str(e): - err_str = ( - 'Failed to load Kubernetes configuration. ' - 'Kubeconfig does not contain any valid context(s).' - f'{suffix}\n' - ' If you were running a local Kubernetes ' - 'cluster, run `sky local up` to start the cluster.' - ) + err_str = ('Failed to load Kubernetes configuration. ' + 'Kubeconfig does not contain any valid context(s).' + f'{suffix}\n' + ' If you were running a local Kubernetes ' + 'cluster, run `sky local up` to start the cluster.') else: err_str = ( 'Failed to load Kubernetes configuration. 
' - f'Please check if your kubeconfig file is valid.{suffix}' - ) + f'Please check if your kubeconfig file is valid.{suffix}') with ux_utils.print_exception_no_traceback(): raise ValueError(err_str) from None _configured = True diff --git a/sky/cli.py b/sky/cli.py index 96d826eb5ba..16ee8c72767 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4422,12 +4422,12 @@ def local_up(): # Check if ~/.kube/config exists: if os.path.exists(os.path.expanduser('~/.kube/config')): current_context = kubernetes_utils.get_current_kube_config_context() - skypilot_context = "kind-skypilot" + skypilot_context = 'kind-skypilot' if current_context is not None and current_context != skypilot_context: click.echo( f'Current context in kube config: {current_context}' - '\nWill automatically switch to kind-skypilot after the local cluster is created.' - ) + '\nWill automatically switch to kind-skypilot after the local ' + 'cluster is created.') with log_utils.safe_rich_status('Creating local cluster...'): path_to_package = os.path.dirname(os.path.dirname(__file__)) up_script_path = os.path.join(path_to_package, 'sky/utils/kubernetes', diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 105379dd665..bc3f44d44bd 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -293,8 +293,8 @@ def make_deploy_resources_variables( 'image_id': self.IMAGE, } - def _get_feasible_launchable_resources(self, - resources: 'resources_lib.Resources'): + def _get_feasible_launchable_resources( + self, resources: 'resources_lib.Resources'): fuzzy_candidate_list: List[str] = [] if resources.instance_type is not None: assert resources.is_launchable(), resources From 4046749d4feb6766b3fcd56e744bb8c43e8a85e6 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 20 Jul 2023 22:31:41 -0700 Subject: [PATCH 079/183] update timeout check --- sky/clouds/kubernetes.py | 9 ++++++- .../providers/kubernetes/node_provider.py | 26 +++++++++++-------- 2 files changed, 23 insertions(+), 12 
deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index bc3f44d44bd..fc52348347e 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -126,8 +126,15 @@ class Kubernetes(clouds.Cloud): SKY_SSH_KEY_SECRET_NAME = f'sky-ssh-{common_utils.get_user_hash()}' + # Timeout for resource provisioning. This timeout determines how long to + # wait for pod to be in pending status before giving up. + # Larger timeout may be required for autoscaling clusters, since autoscaler + # may take some time to provision new nodes. + # Note that this timeout includes time taken by the Kubernetes scheduler + # itself, which can be upto 2-3 seconds. + # For non-autoscaling clusters, we conservatively set this to 10s. # TODO(romilb): Make the timeout configurable. - TIMEOUT = 60 # Timeout for resource provisioning + TIMEOUT = 10 _DEFAULT_NUM_VCPUS = 2 _DEFAULT_MEMORY_CPU_RATIO = 1 diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 7210738c31a..099663848bc 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -235,25 +235,29 @@ def create_node(self, node_config, tags, count): 'Cluster may be out of resources or ' 'may be too slow to autoscale.') all_ready = True + for node in new_nodes: pod = kubernetes.core_api().read_namespaced_pod( node.metadata.name, self.namespace) if pod.status.phase == 'Pending': - # Check conditions for more detailed status - if pod.status.conditions is not None: - for condition in pod.status.conditions: - if condition.reason == 'ContainerCreating': - # Container is creating, so we can assume resources - # have been allocated. Safe to exit. 
- break + # Iterate over each pod to check their status + if pod.status.container_statuses is not None: + for container_status in pod.status.container_statuses: + # Continue if container status is ContainerCreating + # This indicates this pod has been scheduled. + if container_status.state.waiting is not None and container_status.state.waiting.reason == 'ContainerCreating': + continue else: - # Pod is pending but not in 'ContainerCreating' state + # If the container wasn't in creating state, + # then we know pod wasn't scheduled or had some + # other error, such as image pull error. + # See list of possible reasons for waiting here: + # https://stackoverflow.com/a/57886025 all_ready = False - break else: - # No conditions also indicates that the pod is pending + # If container_statuses is None, then the pod hasn't + # been scheduled yet. all_ready = False - break if all_ready: break time.sleep(1) From 92d588d7ebfe3f815f56554eb07a8c5c8106cfaf Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 21 Jul 2023 09:32:19 -0700 Subject: [PATCH 080/183] fix import error --- sky/adaptors/kubernetes.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/sky/adaptors/kubernetes.py b/sky/adaptors/kubernetes.py index 7409603558c..22bcc825baa 100644 --- a/sky/adaptors/kubernetes.py +++ b/sky/adaptors/kubernetes.py @@ -27,8 +27,11 @@ def wrapper(*args, **kwargs): import kubernetes as _kubernetes import urllib3 as _urllib3 except ImportError: + # TODO(romilb): Update this message to point to installation + # docs when they are ready. raise ImportError('Fail to import dependencies for Kubernetes. 
' - 'See README for how to install it.') from None + 'Run `pip install kubernetes` to ' + 'install them.') from None kubernetes = _kubernetes urllib3 = _urllib3 return func(*args, **kwargs) From 9ff1662d0eb2e4c24cbe20b9d0cfe9b437527c26 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 21 Jul 2023 14:31:22 -0700 Subject: [PATCH 081/183] Fix kube API access from within cluster (load_incluster_auth) --- sky/adaptors/kubernetes.py | 8 +++++ sky/cli.py | 2 +- sky/skylet/providers/kubernetes/config.py | 34 ++----------------- .../providers/kubernetes/node_provider.py | 5 +-- sky/skylet/providers/kubernetes/utils.py | 25 ++++++++++++-- sky/templates/kubernetes-ray.yml.j2 | 12 +++---- 6 files changed, 43 insertions(+), 43 deletions(-) diff --git a/sky/adaptors/kubernetes.py b/sky/adaptors/kubernetes.py index 22bcc825baa..940b71c97e2 100644 --- a/sky/adaptors/kubernetes.py +++ b/sky/adaptors/kubernetes.py @@ -3,6 +3,7 @@ # pylint: disable=import-outside-toplevel import functools +import os from sky.utils import ux_utils, env_options @@ -50,6 +51,13 @@ def _load_config(): if _configured: return try: + # Load in-cluster config if running in a pod + # Kubernetes set environment variables for service discovery do not + # show up in SkyPilot tasks. For now, we work around by using + # DNS name instead of environment variables. 
+ # See issue: https://github.com/skypilot-org/skypilot/issues/2287 + os.environ['KUBERNETES_SERVICE_HOST'] = 'kubernetes.default.svc' + os.environ['KUBERNETES_SERVICE_PORT'] = '443' kubernetes.config.load_incluster_config() except kubernetes.config.config_exception.ConfigException: try: diff --git a/sky/cli.py b/sky/cli.py index 16ee8c72767..1e8cb150f27 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4421,7 +4421,7 @@ def local_up(): cluster_created = False # Check if ~/.kube/config exists: if os.path.exists(os.path.expanduser('~/.kube/config')): - current_context = kubernetes_utils.get_current_kube_config_context() + current_context = kubernetes_utils.get_current_kube_config_context_name() skypilot_context = 'kind-skypilot' if current_context is not None and current_context != skypilot_context: click.echo( diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index b53c1164015..c239e29c305 100644 --- a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -4,6 +4,7 @@ import re from sky.adaptors import kubernetes +from sky.skylet.providers.kubernetes import utils logger = logging.getLogger(__name__) @@ -55,10 +56,7 @@ def not_provided_msg(resource_type): def bootstrap_kubernetes(config): - if config['provider'].get('_operator'): - namespace = config['provider']['namespace'] - else: - namespace = _configure_namespace(config['provider']) + namespace = utils.get_current_kube_config_context_namespace() _configure_services(namespace, config['provider']) @@ -186,34 +184,6 @@ def _parse_memory_resource(resource): return float(number) * MEMORY_SIZE_UNITS[unit_index] -def _configure_namespace(provider_config): - namespace_field = 'namespace' - if namespace_field not in provider_config: - raise ValueError('Must specify namespace in Kubernetes config.') - - namespace = provider_config[namespace_field] - field_selector = f'metadata.name={namespace}' - try: - namespaces = 
kubernetes.core_api().list_namespace( - field_selector=field_selector).items - except kubernetes.api_exception(): - logger.warning(log_prefix + - not_checking_msg(namespace_field, namespace)) - return namespace - - if len(namespaces) > 0: - assert len(namespaces) == 1 - logger.info(log_prefix + using_existing_msg(namespace_field, namespace)) - return namespace - - logger.info(log_prefix + not_found_msg(namespace_field, namespace)) - namespace_config = kubernetes.client.V1Namespace( - metadata=kubernetes.client.V1ObjectMeta(name=namespace)) - kubernetes.core_api().create_namespace(namespace_config) - logger.info(log_prefix + created_msg(namespace_field, namespace)) - return namespace - - def _configure_autoscaler_service_account(namespace, provider_config): account_field = 'autoscaler_service_account' if account_field not in provider_config: diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 099663848bc..8921dbc9292 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -6,8 +6,9 @@ from uuid import uuid4 from sky.adaptors import kubernetes -from sky.skylet.providers.kubernetes import get_head_ssh_port from sky.skylet.providers.kubernetes import config +from sky.skylet.providers.kubernetes import get_head_ssh_port +from sky.skylet.providers.kubernetes import utils from ray.autoscaler._private.command_runner import SSHCommandRunner from ray.autoscaler._private.cli_logger import cli_logger from ray.autoscaler.node_provider import NodeProvider @@ -50,7 +51,7 @@ def __init__(self, provider_config, cluster_name): self.cluster_name = cluster_name # Kubernetes namespace to user - self.namespace = provider_config['namespace'] + self.namespace = utils.get_current_kube_config_context_namespace() # Timeout for resource provisioning. If it takes longer than this # timeout, the resource provisioning will be considered failed. 
diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index a27df94abaf..40c89c1293f 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -3,6 +3,8 @@ from sky import status_lib from sky.adaptors import kubernetes +DEFAULT_NAMESPACE = 'default' + def get_head_ssh_port(cluster_name, namespace): svc_name = f'{cluster_name}-ray-head-ssh' @@ -27,7 +29,7 @@ def check_credentials(timeout: int = 3) -> Tuple[bool, Optional[str]]: str: Error message if credentials are invalid, None otherwise """ try: - kubernetes.core_api().list_namespace(_request_timeout=timeout) + kubernetes.core_api().list_namespaced_pod(_request_timeout=timeout) return True, None except ImportError: # TODO(romilb): Update these error strs to also include link to docs @@ -70,7 +72,7 @@ def get_cluster_status(cluster_name: str, return cluster_status -def get_current_kube_config_context() -> Optional[str]: +def get_current_kube_config_context_name() -> Optional[str]: """ Get the current kubernetes context from the kubeconfig file @@ -83,3 +85,22 @@ def get_current_kube_config_context() -> Optional[str]: return current_context['name'] except k8s.config.config_exception.ConfigException: return None + + +def get_current_kube_config_context_namespace() -> Optional[str]: + """ + Get the current kubernetes context namespace from the kubeconfig file + + Returns: + str | None: The current kubernetes context namespace if it exists, else + the default namespace. 
+ """ + k8s = kubernetes.get_kubernetes() + try: + _, current_context = k8s.config.list_kube_config_contexts() + if 'namespace' in current_context: + return current_context['namespace'] + else: + return DEFAULT_NAMESPACE + except k8s.config.config_exception.ConfigException: + return DEFAULT_NAMESPACE diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index f256d54ae3f..0e7469c3306 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -19,10 +19,6 @@ provider: # Use False if running from outside of k8s cluster use_internal_ips: false - # Namespace to use for all resources created. - # TODO(romilb): Make the namespace configurable. - namespace: default - timeout: {{timeout}} # ServiceAccount created by the autoscaler for the head node pod that it @@ -48,8 +44,11 @@ provider: name: autoscaler rules: - apiGroups: [""] - resources: ["pods", "pods/status", "pods/exec"] - verbs: ["get", "watch", "list", "create", "delete", "patch"] + resources: ["pods", "pods/status", "pods/exec", "services"] + verbs: ["get", "watch", "list", "create", "delete", "patch", "update"] + - apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["get", "watch", "list", "create", "delete", "patch", "update"] # RoleBinding created by the autoscaler for the head node pod that it runs # in. If this field isn't provided, the head pod config below must contain @@ -210,6 +209,7 @@ available_node_types: # Automatically generates a name for the pod with this prefix. 
generateName: {{cluster_name}}-ray-worker- spec: + serviceAccountName: skypilot-service-account restartPolicy: Never volumes: - name: secret-volume From 364b03f033a71c285a12244d44583f188a26d655 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 21 Jul 2023 14:32:12 -0700 Subject: [PATCH 082/183] lint --- sky/cli.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sky/cli.py b/sky/cli.py index 1e8cb150f27..54a66f9993b 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4421,7 +4421,8 @@ def local_up(): cluster_created = False # Check if ~/.kube/config exists: if os.path.exists(os.path.expanduser('~/.kube/config')): - current_context = kubernetes_utils.get_current_kube_config_context_name() + current_context = kubernetes_utils.get_current_kube_config_context_name( + ) skypilot_context = 'kind-skypilot' if current_context is not None and current_context != skypilot_context: click.echo( From 691f6b7025365813452621870985634f7fa849d4 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 21 Jul 2023 14:33:08 -0700 Subject: [PATCH 083/183] lint --- sky/cli.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 54a66f9993b..f22c86bf4f0 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4421,12 +4421,11 @@ def local_up(): cluster_created = False # Check if ~/.kube/config exists: if os.path.exists(os.path.expanduser('~/.kube/config')): - current_context = kubernetes_utils.get_current_kube_config_context_name( - ) + curr_context = kubernetes_utils.get_current_kube_config_context_name() skypilot_context = 'kind-skypilot' - if current_context is not None and current_context != skypilot_context: + if curr_context is not None and curr_context != skypilot_context: click.echo( - f'Current context in kube config: {current_context}' + f'Current context in kube config: {curr_context}' '\nWill automatically switch to kind-skypilot after the local ' 'cluster is created.') with log_utils.safe_rich_status('Creating local 
cluster...'): From ed0741fff825f7d0d7efa47e453a6f06477b3426 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 21 Jul 2023 15:44:53 -0700 Subject: [PATCH 084/183] working autodown and sky status -r --- sky/clouds/kubernetes.py | 5 ++--- sky/skylet/providers/kubernetes/utils.py | 3 ++- sky/templates/kubernetes-ray.yml.j2 | 17 ++++++++--------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index fc52348347e..17499cc53a3 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -370,7 +370,6 @@ def accelerator_in_region_or_zone(self, def query_status(cls, name: str, tag_filters: Dict[str, str], region: Optional[str], zone: Optional[str], **kwargs) -> List['status_lib.ClusterStatus']: - # TODO(romilb): Implement this. For now, we return UP as the status. - # Assuming single node cluster. del tag_filters, region, zone, kwargs # Unused. - return [status_lib.ClusterStatus.UP] + namespace = kubernetes_utils.get_current_kube_config_context_namespace() + return kubernetes_utils.get_cluster_status(name, namespace) diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 40c89c1293f..fccdb7710a9 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -29,7 +29,8 @@ def check_credentials(timeout: int = 3) -> Tuple[bool, Optional[str]]: str: Error message if credentials are invalid, None otherwise """ try: - kubernetes.core_api().list_namespaced_pod(_request_timeout=timeout) + ns = get_current_kube_config_context_namespace() + kubernetes.core_api().list_namespaced_pod(ns, _request_timeout=timeout) return True, None except ImportError: # TODO(romilb): Update these error strs to also include link to docs diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 0e7469c3306..363ce45f37f 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ 
b/sky/templates/kubernetes-ray.yml.j2 @@ -42,13 +42,12 @@ provider: labels: parent: skypilot name: autoscaler + # TODO(romilb): This is a very permissive role - gives all access in the + # namespace. We should restrict this. rules: - - apiGroups: [""] - resources: ["pods", "pods/status", "pods/exec", "services"] - verbs: ["get", "watch", "list", "create", "delete", "patch", "update"] - - apiGroups: ["apps"] - resources: ["deployments"] - verbs: ["get", "watch", "list", "create", "delete", "patch", "update"] + - apiGroups: ["*"] + resources: ["*"] + verbs: ["*"] # RoleBinding created by the autoscaler for the head node pod that it runs # in. If this field isn't provided, the head pod config below must contain @@ -111,10 +110,10 @@ provider: targetPort: 8265 # Specify the pod type for the ray head node (as configured below). -head_node_type: head_node +head_node_type: ray_head_default # Specify the allowed pod types for this ray cluster and the resources they provide. available_node_types: - head_node: + ray_head_default: node_config: apiVersion: v1 kind: Pod @@ -191,7 +190,7 @@ available_node_types: # cause problems for other pods. cpu: {{cpus}} memory: {{memory}}G - worker_node: + ray_worker_default: # Minimum number of Ray workers of this Pod type. min_workers: {{num_nodes - 1}} # Maximum number of Ray workers of this Pod type. Takes precedence over min_workers. 
From 3fe9bfb2b532804e52c379d9b4f26932350b531e Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 21 Jul 2023 15:49:37 -0700 Subject: [PATCH 085/183] lint --- sky/clouds/kubernetes.py | 19 +++++++++++++++++-- sky/skylet/providers/kubernetes/__init__.py | 2 +- sky/skylet/providers/kubernetes/utils.py | 19 +------------------ 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 17499cc53a3..9cb92c47e4f 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -5,7 +5,9 @@ import typing from typing import Dict, Iterator, List, Optional, Tuple -from sky import clouds, status_lib +from sky import clouds +from sky import status_lib +from sky.adaptors import kubernetes from sky.utils import common_utils from sky.skylet.providers.kubernetes import utils as kubernetes_utils @@ -372,4 +374,17 @@ def query_status(cls, name: str, tag_filters: Dict[str, str], **kwargs) -> List['status_lib.ClusterStatus']: del tag_filters, region, zone, kwargs # Unused. 
namespace = kubernetes_utils.get_current_kube_config_context_namespace() - return kubernetes_utils.get_cluster_status(name, namespace) + + # Get all the pods with the label skypilot-cluster: + pods = kubernetes.core_api().list_namespaced_pod( + namespace, label_selector=f'skypilot-cluster={name}').items + + # Check if the pods are running or pending + cluster_status = [] + for pod in pods: + if pod.status.phase == 'Running': + cluster_status.append(status_lib.ClusterStatus.UP) + elif pod.status.phase == 'Pending': + cluster_status.append(status_lib.ClusterStatus.INIT) + # If pods are not found, we don't add them to the return list + return cluster_status diff --git a/sky/skylet/providers/kubernetes/__init__.py b/sky/skylet/providers/kubernetes/__init__.py index e3d0621fbea..b09a3fe4183 100644 --- a/sky/skylet/providers/kubernetes/__init__.py +++ b/sky/skylet/providers/kubernetes/__init__.py @@ -1,2 +1,2 @@ -from sky.skylet.providers.kubernetes.utils import get_head_ssh_port, get_port, get_cluster_status +from sky.skylet.providers.kubernetes.utils import get_head_ssh_port, get_port from sky.skylet.providers.kubernetes.node_provider import KubernetesNodeProvider diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index fccdb7710a9..2d6fe1b6654 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -56,23 +56,6 @@ def check_credentials(timeout: int = 3) -> Tuple[bool, Optional[str]]: return False, f'An error occurred: {str(e)}' -def get_cluster_status(cluster_name: str, - namespace: str) -> List[status_lib.ClusterStatus]: - # Get all the pods with the label skypilot-cluster: - pods = kubernetes.core_api().list_namespaced_pod( - namespace, label_selector=f'skypilot-cluster={cluster_name}').items - - # Check if the pods are running or pending - cluster_status = [] - for pod in pods: - if pod.status.phase == 'Running': - cluster_status.append(status_lib.ClusterStatus.UP) - elif 
pod.status.phase == 'Pending': - cluster_status.append(status_lib.ClusterStatus.INIT) - # If pods are not found, we don't add them to the return list - return cluster_status - - def get_current_kube_config_context_name() -> Optional[str]: """ Get the current kubernetes context from the kubeconfig file @@ -88,7 +71,7 @@ def get_current_kube_config_context_name() -> Optional[str]: return None -def get_current_kube_config_context_namespace() -> Optional[str]: +def get_current_kube_config_context_namespace() -> str: """ Get the current kubernetes context namespace from the kubeconfig file From b98ced3f31c8af4762cee45a244c9c287b7284c8 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 21 Jul 2023 16:34:18 -0700 Subject: [PATCH 086/183] add test_kubernetes_autodown --- tests/test_smoke.py | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index fbce4b91655..6408dd14207 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -1488,7 +1488,7 @@ def test_autostop(generic_cloud: str): # ---------- Testing Autodowning ---------- @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet. Run test_scp_autodown instead. -@pytest.mark.no_kubernetes # Kubernetes does not support autodown yet +@pytest.mark.no_kubernetes # Kubernetes does not support num_nodes > 1 yet. Run test_scp_kubernetes instead. def test_autodown(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -1558,6 +1558,40 @@ def test_scp_autodown(): run_one_test(test) +@pytest.mark.kubernetes +def test_kubernetes_autodown(): + name = _get_cluster_name() + test = Test( + 'kubernetes_autodown', + [ + f'sky launch -y -d -c {name} --cloud kubernetes tests/test_yamls/minimal.yaml', + f'sky autostop -y {name} --down -i 1', + # Ensure autostop is set. + f'sky status | grep {name} | grep "1m (down)"', + # Ensure the cluster is not terminated early. 
+ 'sleep 45', + f'sky status --refresh | grep {name} | grep UP', + # Ensure the cluster is terminated. + 'sleep 200', + f's=$(SKYPILOT_DEBUG=0 sky status --refresh) && printf "$s" && {{ echo "$s" | grep {name} | grep "Autodowned cluster\|terminated on the cloud"; }} || {{ echo "$s" | grep {name} && exit 1 || exit 0; }}', + f'sky launch -y -d -c {name} --cloud kubernetes --down tests/test_yamls/minimal.yaml', + f'sky status | grep {name} | grep UP', # Ensure the cluster is UP. + f'sky exec {name} --cloud kubernetes tests/test_yamls/minimal.yaml', + f'sky status | grep {name} | grep "1m (down)"', + 'sleep 200', + # Ensure the cluster is terminated. + f's=$(SKYPILOT_DEBUG=0 sky status --refresh) && printf "$s" && {{ echo "$s" | grep {name} | grep "Autodowned cluster\|terminated on the cloud"; }} || {{ echo "$s" | grep {name} && exit 1 || exit 0; }}', + f'sky launch -y -d -c {name} --cloud kubernetes --down tests/test_yamls/minimal.yaml', + f'sky autostop -y {name} --cancel', + 'sleep 200', + # Ensure the cluster is still UP. 
+ f's=$(SKYPILOT_DEBUG=0 sky status --refresh) && printf "$s" && echo "$s" | grep {name} | grep UP', + ], + f'sky down -y {name}', + timeout=25 * 60, + ) + run_one_test(test) + def _get_cancel_task_with_cloud(name, cloud, timeout=15 * 60): test = Test( f'{cloud}-cancel-task', From 07ea97dbfd3e7f0d64903d505b34ed826902aed7 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 24 Jul 2023 10:09:27 -0700 Subject: [PATCH 087/183] lint --- tests/test_smoke.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 6408dd14207..ea537cebb63 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -1592,6 +1592,7 @@ def test_kubernetes_autodown(): ) run_one_test(test) + def _get_cancel_task_with_cloud(name, cloud, timeout=15 * 60): test = Test( f'{cloud}-cancel-task', From 73ee7371c8846d7f42e3dd991e6f10d3f788dc7f Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 24 Jul 2023 14:01:36 -0700 Subject: [PATCH 088/183] address comments --- sky/skylet/providers/kubernetes/utils.py | 6 +++--- sky/templates/kubernetes-ray.yml.j2 | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 2d6fe1b6654..538d53ee671 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -1,6 +1,6 @@ -from typing import List, Tuple, Optional +from typing import Tuple, Optional -from sky import status_lib +from sky.utils import common_utils from sky.adaptors import kubernetes DEFAULT_NAMESPACE = 'default' @@ -51,7 +51,7 @@ def check_credentials(timeout: int = 3) -> Tuple[bool, Optional[str]]: 'Check if your cluster is running and your network ' \ 'is stable.' 
except ValueError as e: - return False, str(e) + return False, common_utils.format_exception(e) except Exception as e: return False, f'An error occurred: {str(e)}' diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 363ce45f37f..9a2e97f551d 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -43,7 +43,8 @@ provider: parent: skypilot name: autoscaler # TODO(romilb): This is a very permissive role - gives all access in the - # namespace. We should restrict this. + # namespace. We should restrict this. For reference, this is required + # for autodown and creating more SkyPilot clusters from within the pod. rules: - apiGroups: ["*"] resources: ["*"] From 772685057dfeb8ba81b83f4614d8f5a44be8c99a Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 24 Jul 2023 16:16:07 -0700 Subject: [PATCH 089/183] address comments --- sky/adaptors/kubernetes.py | 2 ++ sky/clouds/cloud.py | 4 ++++ sky/clouds/kubernetes.py | 14 ++++++++++++-- sky/skylet/providers/kubernetes/utils.py | 3 ++- 4 files changed, 20 insertions(+), 3 deletions(-) diff --git a/sky/adaptors/kubernetes.py b/sky/adaptors/kubernetes.py index 940b71c97e2..12391e1bdbd 100644 --- a/sky/adaptors/kubernetes.py +++ b/sky/adaptors/kubernetes.py @@ -16,6 +16,8 @@ _networking_api = None _custom_objects_api = None +# Timeout to use for API calls +API_TIMEOUT = 5 def import_package(func): diff --git a/sky/clouds/cloud.py b/sky/clouds/cloud.py index 7d044f7b955..45d148779ca 100644 --- a/sky/clouds/cloud.py +++ b/sky/clouds/cloud.py @@ -601,6 +601,10 @@ def query_status(cls, name: str, tag_filters: Dict[str, str], Returns: A list of ClusterStatus representing the status of all the alive nodes in the cluster. + + Raises: + exceptions.ClusterStatusFetchingError: raised if the status of the + cluster cannot be fetched. 
""" raise NotImplementedError diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 9cb92c47e4f..25c09d21710 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -6,9 +6,11 @@ from typing import Dict, Iterator, List, Optional, Tuple from sky import clouds +from sky import exceptions from sky import status_lib from sky.adaptors import kubernetes from sky.utils import common_utils +from sky.utils import ux_utils from sky.skylet.providers.kubernetes import utils as kubernetes_utils if typing.TYPE_CHECKING: @@ -376,8 +378,16 @@ def query_status(cls, name: str, tag_filters: Dict[str, str], namespace = kubernetes_utils.get_current_kube_config_context_namespace() # Get all the pods with the label skypilot-cluster: - pods = kubernetes.core_api().list_namespaced_pod( - namespace, label_selector=f'skypilot-cluster={name}').items + try: + pods = kubernetes.core_api().list_namespaced_pod( + namespace, + label_selector=f'skypilot-cluster={name}', + _request_timeout=kubernetes.API_TIMEOUT).items + except Exception as e: + with ux_utils.print_exception_no_traceback(): + raise exceptions.ClusterStatusFetchingError( + f'Failed to query Kubernetes cluster {name!r} status: ' + f'{common_utils.format_exception(e)}') # Check if the pods are running or pending cluster_status = [] diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 538d53ee671..5857fd211ef 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -17,7 +17,8 @@ def get_port(svc_name, namespace): return head_service.spec.ports[0].node_port -def check_credentials(timeout: int = 3) -> Tuple[bool, Optional[str]]: +def check_credentials(timeout: int = kubernetes.API_TIMEOUT) -> \ + Tuple[bool, Optional[str]]: """ Check if the credentials in kubeconfig file are valid From 2ee48334dbea4af8e2a962eeb5fe63ed77b1d5d8 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 24 Jul 2023 16:28:36 -0700 
Subject: [PATCH 090/183] lint --- sky/adaptors/kubernetes.py | 1 + sky/clouds/kubernetes.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/sky/adaptors/kubernetes.py b/sky/adaptors/kubernetes.py index 12391e1bdbd..79daa6f2434 100644 --- a/sky/adaptors/kubernetes.py +++ b/sky/adaptors/kubernetes.py @@ -19,6 +19,7 @@ # Timeout to use for API calls API_TIMEOUT = 5 + def import_package(func): @functools.wraps(func) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 25c09d21710..dc37f39c2e8 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -383,7 +383,7 @@ def query_status(cls, name: str, tag_filters: Dict[str, str], namespace, label_selector=f'skypilot-cluster={name}', _request_timeout=kubernetes.API_TIMEOUT).items - except Exception as e: + except Exception as e: # pylint: disable=broad-except with ux_utils.print_exception_no_traceback(): raise exceptions.ClusterStatusFetchingError( f'Failed to query Kubernetes cluster {name!r} status: ' From 9e0f5b6e39ccf95bdd0d3513d0587563d5f6afcf Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 25 Jul 2023 10:23:38 -0700 Subject: [PATCH 091/183] deletion timeouts wip --- sky/clouds/kubernetes.py | 7 +++++++ sky/skylet/providers/kubernetes/config.py | 3 +++ sky/skylet/providers/kubernetes/node_provider.py | 8 +++++--- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index dc37f39c2e8..7a21af74739 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -383,6 +383,13 @@ def query_status(cls, name: str, tag_filters: Dict[str, str], namespace, label_selector=f'skypilot-cluster={name}', _request_timeout=kubernetes.API_TIMEOUT).items + except kubernetes.max_retry_error(): + with ux_utils.print_exception_no_traceback(): + ctx_name = kubernetes_utils.get_current_kube_config_context_name() + raise exceptions.ClusterStatusFetchingError( + f'Failed to query cluster {name!r} status. 
' + 'Network error - check if the Kubernetes cluster in ' + f'context {ctx_name} is up and accessible.') except Exception as e: # pylint: disable=broad-except with ux_utils.print_exception_no_traceback(): raise exceptions.ClusterStatusFetchingError( diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index c239e29c305..a9b0e0de32f 100644 --- a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -18,6 +18,9 @@ log_prefix = 'KubernetesNodeProvider: ' +# Timeout for deleting a Kubernetes resource (in seconds). +DELETION_TIMEOUT = 90 + class InvalidNamespaceError(ValueError): diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 8921dbc9292..7ea6b29e9ec 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -266,7 +266,8 @@ def create_node(self, node_config, tags, count): def terminate_node(self, node_id): logger.info(config.log_prefix + 'calling delete_namespaced_pod') try: - kubernetes.core_api().delete_namespaced_pod(node_id, self.namespace) + kubernetes.core_api().delete_namespaced_pod(node_id, self.namespace, + _request_timeout=config.DELETION_TIMEOUT) except kubernetes.api_exception() as e: if e.status == 404: logger.warning(config.log_prefix + @@ -276,15 +277,16 @@ def terminate_node(self, node_id): raise try: kubernetes.core_api().delete_namespaced_service( - node_id, self.namespace) + node_id, self.namespace, _request_timeout=config.DELETION_TIMEOUT) kubernetes.core_api().delete_namespaced_service( - f'{node_id}-ssh', self.namespace) + f'{node_id}-ssh', self.namespace, _request_timeout=config.DELETION_TIMEOUT) except kubernetes.api_exception(): pass try: kubernetes.networking_api().delete_namespaced_ingress( node_id, self.namespace, + _request_timeout=config.DELETION_TIMEOUT ) except kubernetes.api_exception(): pass From 
b36fba4f7c98dc2c9efc27c46cf081c8aa950531 Mon Sep 17 00:00:00 2001 From: Avi Weit Date: Wed, 26 Jul 2023 03:30:10 +0300 Subject: [PATCH 092/183] [k8s_cloud] Ray pod not created under current context namespace. (#2302) 'namespace' exists under 'context' key. --- sky/skylet/providers/kubernetes/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 5857fd211ef..e23bfa83c4f 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -83,8 +83,8 @@ def get_current_kube_config_context_namespace() -> str: k8s = kubernetes.get_kubernetes() try: _, current_context = k8s.config.list_kube_config_contexts() - if 'namespace' in current_context: - return current_context['namespace'] + if 'namespace' in current_context['context']: + return current_context['context']['namespace'] else: return DEFAULT_NAMESPACE except k8s.config.config_exception.ConfigException: From a806b3913941fd131bd8f011b55b561fff624de6 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 25 Jul 2023 17:32:28 -0700 Subject: [PATCH 093/183] head ssh port namespace fix --- sky/backends/backend_utils.py | 2 +- sky/clouds/kubernetes.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 9646ee40758..8a83ede6792 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -1619,7 +1619,7 @@ def get_head_ssh_port( head_ssh_port = handle.head_ssh_port else: svc_name = f'{handle.get_cluster_name()}-ray-head-ssh' - head_ssh_port = clouds.Kubernetes.get_port(svc_name, 'default') + head_ssh_port = clouds.Kubernetes.get_port(svc_name) return head_ssh_port diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 7a21af74739..a90b85b2e2f 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -213,8 +213,9 @@ def 
is_same_cloud(self, other: clouds.Cloud) -> bool: return isinstance(other, Kubernetes) @classmethod - def get_port(cls, svc_name, namespace) -> int: - return kubernetes_utils.get_port(svc_name, namespace) + def get_port(cls, svc_name) -> int: + ns = kubernetes_utils.get_current_kube_config_context_namespace() + return kubernetes_utils.get_port(svc_name, ns) @classmethod def get_default_instance_type( From a9b963615c117add5130febf61c009cb1d50ae3d Mon Sep 17 00:00:00 2001 From: Avi Weit Date: Wed, 26 Jul 2023 18:35:52 +0300 Subject: [PATCH 094/183] [k8s-cloud] Typo in sky local --help. (#2308) Typo. --- sky/cli.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/cli.py b/sky/cli.py index f22c86bf4f0..17870878f8d 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -4473,7 +4473,7 @@ def local_up(): @local.command('down', cls=_DocumentedCodeCommand) @usage_lib.entrypoint def local_down(): - """Creates a local cluster.""" + """Deletes a local cluster.""" cluster_removed = False with log_utils.safe_rich_status('Removing local cluster...'): path_to_package = os.path.dirname(os.path.dirname(__file__)) From 79033395cb9c5dfed3d7a8163263f0db96777c1d Mon Sep 17 00:00:00 2001 From: Avi Weit Date: Wed, 26 Jul 2023 18:36:49 +0300 Subject: [PATCH 095/183] [k8s-cloud] Set build_image.sh to be executable. (#2307) * Set build_image.sh to be executable. * Use TAG to easily switch between registries. 
--- tests/kubernetes/build_image.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) mode change 100644 => 100755 tests/kubernetes/build_image.sh diff --git a/tests/kubernetes/build_image.sh b/tests/kubernetes/build_image.sh old mode 100644 new mode 100755 index 9ff1a44c164..2babd0cd95a --- a/tests/kubernetes/build_image.sh +++ b/tests/kubernetes/build_image.sh @@ -32,14 +32,14 @@ fi # Load the right image depending on the architecture of the host machine (Apple Silicon or Intel) if [[ $(uname -m) == "arm64" ]]; then echo "Loading image for arm64 (Apple Silicon etc.)" - docker buildx build --load --platform linux/arm64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky + docker buildx build --load --platform linux/arm64 -t $TAG -f Dockerfile_k8s ./sky elif [[ $(uname -m) == "x86_64" ]]; then echo "Building for amd64 (Intel CPUs)" - docker buildx build --load --platform linux/amd64 -t us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest -f Dockerfile_k8s ./sky + docker buildx build --load --platform linux/amd64 -t $TAG -f Dockerfile_k8s ./sky else echo "Unsupported architecture: $(uname -m)" exit 1 fi echo "Tagging image as skypilot:latest" -docker tag us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest skypilot:latest +docker tag $TAG skypilot:latest From 4ab5329b3c23aafa18e50ce043424dd20d2633b5 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 26 Jul 2023 08:45:21 -0700 Subject: [PATCH 096/183] remove ingress --- sky/skylet/providers/kubernetes/config.py | 3 +- .../providers/kubernetes/node_provider.py | 49 ------------------- 2 files changed, 1 insertion(+), 51 deletions(-) diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index a9b0e0de32f..d684c4b4617 100644 --- a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -85,8 +85,7 @@ def fillout_resources_kubernetes(config): 
for node_type in node_types: node_config = node_types[node_type]['node_config'] - # The next line is for compatibility with configs like - # kubernetes/example-ingress.yaml, + # The next line is for compatibility with configs which define pod specs # cf. KubernetesNodeProvider.create_node(). pod = node_config.get('pod', node_config) container_data = pod['spec']['containers'][0] diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 7ea6b29e9ec..0ac6963ffbd 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -174,7 +174,6 @@ def create_node(self, node_config, tags, count): conf = copy.deepcopy(node_config) pod_spec = conf.get('pod', conf) service_spec = conf.get('service') - ingress_spec = conf.get('ingress') node_uuid = str(uuid4()) tags[TAG_RAY_CLUSTER_NAME] = self.cluster_name tags['ray-node-uuid'] = node_uuid @@ -212,18 +211,6 @@ def create_node(self, node_config, tags, count): self.namespace, service_spec) new_svcs.append(svc) - if ingress_spec is not None: - logger.info(config.log_prefix + 'calling create_namespaced_ingress ' - '(count={}).'.format(count)) - for new_svc in new_svcs: - metadata = ingress_spec.get('metadata', {}) - metadata['name'] = new_svc.metadata.name - ingress_spec['metadata'] = metadata - ingress_spec = _add_service_name_to_service_port( - ingress_spec, new_svc.metadata.name) - kubernetes.networking_api().create_namespaced_ingress( - self.namespace, ingress_spec) - # Wait for all pods to be ready, and if it exceeds the timeout, raise an # exception. If pod's container is ContainerCreating, then we can assume # that resources have been allocated and we can exit. 
@@ -282,14 +269,6 @@ def terminate_node(self, node_id): f'{node_id}-ssh', self.namespace, _request_timeout=config.DELETION_TIMEOUT) except kubernetes.api_exception(): pass - try: - kubernetes.networking_api().delete_namespaced_ingress( - node_id, - self.namespace, - _request_timeout=config.DELETION_TIMEOUT - ) - except kubernetes.api_exception(): - pass def terminate_nodes(self, node_ids): # TODO(romilb): terminate_nodes should be include optimizations for @@ -351,31 +330,3 @@ def bootstrap_config(cluster_config): def fillout_available_node_types_resources(cluster_config): """Fills out missing "resources" field for available_node_types.""" return config.fillout_resources_kubernetes(cluster_config) - - -def _add_service_name_to_service_port(spec, svc_name): - """Goes recursively through the ingress manifest and adds the - right serviceName next to every servicePort definition. - """ - if isinstance(spec, dict): - dict_keys = list(spec.keys()) - for k in dict_keys: - spec[k] = _add_service_name_to_service_port(spec[k], svc_name) - - if k == 'serviceName' and spec[k] != svc_name: - raise ValueError( - 'The value of serviceName must be set to ' - '${RAY_POD_NAME}. It is automatically replaced ' - 'when using the autoscaler.') - - elif isinstance(spec, list): - spec = [ - _add_service_name_to_service_port(item, svc_name) for item in spec - ] - - elif isinstance(spec, str): - # The magic string ${RAY_POD_NAME} is replaced with - # the true service name, which is equal to the worker pod name. 
- if '${RAY_POD_NAME}' in spec: - spec = spec.replace('${RAY_POD_NAME}', svc_name) - return spec From 4b49241725bb81f9866fc275f15e717d988953a6 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 26 Jul 2023 08:46:51 -0700 Subject: [PATCH 097/183] remove debug statements --- sky/skylet/providers/kubernetes/node_provider.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 0ac6963ffbd..ec9b8849c25 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -315,10 +315,6 @@ def get_command_runner(self, port = 22 else: port = self.external_port(node_id) - cli_logger.print(port) - cli_logger.print(command_runner.__dict__) - with open('/tmp/log.txt', 'a') as f: - f.write(f'{node_id} port: {port}\n') command_runner.set_port(port) return command_runner From 83aecd33837b4db586efae713b934b0b56c6b2b5 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 26 Jul 2023 08:55:35 -0700 Subject: [PATCH 098/183] UX and readme updates --- sky/authentication.py | 2 +- tests/kubernetes/README.md | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/sky/authentication.py b/sky/authentication.py index ba9e4d1a1c7..a7d69dc39d7 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -388,7 +388,7 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: output = e.output.decode('utf-8') suffix = f'\nError message: {output}' if 'already exists' in output: - logger.warning( + logger.debug( f'Key {key_label} already exists in the cluster, using it...') elif any(err in output for err in ['connection refused', 'timeout']): with ux_utils.print_exception_no_traceback(): diff --git a/tests/kubernetes/README.md b/tests/kubernetes/README.md index 18dc58e8191..5a44fffd1b2 100644 --- a/tests/kubernetes/README.md +++ b/tests/kubernetes/README.md @@ -25,7 +25,8 @@ sky local up ## 
Running a GKE cluster 1. Make sure ports 30000-32767 are open in your node pool VPC's firewall. -2. Create a GKE cluster with at least 1 node. +2. Create a GKE cluster with at least 1 node. We recommend creating nodes with at least 4 vCPUs. + * Note - only GKE standard clusters are supported. GKE autopilot clusters are not supported. 3. Get the kubeconfig for your cluster and place it in `~/.kube/config`: ```bash gcloud container clusters get-credentials --region From bdeb7d517f6d9896d62584ffdc31f461d47946a2 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 26 Jul 2023 09:02:15 -0700 Subject: [PATCH 099/183] lint --- sky/clouds/kubernetes.py | 6 +++--- sky/skylet/providers/kubernetes/node_provider.py | 14 ++++++++++---- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index a90b85b2e2f..b9ee9d12849 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -384,13 +384,13 @@ def query_status(cls, name: str, tag_filters: Dict[str, str], namespace, label_selector=f'skypilot-cluster={name}', _request_timeout=kubernetes.API_TIMEOUT).items - except kubernetes.max_retry_error(): + except kubernetes.max_retry_error() as e: with ux_utils.print_exception_no_traceback(): - ctx_name = kubernetes_utils.get_current_kube_config_context_name() + ctx = kubernetes_utils.get_current_kube_config_context_name() raise exceptions.ClusterStatusFetchingError( f'Failed to query cluster {name!r} status. 
' 'Network error - check if the Kubernetes cluster in ' - f'context {ctx_name} is up and accessible.') + f'context {ctx} is up and accessible.') except Exception as e: # pylint: disable=broad-except with ux_utils.print_exception_no_traceback(): raise exceptions.ClusterStatusFetchingError( diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index ec9b8849c25..d40234bd4fe 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -253,8 +253,10 @@ def create_node(self, node_config, tags, count): def terminate_node(self, node_id): logger.info(config.log_prefix + 'calling delete_namespaced_pod') try: - kubernetes.core_api().delete_namespaced_pod(node_id, self.namespace, - _request_timeout=config.DELETION_TIMEOUT) + kubernetes.core_api().delete_namespaced_pod( + node_id, + self.namespace, + _request_timeout=config.DELETION_TIMEOUT) except kubernetes.api_exception() as e: if e.status == 404: logger.warning(config.log_prefix + @@ -264,9 +266,13 @@ def terminate_node(self, node_id): raise try: kubernetes.core_api().delete_namespaced_service( - node_id, self.namespace, _request_timeout=config.DELETION_TIMEOUT) + node_id, + self.namespace, + _request_timeout=config.DELETION_TIMEOUT) kubernetes.core_api().delete_namespaced_service( - f'{node_id}-ssh', self.namespace, _request_timeout=config.DELETION_TIMEOUT) + f'{node_id}-ssh', + self.namespace, + _request_timeout=config.DELETION_TIMEOUT) except kubernetes.api_exception(): pass From 4fb1d9427b9d7968299d970945e020037dc0c513 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 26 Jul 2023 09:37:42 -0700 Subject: [PATCH 100/183] fix logging for 409 retry --- sky/skylet/providers/kubernetes/node_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index d40234bd4fe..9078d91dc4c 
100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -155,7 +155,7 @@ def set_node_tags(self, node_ids, tags): return except kubernetes.api_exception() as e: if e.status == 409: - logger.info(kubernetes.log_prefix + + logger.info(config.log_prefix + 'Caught a 409 error while setting' ' node tags. Retrying...') time.sleep(DELAY_BEFORE_TAG_RETRY) From 02e3415b0b856a06d32087334231653de92285cc Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 26 Jul 2023 10:00:19 -0700 Subject: [PATCH 101/183] lint --- sky/skylet/providers/kubernetes/node_provider.py | 1 - sky/skylet/providers/kubernetes/utils.py | 14 ++++++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 9078d91dc4c..3ab8414b2d2 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -10,7 +10,6 @@ from sky.skylet.providers.kubernetes import get_head_ssh_port from sky.skylet.providers.kubernetes import utils from ray.autoscaler._private.command_runner import SSHCommandRunner -from ray.autoscaler._private.cli_logger import cli_logger from ray.autoscaler.node_provider import NodeProvider from ray.autoscaler.tags import NODE_KIND_HEAD, TAG_RAY_CLUSTER_NAME, TAG_RAY_NODE_KIND diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index e23bfa83c4f..b801e4354f5 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -6,12 +6,22 @@ DEFAULT_NAMESPACE = 'default' -def get_head_ssh_port(cluster_name, namespace): +def get_head_ssh_port(cluster_name: str, + namespace: str) -> int: svc_name = f'{cluster_name}-ray-head-ssh' return get_port(svc_name, namespace) -def get_port(svc_name, namespace): +def get_port(svc_name: str, + namespace: str) -> int: + """ + Gets the nodeport of 
the specified service. + + Args: + svc_name (str): Name of the kubernetes service. Note that this may be + different from the cluster name. + namespace (str): Kubernetes namespace to look for the service in. + """ head_service = kubernetes.core_api().read_namespaced_service( svc_name, namespace) return head_service.spec.ports[0].node_port From c1b7438e3ad958520d6c9f68c78f75f783e95e86 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 26 Jul 2023 10:11:55 -0700 Subject: [PATCH 102/183] lint --- sky/skylet/providers/kubernetes/utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index b801e4354f5..60bc99d0050 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -6,14 +6,12 @@ DEFAULT_NAMESPACE = 'default' -def get_head_ssh_port(cluster_name: str, - namespace: str) -> int: +def get_head_ssh_port(cluster_name: str, namespace: str) -> int: svc_name = f'{cluster_name}-ray-head-ssh' return get_port(svc_name, namespace) -def get_port(svc_name: str, - namespace: str) -> int: +def get_port(svc_name: str, namespace: str) -> int: """ Gets the nodeport of the specified service. From 428946249ac730f150ec21ee31780ac1c2201d00 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 31 Jul 2023 18:56:11 +0530 Subject: [PATCH 103/183] Debug dockerfile --- Dockerfile_k8s_gpu | 6 ++++-- sky/clouds/kubernetes.py | 24 +++++++++--------------- sky/templates/kubernetes-ray.yml.j2 | 2 ++ tests/kubernetes/build_image.sh | 6 ++++-- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/Dockerfile_k8s_gpu b/Dockerfile_k8s_gpu index 91a8b5aeb70..41582132948 100644 --- a/Dockerfile_k8s_gpu +++ b/Dockerfile_k8s_gpu @@ -15,10 +15,12 @@ RUN sudo mkdir -p /var/run/sshd && \ cd /etc/ssh/ && \ ssh-keygen -A -# Setup new user named sky and add to sudoers. Also add /opt/conda/bin to sudo path. 
+# Setup new user named sky and add to sudoers. \ +# Also add /opt/conda/bin to sudo path and give sky user access to /home/ray RUN sudo useradd -m -s /bin/bash sky && \ sudo /bin/bash -c 'echo "sky ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers' && \ - sudo /bin/bash -c "echo 'Defaults secure_path=\"/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\"' > /etc/sudoers.d/sky" + sudo /bin/bash -c "echo 'Defaults secure_path=\"/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\"' > /etc/sudoers.d/sky" && \ + sudo chmod -R a+rwx /home/ray # Switch to sky user diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 73301296df0..3a1f28620a1 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -165,9 +165,10 @@ class Kubernetes(clouds.Cloud): 'Kubernetes.', } - # TODO(romilb): Add GPU Support - have GPU-enabled image.sky + # TODO(romilb): Add GPU Support - toggle between image depending on chosen + # accelerator type. IMAGE = 'us-central1-docker.pkg.dev/' \ - 'skypilot-375900/skypilotk8s/skypilot:latest' + 'skypilot-375900/skypilotk8s/skypilot-gpu:latest' @classmethod def _cloud_unsupported_features( @@ -295,19 +296,12 @@ def make_deploy_resources_variables( # resources.memory and cpus are None if they are not explicitly set. # We fetch the default values for the instance type in that case. - cpus, mem = self.get_vcpus_mem_from_instance_type( - resources.instance_type) - acc_count = 0 - acc_type = None - - # Add accelerator variables if they are set. - accelerators = resources.accelerators - if accelerators is not None: - assert len(accelerators) == 1, resources - acc_type, acc_count = list(accelerators.items())[0] - # TODO(romilb): Add accelerator type support. - # For now, hacking back to None - acc_type = None + k = KubernetesInstanceType.from_instance_type(resources.instance_type) + cpus = k.cpus + mem = k.memory + # Optionally populate accelerator information. 
+ acc_count = k.accelerator_count if k.accelerator_count else 0 + acc_type = k.accelerator_type if k.accelerator_type else '' vars = { 'instance_type': resources.instance_type, diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index b66118259a2..c1a410c9860 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -181,6 +181,7 @@ available_node_types: requests: cpu: {{cpus}} memory: {{memory}}G + nvidia.com/gpu: {{accelerator_count}} limits: # The maximum memory that this pod is allowed to use. The # limit will be detected by ray and split to use 10% for @@ -191,6 +192,7 @@ available_node_types: # cause problems for other pods. cpu: {{cpus}} memory: {{memory}}G + nvidia.com/gpu: {{accelerator_count}} ray_worker_default: # Minimum number of Ray workers of this Pod type. min_workers: {{num_nodes - 1}} diff --git a/tests/kubernetes/build_image.sh b/tests/kubernetes/build_image.sh index 26d8fc2c98e..0bcc6aead69 100644 --- a/tests/kubernetes/build_image.sh +++ b/tests/kubernetes/build_image.sh @@ -8,7 +8,7 @@ # -p: Push the image to the registry # -g: Build the GPU image -TAG=us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot:latest +TAG=us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot # Parse command line arguments while getopts ":pg" opt; do @@ -30,7 +30,9 @@ done # Add -gpu to the tag if the GPU image is being built if [[ $gpu ]]; then - TAG=$TAG-gpu + TAG=$TAG-gpu:latest +else + TAG=$TAG:latest fi # Navigate to the root of the project (inferred from git) From 3d770bd99ca8ca452f2bb83e1e299b01a6818177 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 1 Aug 2023 21:24:11 +0530 Subject: [PATCH 104/183] wip --- Dockerfile_k8s_gpu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Dockerfile_k8s_gpu b/Dockerfile_k8s_gpu index 41582132948..dbcc76ba127 100644 --- a/Dockerfile_k8s_gpu +++ b/Dockerfile_k8s_gpu @@ -22,6 +22,8 @@ RUN sudo useradd -m -s /bin/bash sky && 
\ sudo /bin/bash -c "echo 'Defaults secure_path=\"/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\"' > /etc/sudoers.d/sky" && \ sudo chmod -R a+rwx /home/ray +# Set HOME environment variable for sky user, otherwise Ray base image HOME overrides +ENV HOME /home/sky # Switch to sky user USER sky From 2875ff97a44fec08b9ddf2558e04162c987e21db Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 3 Aug 2023 21:16:57 +0530 Subject: [PATCH 105/183] Fix GPU image --- Dockerfile_k8s_gpu | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/Dockerfile_k8s_gpu b/Dockerfile_k8s_gpu index dbcc76ba127..2f4d8c3593b 100644 --- a/Dockerfile_k8s_gpu +++ b/Dockerfile_k8s_gpu @@ -1,3 +1,6 @@ +# TODO(romilb) - The base image used here (ray) is very large (11.4GB). +# as a result, this built image is about 13.5GB. We need to pick a lighter base +# image. FROM rayproject/ray:2.4.0-gpu # Initialize conda for root user, install ssh and other local dependencies @@ -7,14 +10,6 @@ RUN sudo apt update -y && \ sudo apt remove -y python3 && \ conda init -# Setup SSH and generate hostkeys -RUN sudo mkdir -p /var/run/sshd && \ - sudo chmod 0755 /var/run/sshd && \ - sudo sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \ - sudo sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd && \ - cd /etc/ssh/ && \ - ssh-keygen -A - # Setup new user named sky and add to sudoers. 
\ # Also add /opt/conda/bin to sudo path and give sky user access to /home/ray RUN sudo useradd -m -s /bin/bash sky && \ @@ -22,11 +17,19 @@ RUN sudo useradd -m -s /bin/bash sky && \ sudo /bin/bash -c "echo 'Defaults secure_path=\"/opt/conda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin\"' > /etc/sudoers.d/sky" && \ sudo chmod -R a+rwx /home/ray +# Switch to sky user +USER sky + # Set HOME environment variable for sky user, otherwise Ray base image HOME overrides ENV HOME /home/sky -# Switch to sky user -USER sky +# Setup SSH and generate hostkeys +RUN sudo mkdir -p /var/run/sshd && \ + sudo chmod 0755 /var/run/sshd && \ + sudo sed -i 's/PermitRootLogin prohibit-password/PermitRootLogin yes/' /etc/ssh/sshd_config && \ + sudo sed 's@session\s*required\s*pam_loginuid.so@session optional pam_loginuid.so@g' -i /etc/pam.d/sshd && \ + cd /etc/ssh/ && \ + ssh-keygen -A # Install SkyPilot pip dependencies RUN pip install wheel Click colorama cryptography jinja2 jsonschema && \ From 1202c3452d0c7768ffa11655785a585621233c83 Mon Sep 17 00:00:00 2001 From: Hemil Desai Date: Thu, 3 Aug 2023 20:01:14 -0700 Subject: [PATCH 106/183] Query cloud specific env vars in task setup (#2347) * Query cloud specific env vars in task setup * Make query_env_vars specific to Kubernetes cloud * Address PR comments --- sky/adaptors/kubernetes.py | 5 +++++ sky/backends/cloud_vm_ray_backend.py | 16 ++++++++++++++++ sky/clouds/kubernetes.py | 23 +++++++++++++++++++++++ sky/task.py | 9 ++------- sky/utils/common_utils.py | 7 +++++++ 5 files changed, 53 insertions(+), 7 deletions(-) diff --git a/sky/adaptors/kubernetes.py b/sky/adaptors/kubernetes.py index 79daa6f2434..f746d3d03fd 100644 --- a/sky/adaptors/kubernetes.py +++ b/sky/adaptors/kubernetes.py @@ -138,3 +138,8 @@ def config_exception(): @import_package def max_retry_error(): return urllib3.exceptions.MaxRetryError + + +@import_package +def stream(): + return kubernetes.stream.stream diff --git 
a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index b8daa6adabe..c5675d02dcc 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2822,12 +2822,27 @@ def _sync_file_mounts( self._execute_file_mounts(handle, all_file_mounts) self._execute_storage_mounts(handle, storage_mounts) + def _update_envs_for_k8s(self, handle: CloudVmRayResourceHandle, + task: task_lib.Task) -> None: + """Update envs for a task with Kubernetes specific env vars if cloud is Kubernetes.""" + if isinstance(handle.launched_resources.cloud, clouds.Kubernetes): + temp_envs = copy.deepcopy(task.envs) + cloud_env_vars = handle.launched_resources.cloud.query_env_vars( + handle.cluster_name) + task.update_envs(cloud_env_vars) + + # Re update the envs with the original envs to give priority to + # the original envs. + task.update_envs(temp_envs) + def _setup(self, handle: CloudVmRayResourceHandle, task: task_lib.Task, detach_setup: bool) -> None: start = time.time() style = colorama.Style fore = colorama.Fore + self._update_envs_for_k8s(handle, task) + if task.setup is None: return @@ -3138,6 +3153,7 @@ def _execute( # Check the task resources vs the cluster resources. 
Since `sky exec` # will not run the provision and _check_existing_cluster self.check_resources_fit_cluster(handle, task) + self._update_envs_for_k8s(handle, task) resources_str = backend_utils.get_task_resources_str(task) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index a2c339f9791..41d266b8c81 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -406,3 +406,26 @@ def query_status(cls, name: str, tag_filters: Dict[str, str], cluster_status.append(status_lib.ClusterStatus.INIT) # If pods are not found, we don't add them to the return list return cluster_status + + @classmethod + def query_env_vars(cls, name: str) -> Dict[str, str]: + namespace = kubernetes_utils.get_current_kube_config_context_namespace() + pod = kubernetes.core_api().list_namespaced_pod( + namespace, + label_selector=f'skypilot-cluster={name},ray-node-type=head' + ).items[0] + response = kubernetes.stream()( + kubernetes.core_api().connect_get_namespaced_pod_exec, + pod.metadata.name, + namespace, + command=['env'], + stderr=True, + stdin=False, + stdout=True, + tty=False, + _request_timeout=kubernetes.API_TIMEOUT) + lines: List[List[str]] = [ + line.split('=', 1) for line in response.split('\n') if '=' in line + ] + return dict( + [line for line in lines if common_utils.is_valid_env_var(line[0])]) diff --git a/sky/task.py b/sky/task.py index 39c2727c53d..6ab8c4aac22 100644 --- a/sky/task.py +++ b/sky/task.py @@ -18,6 +18,7 @@ from sky.skylet import constants from sky.utils import schemas from sky.utils import ux_utils +from sky.utils import common_utils if typing.TYPE_CHECKING: from sky import resources as resources_lib @@ -27,7 +28,6 @@ CommandOrCommandGen = Union[str, CommandGen] _VALID_NAME_REGEX = '[a-z0-9]+(?:[._-]{1,2}[a-z0-9]+)*' -_VALID_ENV_VAR_REGEX = '[a-zA-Z_][a-zA-Z0-9_]*' _VALID_NAME_DESCR = ('ASCII characters and may contain lowercase and' ' uppercase letters, digits, underscores, periods,' ' and dashes. 
Must start and end with alphanumeric' @@ -64,11 +64,6 @@ def _is_valid_name(name: str) -> bool: return bool(re.fullmatch(_VALID_NAME_REGEX, name)) -def _is_valid_env_var(name: str) -> bool: - """Checks if the task environment variable name is valid.""" - return bool(re.fullmatch(_VALID_ENV_VAR_REGEX, name)) - - def _fill_in_env_vars_in_file_mounts( file_mounts: Dict[str, Any], task_envs: Dict[str, str], @@ -446,7 +441,7 @@ def update_envs( if not isinstance(key, str): with ux_utils.print_exception_no_traceback(): raise ValueError('Env keys must be strings.') - if not _is_valid_env_var(key): + if not common_utils.is_valid_env_var(key): with ux_utils.print_exception_no_traceback(): raise ValueError(f'Invalid env key: {key}') else: diff --git a/sky/utils/common_utils.py b/sky/utils/common_utils.py index 2467b88003b..425c96575c3 100644 --- a/sky/utils/common_utils.py +++ b/sky/utils/common_utils.py @@ -30,6 +30,8 @@ _PAYLOAD_PATTERN = re.compile(r'(.*)') _PAYLOAD_STR = '{}' +_VALID_ENV_VAR_REGEX = '[a-zA-Z_][a-zA-Z0-9_]*' + logger = sky_logging.init_logger(__name__) _usage_run_id = None @@ -409,3 +411,8 @@ def find_free_port(start_port: int) -> int: except OSError: pass raise OSError('No free ports available.') + + +def is_valid_env_var(name: str) -> bool: + """Checks if the task environment variable name is valid.""" + return bool(re.fullmatch(_VALID_ENV_VAR_REGEX, name)) From d1a6ef420fd9b17796bd5b334299aeab5a215184 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 4 Aug 2023 17:31:22 +0530 Subject: [PATCH 107/183] working GPU type selection for GKE and EKS. GFD needs work. 
--- sky/clouds/kubernetes.py | 65 +++++++++++++++++++++++++++++ sky/templates/kubernetes-ray.yml.j2 | 6 +++ 2 files changed, 71 insertions(+) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index c162ceb4069..e5bfd63348e 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -303,6 +303,69 @@ def make_deploy_resources_variables( acc_count = k.accelerator_count if k.accelerator_count else 0 acc_type = k.accelerator_type if k.accelerator_type else '' + GKE_GPU_LABEL_PREFIX = 'cloud.google.com/gke-accelerator' + EKS_GPU_LABEL_PREFIX = 'k8s.amazonaws.com/accelerator' + NVIDIA_GFD_GPU_LABEL_PREFIX = 'nvidia.com/gpu.product' + + def detect_cluster_gpu_labels(): + # Detects and returns the node labels for identifying GPU type + # available on the node. This varies for GKE, EKS and on-prem + # (Nvidia GPU Operator). + # For GKE, the node labels are: + # cloud.google.com/gke-accelerator: nvidia-tesla-t4 + # cloud.google.com/gke-accelerator-count: 1 + # cloud.google.com/gke-accelerator-type: NVIDIA_TESLA_T4 + # For EKS, the node labels are: + # k8s.amazonaws.com/accelerator: nvidia-tesla-t4 + # For on-prem, the node labels are: + # nvidia.com/gpu.product: Tesla T4 + + # Get the set of labels across all nodes + # TODO(romilb): This is not efficient. 
We should cache the node labels + node_labels = set() + for node in kubernetes.core_api().list_node().items: + node_labels.update(node.metadata.labels.keys()) + + # Check if the node labels contain any of the GPU label prefixes + if any(label.startswith(GKE_GPU_LABEL_PREFIX) for label in node_labels): + return GKE_GPU_LABEL_PREFIX + elif any(label.startswith(EKS_GPU_LABEL_PREFIX) for label in node_labels): + return EKS_GPU_LABEL_PREFIX + elif any(label.startswith(NVIDIA_GFD_GPU_LABEL_PREFIX) for label in node_labels): + return NVIDIA_GFD_GPU_LABEL_PREFIX + else: + return None + + def get_gpu_label_value(accelerator: str, + gpu_label: str): + # Returns the GPU string from SkyPilot accelerator string + # to use as the value with the GPU label when specifying the nodeSelector. + # For GKE, the GPU string is the GPU type (e.g. nvidia-tesla-t4) + # For EKS, the GPU string is the GPU type (e.g. nvidia-tesla-t4) + # For on-prem, the GPU string is the GPU product name (e.g. Tesla-T4 or A100-SXM4-40GB) + + def get_k8s_accelerator_name(accelerator: str): + # Used by GKE and EKS + if accelerator in ('A100-80GB', 'L4'): + # A100-80GB and L4 have a different name pattern. 
+ return 'nvidia-{}'.format(accelerator.lower()) + else: + return 'nvidia-tesla-{}'.format( + accelerator.lower()) + + if gpu_label.startswith(GKE_GPU_LABEL_PREFIX): + return get_k8s_accelerator_name(accelerator) + elif gpu_label.startswith(EKS_GPU_LABEL_PREFIX): + return get_k8s_accelerator_name(accelerator) + elif gpu_label.startswith(NVIDIA_GFD_GPU_LABEL_PREFIX): + raise NotImplementedError('On-prem GPU label not supported yet') + else: + raise NotImplementedError('GPU label not supported') + + + k8s_acc_label_key = detect_cluster_gpu_labels() + k8s_acc_label_value = get_gpu_label_value(acc_type, k8s_acc_label_key) + vars = { 'instance_type': resources.instance_type, 'custom_resources': custom_resources, @@ -312,6 +375,8 @@ def make_deploy_resources_variables( 'accelerator_count': str(acc_count), 'timeout': str(self.TIMEOUT), 'k8s_ssh_key_secret_name': self.SKY_SSH_KEY_SECRET_NAME, + 'k8s_acc_label_key': k8s_acc_label_key, + 'k8s_acc_label_value': k8s_acc_label_value, # TODO(romilb): Allow user to specify custom images 'image_id': self.IMAGE, } diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index c1a410c9860..cead671f7aa 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -133,6 +133,12 @@ available_node_types: restartPolicy: Never + # Add node selector if GPUs are requested: + {% if k8s_acc_label_key is not none and k8s_acc_label_value is not none %} + nodeSelector: + {{k8s_acc_label_key}}: {{k8s_acc_label_value}} + {% endif %} + # This volume allocates shared memory for Ray to use for its plasma # object store. If you do not provide this, Ray will fall back to # /tmp which cause slowdowns if is not a shared memory volume. 
From b3fcadcb197a21806ceee2a91b59def30e9a2f98 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 4 Aug 2023 17:32:35 +0530 Subject: [PATCH 108/183] TODO for auto-detection --- sky/clouds/kubernetes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index e5bfd63348e..9be69085110 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -327,6 +327,8 @@ def detect_cluster_gpu_labels(): node_labels.update(node.metadata.labels.keys()) # Check if the node labels contain any of the GPU label prefixes + # TODO(romilb): First read from config and if not configured, then + # do auto-detection. if any(label.startswith(GKE_GPU_LABEL_PREFIX) for label in node_labels): return GKE_GPU_LABEL_PREFIX elif any(label.startswith(EKS_GPU_LABEL_PREFIX) for label in node_labels): From 4a7d5d738c9c3d2e07962982e293b56be8bfafc9 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 4 Aug 2023 17:37:35 +0530 Subject: [PATCH 109/183] Add image toggling for CPU/GPU --- sky/clouds/kubernetes.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 9be69085110..904e37d5508 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -167,8 +167,10 @@ class Kubernetes(clouds.Cloud): # TODO(romilb): Add GPU Support - toggle between image depending on chosen # accelerator type. 
- IMAGE = 'us-central1-docker.pkg.dev/' \ - 'skypilot-375900/skypilotk8s/skypilot-gpu:latest' + IMAGE_CPU = ('us-central1-docker.pkg.dev/' + 'skypilot-375900/skypilotk8s/skypilot:latest') + IMAGE_GPU = ('us-central1-docker.pkg.dev/skypilot-375900/' + 'skypilotk8s/skypilot-gpu:latest') @classmethod def _cloud_unsupported_features( @@ -303,6 +305,9 @@ def make_deploy_resources_variables( acc_count = k.accelerator_count if k.accelerator_count else 0 acc_type = k.accelerator_type if k.accelerator_type else '' + # Select image based on whether we are using GPUs or not. + image = self.IMAGE_GPU if acc_count > 0 else self.IMAGE_CPU + GKE_GPU_LABEL_PREFIX = 'cloud.google.com/gke-accelerator' EKS_GPU_LABEL_PREFIX = 'k8s.amazonaws.com/accelerator' NVIDIA_GFD_GPU_LABEL_PREFIX = 'nvidia.com/gpu.product' @@ -380,7 +385,7 @@ def get_k8s_accelerator_name(accelerator: str): 'k8s_acc_label_key': k8s_acc_label_key, 'k8s_acc_label_value': k8s_acc_label_value, # TODO(romilb): Allow user to specify custom images - 'image_id': self.IMAGE, + 'image_id': image, } return vars From 85ee1e1bd3f8dc3fc9c3903fac57a70661dda90f Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 4 Aug 2023 17:39:11 +0530 Subject: [PATCH 110/183] Add image toggling for CPU/GPU --- sky/clouds/kubernetes.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 904e37d5508..111ae99439e 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -167,10 +167,10 @@ class Kubernetes(clouds.Cloud): # TODO(romilb): Add GPU Support - toggle between image depending on chosen # accelerator type. 
- IMAGE_CPU = ('us-central1-docker.pkg.dev/' - 'skypilot-375900/skypilotk8s/skypilot:latest') - IMAGE_GPU = ('us-central1-docker.pkg.dev/skypilot-375900/' - 'skypilotk8s/skypilot-gpu:latest') + IMAGE_CPU = 'us-central1-docker.pkg.dev/' \ + 'skypilot-375900/skypilotk8s/skypilot:latest' + IMAGE_GPU = 'us-central1-docker.pkg.dev/skypilot-375900/' \ + 'skypilotk8s/skypilot-gpu:latest' @classmethod def _cloud_unsupported_features( From d95438b4e4a69cd0cf0d70039d09180979903440 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 4 Aug 2023 17:51:37 +0530 Subject: [PATCH 111/183] Fix none acce_type --- sky/clouds/kubernetes.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 111ae99439e..4cf15b633d6 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -167,10 +167,10 @@ class Kubernetes(clouds.Cloud): # TODO(romilb): Add GPU Support - toggle between image depending on chosen # accelerator type. - IMAGE_CPU = 'us-central1-docker.pkg.dev/' \ - 'skypilot-375900/skypilotk8s/skypilot:latest' - IMAGE_GPU = 'us-central1-docker.pkg.dev/skypilot-375900/' \ - 'skypilotk8s/skypilot-gpu:latest' + IMAGE_CPU = ('us-central1-docker.pkg.dev/' + 'skypilot-375900/skypilotk8s/skypilot:latest') + IMAGE_GPU = ('us-central1-docker.pkg.dev/skypilot-375900/' + 'skypilotk8s/skypilot-gpu:latest') @classmethod def _cloud_unsupported_features( @@ -303,7 +303,7 @@ def make_deploy_resources_variables( mem = k.memory # Optionally populate accelerator information. acc_count = k.accelerator_count if k.accelerator_count else 0 - acc_type = k.accelerator_type if k.accelerator_type else '' + acc_type = k.accelerator_type if k.accelerator_type else None # Select image based on whether we are using GPUs or not. 
image = self.IMAGE_GPU if acc_count > 0 else self.IMAGE_CPU @@ -344,13 +344,15 @@ def detect_cluster_gpu_labels(): return None def get_gpu_label_value(accelerator: str, - gpu_label: str): + gpu_label: str) -> Optional[str]: # Returns the GPU string from SkyPilot accelerator string # to use as the value with the GPU label when specifying the nodeSelector. # For GKE, the GPU string is the GPU type (e.g. nvidia-tesla-t4) # For EKS, the GPU string is the GPU type (e.g. nvidia-tesla-t4) # For on-prem, the GPU string is the GPU product name (e.g. Tesla-T4 or A100-SXM4-40GB) + if not accelerator: + return accelerator def get_k8s_accelerator_name(accelerator: str): # Used by GKE and EKS if accelerator in ('A100-80GB', 'L4'): From 607ad8538d24da5fe9e5a571602428a51246bc55 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 7 Aug 2023 12:08:45 +0530 Subject: [PATCH 112/183] remove memory from j2 --- sky/templates/kubernetes-ray.yml.j2 | 29 +++++++---------------------- tests/kubernetes/README.md | 17 +++++++++++++++++ 2 files changed, 24 insertions(+), 22 deletions(-) diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index cead671f7aa..6b222339045 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -189,16 +189,8 @@ available_node_types: memory: {{memory}}G nvidia.com/gpu: {{accelerator_count}} limits: - # The maximum memory that this pod is allowed to use. The - # limit will be detected by ray and split to use 10% for - # redis, 30% for the shared memory object store, and the - # rest for application memory. If this limit is not set and - # the object store size is not set manually, ray will - # allocate a very large object store in each pod that may - # cause problems for other pods. 
- cpu: {{cpus}} - memory: {{memory}}G - nvidia.com/gpu: {{accelerator_count}} + cpu: {{cpus}} # CPUs limits are required for Ray CPU count initialization + nvidia.com/gpu: {{accelerator_count}} # Limits need to be defined for GPU requests ray_worker_default: # Minimum number of Ray workers of this Pod type. min_workers: {{num_nodes - 1}} @@ -260,16 +252,8 @@ available_node_types: memory: {{memory}}G nvidia.com/gpu: {{accelerator_count}} limits: - # The maximum memory that this pod is allowed to use. The - # limit will be detected by ray and split to use 10% for - # redis, 30% for the shared memory object store, and the - # rest for application memory. If this limit is not set and - # the object store size is not set manually, ray will - # allocate a very large object store in each pod that may - # cause problems for other pods. - cpu: {{cpus}} - memory: {{memory}}G - nvidia.com/gpu: {{accelerator_count}} + cpu: {{cpus}} # CPUs limits are required for Ray CPU count initialization + nvidia.com/gpu: {{accelerator_count}} # Limits need to be defined for GPU requests setup_commands: # Disable `unattended-upgrades` to prevent apt-get from hanging. It should be called at the beginning before the process started to avoid being blocked. (This is a temporary fix.) @@ -305,14 +289,15 @@ head_start_ray_commands: # NOTE: --disable-usage-stats in `ray start` saves 10 seconds of idle wait. # Line "which prlimit ..": increase the limit of the number of open files for the raylet process, as the `ulimit` may not take effect at this point, because it requires # all the sessions to be reloaded. This is a workaround. + # We manually set --object-store-memory=500000000 to avoid ray from allocating a very large object store in each pod that may cause problems for other pods. 
- ((ps aux | grep -v nohup | grep -v grep | grep -q -- "python3 -m sky.skylet.skylet") || nohup python3 -m sky.skylet.skylet >> ~/.sky/skylet.log 2>&1 &); - ray stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 ray start --disable-usage-stats --head --port={{ray_port}} --dashboard-port={{ray_dashboard_port}} --dashboard-host 0.0.0.0 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1; + ray stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 ray start --disable-usage-stats --head --port={{ray_port}} --dashboard-port={{ray_dashboard_port}} --dashboard-host 0.0.0.0 --object-manager-port=8076 --autoscaling-config=~/ray_bootstrap_config.yaml {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} --object-store-memory=500000000 || exit 1; which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; {{dump_port_command}}; {%- if num_nodes > 1 %} worker_start_ray_commands: - - ray stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 ray start --disable-usage-stats --address=$RAY_HEAD_IP:{{ray_port}} --object-manager-port=8076 {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} || exit 1; + - ray stop; RAY_SCHEDULER_EVENTS=0 RAY_DEDUP_LOGS=0 ray start --disable-usage-stats --address=$RAY_HEAD_IP:{{ray_port}} --object-manager-port=8076 {{"--resources='%s'" % custom_resources if custom_resources}} --temp-dir {{ray_temp_dir}} --object-store-memory=500000000 || exit 1; which prlimit && for id in $(pgrep -f raylet/raylet); do sudo prlimit --nofile=1048576:1048576 --pid=$id || true; done; {%- else %} worker_start_ray_commands: [] diff --git a/tests/kubernetes/README.md b/tests/kubernetes/README.md index 8d8b300808a..1fbeba72cf2 100644 --- a/tests/kubernetes/README.md +++ b/tests/kubernetes/README.md @@ -45,9 +45,26 @@ and more. 
Create a GKE cluster using the cloud console. Use standard cluster, not autopilot. + +## Creating a GPU GKE Cluster from the command line + +This command will create a GKE cluster with 2 nodes - one with 1x T4 and another with 1x K80. + +``` +gcloud beta container --project "skypilot-375900" clusters create "testcluster" --zone "us-central1-c" --no-enable-basic-auth --cluster-version "1.27.2-gke.1200" --release-channel "regular" --machine-type "n1-standard-8" --accelerator "type=nvidia-tesla-t4,count=1" --image-type "COS_CONTAINERD" --disk-type "pd-balanced" --disk-size "100" --metadata disable-legacy-endpoints=true --scopes "https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" --num-nodes "1" --logging=SYSTEM,WORKLOAD --monitoring=SYSTEM --enable-ip-alias --network "projects/skypilot-375900/global/networks/default" --subnetwork "projects/skypilot-375900/regions/us-central1/subnetworks/default" --no-enable-intra-node-visibility --default-max-pods-per-node "110" --security-posture=standard --workload-vulnerability-scanning=disabled --no-enable-master-authorized-networks --addons HorizontalPodAutoscaling,HttpLoadBalancing,GcePersistentDiskCsiDriver --enable-autoupgrade --enable-autorepair --max-surge-upgrade 1 --max-unavailable-upgrade 0 --enable-managed-prometheus --enable-shielded-nodes --node-locations "us-central1-c" && gcloud beta container --project "skypilot-375900" node-pools create "k80" --cluster "testcluster" --zone "us-central1-c" --machine-type "n1-standard-8" --accelerator "type=nvidia-tesla-k80,count=1" --image-type "COS_CONTAINERD" --disk-type "pd-balanced" --disk-size "100" --metadata disable-legacy-endpoints=true --scopes 
"https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" --num-nodes "1" --enable-autoupgrade --enable-autorepair --max-surge-upgrade 1 --max-unavailable-upgrade 0 --node-locations "us-central1-c" +``` + +## Setup local kubectl +```bash +gcloud container clusters get-credentials testcluster --region us-central1-c +``` + ## Install nvidia drivers (if needed) If you're using GKE and running GKE < 1.27.2-gke.1200, you'll need to manually install nvidia drivers. ```bash +# For COS image: +kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/cos/daemonset-preloaded.yaml + # For ubuntu image: kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/ubuntu/daemonset-preloaded.yaml ``` From 6f702dab5bb9476371592c52b4ef980e2788a663 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 7 Aug 2023 13:07:43 +0530 Subject: [PATCH 113/183] Make resnet examples run again --- examples/resnet_app.py | 9 ++++- examples/resnet_app.yaml | 7 ++++ examples/resnet_app_storage.py | 54 ++++++++++++++++----------- examples/resnet_app_storage.yaml | 6 +++ examples/resnet_app_storage_spot.yaml | 6 +++ 5 files changed, 60 insertions(+), 22 deletions(-) diff --git a/examples/resnet_app.py b/examples/resnet_app.py index 9cd3276ea13..35ea60c08bd 100644 --- a/examples/resnet_app.py +++ b/examples/resnet_app.py @@ -4,7 +4,11 @@ # The working directory contains all code and will be synced to remote. 
workdir = '~/Downloads/tpu' -subprocess.run(f'cd {workdir} && git checkout 222cc86', shell=True, check=True) + +# Clone the repo locally to workdir +subprocess.run('git clone https://github.com/concretevitamin/tpu ' + f'{workdir} || true', shell=True, check=True) +subprocess.run(f'cd {workdir} && git checkout 9459fee', shell=True, check=True) # The setup command. Will be run under the working directory. setup = """\ @@ -18,6 +22,9 @@ conda install cudatoolkit=11.0 -y pip install tensorflow==2.4.0 pyyaml pip install protobuf==3.20 + mkdir -p $CONDA_PREFIX/etc/conda/activate.d + echo 'CUDNN_PATH=$(dirname $(python -c "import nvidia.cudnn;print(nvidia.cudnn.__file__)"))' >> $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh + echo 'export LD_LIBRARY_PATH=$CONDA_PREFIX/lib/:$CUDNN_PATH/lib:$LD_LIBRARY_PATH' >> $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh cd models && pip install -e . fi """ diff --git a/examples/resnet_app.yaml b/examples/resnet_app.yaml index b4dbce00521..cd81d667d3d 100644 --- a/examples/resnet_app.yaml +++ b/examples/resnet_app.yaml @@ -35,6 +35,13 @@ setup: | conda install cudatoolkit=11.0 -y pip install tensorflow==2.4.0 pyyaml pip install protobuf==3.20 + pip install nvidia-cudnn-cu11==8.6.0.163 + + # Automatically set CUDNN envvars when conda activate is run + mkdir -p $CONDA_PREFIX/etc/conda/activate.d + echo 'CUDNN_PATH=$(dirname $(python -c "import nvidia.cudnn;print(nvidia.cudnn.__file__)"))' >> $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh + echo 'export LD_LIBRARY_PATH=$CONDA_PREFIX/lib/:$CUDNN_PATH/lib:$LD_LIBRARY_PATH' >> $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh + cd models pip install -e . fi diff --git a/examples/resnet_app_storage.py b/examples/resnet_app_storage.py index 80ac5076070..0cf27d8dcd9 100644 --- a/examples/resnet_app_storage.py +++ b/examples/resnet_app_storage.py @@ -5,38 +5,50 @@ with sky.Dag() as dag: # The working directory contains all code and will be synced to remote. 
workdir = '~/Downloads/tpu' + data_mount_path = '/tmp/imagenet' - subprocess.run(f'cd {workdir} && git checkout 222cc86', - shell=True, + + # Clone the repo locally to workdir + subprocess.run('git clone https://github.com/concretevitamin/tpu ' + f'{workdir} || true', shell=True, check=True) + subprocess.run(f'cd {workdir} && git checkout 9459fee', shell=True, check=True) # The setup command. Will be run under the working directory. - setup = 'echo \"alias python=python3\" >> ~/.bashrc && \ - echo \"alias pip3=pip\" >> ~/.bashrc && \ - source ~/.bashrc && \ - pip install --upgrade pip && \ - pip install awscli botocore boto3 && \ - conda init bash && \ - conda activate resnet || \ - (conda create -n resnet python=3.7 -y && \ - conda activate resnet && \ - conda install cudatoolkit=11.0 -y && \ - pip install tensorflow==2.4.0 pyyaml && \ - cd models && pip install -e .)' + setup = """\ + set -e + pip install --upgrade pip + conda init bash + conda activate resnet && exists=1 || exists=0 + if [ $exists -eq 0 ]; then + conda create -n resnet python=3.7 -y + conda activate resnet + conda install cudatoolkit=11.0 -y + pip install tensorflow==2.4.0 pyyaml + pip install protobuf==3.20 + mkdir -p $CONDA_PREFIX/etc/conda/activate.d + echo 'CUDNN_PATH=$(dirname $(python -c "import nvidia.cudnn;print(nvidia.cudnn.__file__)"))' >> $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh + echo 'export LD_LIBRARY_PATH=$CONDA_PREFIX/lib/:$CUDNN_PATH/lib:$LD_LIBRARY_PATH' >> $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh + cd models && pip install -e . + fi + """ # The command to run. Will be run under the working directory. 
- run = f'conda activate resnet && \ + run = f"""\ + conda activate resnet + export XLA_FLAGS=\'--xla_gpu_cuda_data_dir=/usr/local/cuda/\' python -u models/official/resnet/resnet_main.py --use_tpu=False \ - --mode=train --train_batch_size=256 --train_steps=250 \ - --iterations_per_loop=125 \ - --data_dir={data_mount_path} \ - --model_dir=resnet-model-dir \ - --amp --xla --loss_scale=128' + --mode=train --train_batch_size=256 --train_steps=250 \ + --iterations_per_loop=125 \ + --data_dir={data_mount_path} \ + --model_dir=resnet-model-dir \ + --amp --xla --loss_scale=128 + """ # If the backend to be added is not specified, then SkyPilot's optimizer # will choose the backend bucket to be stored. # S3 Example - storage = sky.Storage(name="imagenet-bucket", source="s3://imagenet-bucket") + storage = sky.Storage(source="s3://imagenet-bucket") # GCS Example #storage = sky.Storage(name="imagenet_test_mluo",source="gs://imagenet_test_mluo") # Can also be from a local dir diff --git a/examples/resnet_app_storage.yaml b/examples/resnet_app_storage.yaml index 322a375c99d..7a3ddd81b57 100644 --- a/examples/resnet_app_storage.yaml +++ b/examples/resnet_app_storage.yaml @@ -31,6 +31,12 @@ setup: | conda activate resnet conda install cudatoolkit=11.0 -y pip install tensorflow==2.4.0 pyyaml + + # Automatically set CUDNN envvars when conda activate is run + mkdir -p $CONDA_PREFIX/etc/conda/activate.d + echo 'CUDNN_PATH=$(dirname $(python -c "import nvidia.cudnn;print(nvidia.cudnn.__file__)"))' >> $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh + echo 'export LD_LIBRARY_PATH=$CONDA_PREFIX/lib/:$CUDNN_PATH/lib:$LD_LIBRARY_PATH' >> $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh + cd models pip install -e . 
fi diff --git a/examples/resnet_app_storage_spot.yaml b/examples/resnet_app_storage_spot.yaml index ac09fd8d446..0d4a3fec840 100644 --- a/examples/resnet_app_storage_spot.yaml +++ b/examples/resnet_app_storage_spot.yaml @@ -29,6 +29,12 @@ setup: | conda activate resnet conda install cudatoolkit=11.0 -y pip install tensorflow==2.4.0 pyyaml + + # Automatically set CUDNN envvars when conda activate is run + mkdir -p $CONDA_PREFIX/etc/conda/activate.d + echo 'CUDNN_PATH=$(dirname $(python -c "import nvidia.cudnn;print(nvidia.cudnn.__file__)"))' >> $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh + echo 'export LD_LIBRARY_PATH=$CONDA_PREFIX/lib/:$CUDNN_PATH/lib:$LD_LIBRARY_PATH' >> $CONDA_PREFIX/etc/conda/activate.d/env_vars.sh + cd models pip install -e . fi From 738ae1998622f8836997a081b3ecf29178a48297 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 7 Aug 2023 13:11:08 +0530 Subject: [PATCH 114/183] lint --- examples/resnet_app.py | 7 +++++-- examples/resnet_app_storage.py | 10 +++++++--- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/examples/resnet_app.py b/examples/resnet_app.py index 35ea60c08bd..17ebf9fa5d6 100644 --- a/examples/resnet_app.py +++ b/examples/resnet_app.py @@ -6,8 +6,11 @@ workdir = '~/Downloads/tpu' # Clone the repo locally to workdir -subprocess.run('git clone https://github.com/concretevitamin/tpu ' - f'{workdir} || true', shell=True, check=True) +subprocess.run( + 'git clone https://github.com/concretevitamin/tpu ' + f'{workdir} || true', + shell=True, + check=True) subprocess.run(f'cd {workdir} && git checkout 9459fee', shell=True, check=True) # The setup command. Will be run under the working directory. 
diff --git a/examples/resnet_app_storage.py b/examples/resnet_app_storage.py index 0cf27d8dcd9..9d8063ea6ab 100644 --- a/examples/resnet_app_storage.py +++ b/examples/resnet_app_storage.py @@ -9,9 +9,13 @@ data_mount_path = '/tmp/imagenet' # Clone the repo locally to workdir - subprocess.run('git clone https://github.com/concretevitamin/tpu ' - f'{workdir} || true', shell=True, check=True) - subprocess.run(f'cd {workdir} && git checkout 9459fee', shell=True, + subprocess.run( + 'git clone https://github.com/concretevitamin/tpu ' + f'{workdir} || true', + shell=True, + check=True) + subprocess.run(f'cd {workdir} && git checkout 9459fee', + shell=True, check=True) # The setup command. Will be run under the working directory. From c3420a8f6403a6391d36e2fbff7376a3a87c7f31 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 8 Aug 2023 21:20:09 +0530 Subject: [PATCH 115/183] v100 readme --- tests/kubernetes/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/kubernetes/README.md b/tests/kubernetes/README.md index 1fbeba72cf2..8863988223a 100644 --- a/tests/kubernetes/README.md +++ b/tests/kubernetes/README.md @@ -48,10 +48,10 @@ Create a GKE cluster using the cloud console. Use standard cluster, not autopilo ## Creating a GPU GKE Cluster from the command line -This command will create a GKE cluster with 2 nodes - one with 1x T4 and another with 1x K80. +This command will create a GKE cluster with 2 nodes - one with 1x T4 and another with 1x V100. 
``` -gcloud beta container --project "skypilot-375900" clusters create "testcluster" --zone "us-central1-c" --no-enable-basic-auth --cluster-version "1.27.2-gke.1200" --release-channel "regular" --machine-type "n1-standard-8" --accelerator "type=nvidia-tesla-t4,count=1" --image-type "COS_CONTAINERD" --disk-type "pd-balanced" --disk-size "100" --metadata disable-legacy-endpoints=true --scopes "https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" --num-nodes "1" --logging=SYSTEM,WORKLOAD --monitoring=SYSTEM --enable-ip-alias --network "projects/skypilot-375900/global/networks/default" --subnetwork "projects/skypilot-375900/regions/us-central1/subnetworks/default" --no-enable-intra-node-visibility --default-max-pods-per-node "110" --security-posture=standard --workload-vulnerability-scanning=disabled --no-enable-master-authorized-networks --addons HorizontalPodAutoscaling,HttpLoadBalancing,GcePersistentDiskCsiDriver --enable-autoupgrade --enable-autorepair --max-surge-upgrade 1 --max-unavailable-upgrade 0 --enable-managed-prometheus --enable-shielded-nodes --node-locations "us-central1-c" && gcloud beta container --project "skypilot-375900" node-pools create "k80" --cluster "testcluster" --zone "us-central1-c" --machine-type "n1-standard-8" --accelerator "type=nvidia-tesla-k80,count=1" --image-type "COS_CONTAINERD" --disk-type "pd-balanced" --disk-size "100" --metadata disable-legacy-endpoints=true --scopes "https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" --num-nodes 
"1" --enable-autoupgrade --enable-autorepair --max-surge-upgrade 1 --max-unavailable-upgrade 0 --node-locations "us-central1-c" +gcloud beta container --project "skypilot-375900" clusters create "testcluster" --zone "us-central1-c" --no-enable-basic-auth --cluster-version "1.27.2-gke.1200" --release-channel "regular" --machine-type "n1-standard-8" --accelerator "type=nvidia-tesla-t4,count=1" --image-type "COS_CONTAINERD" --disk-type "pd-balanced" --disk-size "100" --metadata disable-legacy-endpoints=true --scopes "https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" --num-nodes "1" --logging=SYSTEM,WORKLOAD --monitoring=SYSTEM --enable-ip-alias --network "projects/skypilot-375900/global/networks/default" --subnetwork "projects/skypilot-375900/regions/us-central1/subnetworks/default" --no-enable-intra-node-visibility --default-max-pods-per-node "110" --security-posture=standard --workload-vulnerability-scanning=disabled --no-enable-master-authorized-networks --addons HorizontalPodAutoscaling,HttpLoadBalancing,GcePersistentDiskCsiDriver --enable-autoupgrade --enable-autorepair --max-surge-upgrade 1 --max-unavailable-upgrade 0 --enable-managed-prometheus --enable-shielded-nodes --node-locations "us-central1-c" && gcloud beta container --project "skypilot-375900" node-pools create "v100" --cluster "testcluster" --zone "us-central1-c" --machine-type "n1-standard-8" --accelerator "type=nvidia-tesla-v100,count=1" --image-type "COS_CONTAINERD" --disk-type "pd-balanced" --disk-size "100" --metadata disable-legacy-endpoints=true --scopes 
"https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" --num-nodes "1" --enable-autoupgrade --enable-autorepair --max-surge-upgrade 1 --max-unavailable-upgrade 0 --node-locations "us-central1-c" ``` ## Setup local kubectl From c87c64df8058f054627ad7a542ba7c91fa89084b Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 9 Aug 2023 09:46:13 +0530 Subject: [PATCH 116/183] dockerfile and smoketest --- Dockerfile_k8s_gpu | 2 +- tests/test_smoke.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Dockerfile_k8s_gpu b/Dockerfile_k8s_gpu index 2f4d8c3593b..0908d320cff 100644 --- a/Dockerfile_k8s_gpu +++ b/Dockerfile_k8s_gpu @@ -5,7 +5,7 @@ FROM rayproject/ray:2.4.0-gpu # Initialize conda for root user, install ssh and other local dependencies RUN sudo apt update -y && \ - sudo apt install gcc rsync sudo patch openssh-server pciutils nano fuse -y && \ + sudo apt install gcc rsync sudo patch openssh-server pciutils nano fuse unzip -y && \ sudo rm -rf /var/lib/apt/lists/* && \ sudo apt remove -y python3 && \ conda init diff --git a/tests/test_smoke.py b/tests/test_smoke.py index cdd7f0032e3..012ec58d2c9 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -1259,7 +1259,6 @@ def test_multi_echo(generic_cloud: str): @pytest.mark.no_lambda_cloud # Lambda Cloud does not have V100 gpus @pytest.mark.no_ibm # IBM cloud currently doesn't provide public image with CUDA @pytest.mark.no_scp # SCP does not have V100 (16GB) GPUs. Run test_scp_huggingface instead. 
-@pytest.mark.no_kubernetes # Kubernetes not have gpus def test_huggingface(generic_cloud: str): name = _get_cluster_name() test = Test( From 85f2b9e5ba00b3a8bada62ede883ad9fb97a6164 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 13 Aug 2023 07:33:43 +0530 Subject: [PATCH 117/183] fractional cpu and mem --- sky/clouds/kubernetes.py | 194 ++++++++++++++-------- sky/skylet/providers/kubernetes/config.py | 3 +- sky/templates/kubernetes-ray.yml.j2 | 8 +- 3 files changed, 126 insertions(+), 79 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 4cf15b633d6..d891a18e0f6 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -1,15 +1,17 @@ """Kubernetes.""" import json +import math import os import re import typing -from typing import Dict, Iterator, List, Optional, Tuple +from typing import Dict, Iterator, List, Optional, Set, Tuple, Union from sky import clouds from sky import exceptions from sky import status_lib from sky.adaptors import kubernetes from sky.utils import common_utils +from sky.utils import env_options from sky.utils import ux_utils from sky.skylet.providers.kubernetes import utils as kubernetes_utils @@ -38,6 +40,8 @@ class KubernetesInstanceType: appending "--{a}{type}" where a is the number of accelerators and type is the accelerator type. + CPU and memory can be specified as floats. Accelerator count must be int. 
+ Examples: - 4CPU--16GB - 0.5CPU--1.5GB @@ -47,7 +51,7 @@ class KubernetesInstanceType: def __init__(self, cpus: Optional[float] = None, memory: Optional[float] = None, - accelerator_count: Optional[float] = None, + accelerator_count: Optional[int] = None, accelerator_type: Optional[str] = None): self.cpus = cpus self.memory = memory @@ -57,7 +61,8 @@ def __init__(self, @property def name(self) -> str: """Returns the name of the instance.""" - name = f'{self.cpus}CPU--{self.memory}GB' + name = (f'{self._format_count(self.cpus)}CPU--' + f'{self._format_count(self.memory)}GB') if self.accelerator_count: name += f'--{self.accelerator_count}{self.accelerator_type}' return name @@ -84,7 +89,7 @@ def _parse_instance_type( accelerator_count = match.group('accelerator_count') accelerator_type = match.group('accelerator_type') if accelerator_count: - accelerator_count = float(accelerator_count) + accelerator_count = int(accelerator_count) accelerator_type = str(accelerator_type) else: accelerator_count = None @@ -111,8 +116,14 @@ def from_resources(cls, memory: float, accelerator_count: float = 0, accelerator_type: str = '') -> 'KubernetesInstanceType': - """Returns an instance name object from the given resources.""" + """Returns an instance name object from the given resources. + + If accelerator_count is not an int, it will be rounded up since GPU + requests in Kubernetes must be int. + """ name = f'{cpus}CPU--{memory}GB' + # Round up accelerator_count if it is not an int. 
+ accelerator_count = math.ceil(accelerator_count) if accelerator_count > 0: name += f'--{accelerator_count}{accelerator_type}' return cls(cpus=cpus, @@ -123,6 +134,14 @@ def from_resources(cls, def __str__(self): return self.name + @classmethod + def _format_count(cls, num: Union[float, int]) -> str: + """Formats a float to not show decimal point if it is a whole number""" + if isinstance(num, int): + return str(num) + return '{:.0f}'.format(num) if num.is_integer() else '{:.1f}'.format( + num) + @clouds.CLOUD_REGISTRY.register class Kubernetes(clouds.Cloud): @@ -228,18 +247,14 @@ def get_default_instance_type( memory: Optional[str] = None, disk_tier: Optional[str] = None) -> Optional[str]: del disk_tier # Unused. - # TODO(romilb): Allow fractional CPUs and memory # TODO(romilb): We should check the maximum number of CPUs and memory # that can be requested, and return None if the requested resources # exceed the maximum. This may require thought about how to handle # autoscaling clusters. # We strip '+' from resource requests since Kubernetes can provision # exactly the requested resources. - instance_cpus = int( - cpus.strip('+')) if cpus is not None else cls._DEFAULT_NUM_VCPUS - instance_mem = int( - memory.strip('+') - ) if memory is not None else \ + instance_cpus = float(cpus.strip('+')) if cpus is not None else cls._DEFAULT_NUM_VCPUS + instance_mem = float(memory.strip('+')) if memory is not None else \ instance_cpus * cls._DEFAULT_MEMORY_CPU_RATIO virtual_instance_type = KubernetesInstanceType(instance_cpus, instance_mem).name @@ -250,7 +265,6 @@ def get_accelerators_from_instance_type( cls, instance_type: str, ) -> Optional[Dict[str, int]]: - # TODO(romilb): Add GPU support. inst = KubernetesInstanceType.from_instance_type(instance_type) return { inst.accelerator_type: inst.accelerator_count @@ -308,51 +322,23 @@ def make_deploy_resources_variables( # Select image based on whether we are using GPUs or not. 
image = self.IMAGE_GPU if acc_count > 0 else self.IMAGE_CPU - GKE_GPU_LABEL_PREFIX = 'cloud.google.com/gke-accelerator' - EKS_GPU_LABEL_PREFIX = 'k8s.amazonaws.com/accelerator' - NVIDIA_GFD_GPU_LABEL_PREFIX = 'nvidia.com/gpu.product' - - def detect_cluster_gpu_labels(): - # Detects and returns the node labels for identifying GPU type - # available on the node. This varies for GKE, EKS and on-prem - # (Nvidia GPU Operator). - # For GKE, the node labels are: - # cloud.google.com/gke-accelerator: nvidia-tesla-t4 - # cloud.google.com/gke-accelerator-count: 1 - # cloud.google.com/gke-accelerator-type: NVIDIA_TESLA_T4 - # For EKS, the node labels are: - # k8s.amazonaws.com/accelerator: nvidia-tesla-t4 - # For on-prem, the node labels are: - # nvidia.com/gpu.product: Tesla T4 - - # Get the set of labels across all nodes - # TODO(romilb): This is not efficient. We should cache the node labels - node_labels = set() - for node in kubernetes.core_api().list_node().items: - node_labels.update(node.metadata.labels.keys()) - - # Check if the node labels contain any of the GPU label prefixes - # TODO(romilb): First read from config and if not configured, then - # do auto-detection. - if any(label.startswith(GKE_GPU_LABEL_PREFIX) for label in node_labels): - return GKE_GPU_LABEL_PREFIX - elif any(label.startswith(EKS_GPU_LABEL_PREFIX) for label in node_labels): - return EKS_GPU_LABEL_PREFIX - elif any(label.startswith(NVIDIA_GFD_GPU_LABEL_PREFIX) for label in node_labels): - return NVIDIA_GFD_GPU_LABEL_PREFIX - else: - return None - - def get_gpu_label_value(accelerator: str, - gpu_label: str) -> Optional[str]: - # Returns the GPU string from SkyPilot accelerator string - # to use as the value with the GPU label when specifying the nodeSelector. - # For GKE, the GPU string is the GPU type (e.g. nvidia-tesla-t4) - # For EKS, the GPU string is the GPU type (e.g. nvidia-tesla-t4) - # For on-prem, the GPU string is the GPU product name (e.g. 
Tesla-T4 or A100-SXM4-40GB) - - if not accelerator: - return accelerator + # ==================== # + # If GPU, set up GPU env + # ==================== # + + k8s_acc_label_key = None + k8s_acc_label_value = None + + if acc_count > 0: + class GPULabelFormatter: + @classmethod + def get_label_key(cls) -> str: + raise NotImplementedError + + @classmethod + def get_label_value(cls, accelerator: str) -> str: + raise NotImplementedError + def get_k8s_accelerator_name(accelerator: str): # Used by GKE and EKS if accelerator in ('A100-80GB', 'L4'): @@ -362,19 +348,81 @@ def get_k8s_accelerator_name(accelerator: str): return 'nvidia-tesla-{}'.format( accelerator.lower()) - if gpu_label.startswith(GKE_GPU_LABEL_PREFIX): - return get_k8s_accelerator_name(accelerator) - elif gpu_label.startswith(EKS_GPU_LABEL_PREFIX): - return get_k8s_accelerator_name(accelerator) - elif gpu_label.startswith(NVIDIA_GFD_GPU_LABEL_PREFIX): - raise NotImplementedError('On-prem GPU label not supported yet') - else: - raise NotImplementedError('GPU label not supported') - - - k8s_acc_label_key = detect_cluster_gpu_labels() - k8s_acc_label_value = get_gpu_label_value(acc_type, k8s_acc_label_key) - + class GKELabelFormatter(GPULabelFormatter): + @classmethod + def get_label_key(cls) -> str: + return 'cloud.google.com/gke-accelerator' + + @classmethod + def get_label_value(cls, accelerator: str) -> str: + return get_k8s_accelerator_name(accelerator) + + class EKSLabelFormatter(GPULabelFormatter): + @classmethod + def get_label_key(cls) -> str: + return 'k8s.amazonaws.com/accelerator' + + @classmethod + def get_label_value(cls, accelerator: str) -> str: + return get_k8s_accelerator_name(accelerator) + + class SkyPilotLabelFormatter(GPULabelFormatter): + @classmethod + def get_label_key(cls) -> str: + return 'skypilot.co/accelerator' + + @classmethod + def get_label_value(cls, accelerator: str) -> str: + return get_k8s_accelerator_name(accelerator) + + class NvidiaGFDLabelFormatter(GPULabelFormatter): + 
@classmethod + def get_label_key(cls) -> str: + return 'nvidia.com/gpu.product' + + @classmethod + def get_label_value(cls, accelerator: str) -> str: + raise NotImplementedError + + # has to be in order + LABEL_FORMATTER_REGISTRY = [SkyPilotLabelFormatter, GKELabelFormatter, EKSLabelFormatter, NvidiaGFDLabelFormatter] + + def detect_gpu_label_formatter() -> [Optional[GPULabelFormatter], + Set[str]]: + # Get the set of labels across all nodes + # TODO(romilb): This is not efficient. We should cache the node labels + node_labels = set() + for node in kubernetes.core_api().list_node().items: + node_labels.update(node.metadata.labels.keys()) + + # Check if the node labels contain any of the GPU label prefixes + for label_formatter in LABEL_FORMATTER_REGISTRY: + if label_formatter.get_label_key() in node_labels: + return label_formatter, node_labels + return None, node_labels + + label_formatter, node_labels = detect_gpu_label_formatter() + if label_formatter is None: + # TODO(romilb): This will fail early for autoscaling clusters. + # For AS clusters, we may need a way for users to specify the + # GPULabelFormatter to use since the cluster may be scaling up + # from zero nodes and may not have any GPU nodes yet. + with ux_utils.print_exception_no_traceback(): + suffix = '' + if env_options.Options.SHOW_DEBUG_INFO.get(): + suffix = ' Found node labels: {}'.format(node_labels) + raise KeyError( + 'Could not detect GPU labels in Kubernetes cluster. ' + 'Please ensure at least one node in the cluster has ' + 'node labels of the format ' + f'{SkyPilotLabelFormatter.get_label_key()}, ' + f'{GKELabelFormatter.get_label_key()} or ' + f'{EKSLabelFormatter.get_label_key()}. Please refer to ' + ' the documentation on how to set up node labels.' 
+ f'{suffix}') + + k8s_acc_label_key = label_formatter.get_label_key() + k8s_acc_label_value = label_formatter.get_label_value(acc_type) vars = { 'instance_type': resources.instance_type, 'custom_resources': custom_resources, @@ -430,9 +478,9 @@ def _make(instance_list): # TODO(romilb): Add GPU support. acc_type, acc_count = list(accelerators.items())[0] default_inst = KubernetesInstanceType.from_instance_type(default_instance_type) - instance_type = KubernetesInstanceType.from_resources(int(default_inst.cpus), - int(default_inst.memory), - int(acc_count), + instance_type = KubernetesInstanceType.from_resources(default_inst.cpus, + default_inst.memory, + acc_count, acc_type).name # No fuzzy lists for Kubernetes return _make([instance_type]), [] diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index d684c4b4617..b60d5b612de 100644 --- a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -115,9 +115,8 @@ def get_autodetected_resources(container_data): for resource_name in ['cpu', 'gpu'] } - # TODO(romilb): Update this to allow fractional resources. memory_limits = get_resource(container_resources, 'memory') - node_type_resources['memory'] = int(memory_limits) + node_type_resources['memory'] = memory_limits return node_type_resources diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 6b222339045..fe799972d8d 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -177,7 +177,7 @@ available_node_types: name: dshm - mountPath: /dev/fuse # Required for FUSE mounting name: dev-fuse - securityContext: # Required for FUSE mounting, but may be a security risk + securityContext: # Required for FUSE mounting. TODO(romilb): See if we can grant a reduced set of privileges. 
privileged: true lifecycle: postStart: @@ -189,7 +189,7 @@ available_node_types: memory: {{memory}}G nvidia.com/gpu: {{accelerator_count}} limits: - cpu: {{cpus}} # CPUs limits are required for Ray CPU count initialization + cpu: {{cpus}} # TODO(romilb): Ideally, we should not specify CPU limits here to allow jobs to use idle CPU cycles. However, CPUs limits are required for Ray CPU count initialization. nvidia.com/gpu: {{accelerator_count}} # Limits need to be defined for GPU requests ray_worker_default: # Minimum number of Ray workers of this Pod type. @@ -244,7 +244,7 @@ available_node_types: name: dshm - mountPath: /dev/fuse # Required for fuse mounting name: dev-fuse - securityContext: # Required for FUSE mounting. TODO(romilb) - evaluate security risk + securityContext: # Required for FUSE mounting. TODO(romilb): See if we can grant a reduced set of privileges. privileged: true resources: requests: @@ -252,7 +252,7 @@ available_node_types: memory: {{memory}}G nvidia.com/gpu: {{accelerator_count}} limits: - cpu: {{cpus}} # CPUs limits are required for Ray CPU count initialization + cpu: {{cpus}} # TODO(romilb): Ideally, we should not specify CPU limits here to allow jobs to use idle CPU cycles. However, CPUs limits are required for Ray CPU count initialization. nvidia.com/gpu: {{accelerator_count}} # Limits need to be defined for GPU requests setup_commands: From 509fd9626738af5e84d13857d9a52c563b95a786 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 13 Aug 2023 07:56:16 +0530 Subject: [PATCH 118/183] nits --- sky/clouds/kubernetes.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index d891a18e0f6..6f50ed916e7 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -184,8 +184,6 @@ class Kubernetes(clouds.Cloud): 'Kubernetes.', } - # TODO(romilb): Add GPU Support - toggle between image depending on chosen - # accelerator type. 
IMAGE_CPU = ('us-central1-docker.pkg.dev/' 'skypilot-375900/skypilotk8s/skypilot:latest') IMAGE_GPU = ('us-central1-docker.pkg.dev/skypilot-375900/' @@ -351,7 +349,7 @@ def get_k8s_accelerator_name(accelerator: str): class GKELabelFormatter(GPULabelFormatter): @classmethod def get_label_key(cls) -> str: - return 'cloud.google.com/gke-accelerator' + return 'mycloud.google.com/gke-accelerator' @classmethod def get_label_value(cls, accelerator: str) -> str: @@ -418,7 +416,7 @@ def detect_gpu_label_formatter() -> [Optional[GPULabelFormatter], f'{SkyPilotLabelFormatter.get_label_key()}, ' f'{GKELabelFormatter.get_label_key()} or ' f'{EKSLabelFormatter.get_label_key()}. Please refer to ' - ' the documentation on how to set up node labels.' + 'the documentation on how to set up node labels.' f'{suffix}') k8s_acc_label_key = label_formatter.get_label_key() @@ -474,8 +472,7 @@ def _make(instance_list): return _make([default_instance_type]), [] assert len(accelerators) == 1, resources - # If GPUs are requested, return an empty list. - # TODO(romilb): Add GPU support. + # GPUs requested - build instance type. 
acc_type, acc_count = list(accelerators.items())[0] default_inst = KubernetesInstanceType.from_instance_type(default_instance_type) instance_type = KubernetesInstanceType.from_resources(default_inst.cpus, From 22b1d17b630620197b79c7679138fbb40f1f3385 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 13 Aug 2023 22:10:15 +0530 Subject: [PATCH 119/183] refactor utils --- sky/clouds/kubernetes.py | 61 +----------------- sky/skylet/providers/kubernetes/__init__.py | 1 - sky/skylet/providers/kubernetes/config.py | 16 ++++- .../providers/kubernetes/node_provider.py | 7 +- sky/templates/kubernetes-ray.yml.j2 | 2 - sky/utils/__init__.py | 1 - .../utils.py => utils/kubernetes_utils.py} | 64 +++++++++++++++++++ 7 files changed, 82 insertions(+), 70 deletions(-) rename sky/{skylet/providers/kubernetes/utils.py => utils/kubernetes_utils.py} (66%) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 6f50ed916e7..68b5c20f0f6 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -12,8 +12,8 @@ from sky.adaptors import kubernetes from sky.utils import common_utils from sky.utils import env_options +from sky.utils import kubernetes_utils from sky.utils import ux_utils -from sky.skylet.providers.kubernetes import utils as kubernetes_utils if typing.TYPE_CHECKING: # Renaming to avoid shadowing variables. @@ -328,63 +328,6 @@ def make_deploy_resources_variables( k8s_acc_label_value = None if acc_count > 0: - class GPULabelFormatter: - @classmethod - def get_label_key(cls) -> str: - raise NotImplementedError - - @classmethod - def get_label_value(cls, accelerator: str) -> str: - raise NotImplementedError - - def get_k8s_accelerator_name(accelerator: str): - # Used by GKE and EKS - if accelerator in ('A100-80GB', 'L4'): - # A100-80GB and L4 have a different name pattern. 
- return 'nvidia-{}'.format(accelerator.lower()) - else: - return 'nvidia-tesla-{}'.format( - accelerator.lower()) - - class GKELabelFormatter(GPULabelFormatter): - @classmethod - def get_label_key(cls) -> str: - return 'mycloud.google.com/gke-accelerator' - - @classmethod - def get_label_value(cls, accelerator: str) -> str: - return get_k8s_accelerator_name(accelerator) - - class EKSLabelFormatter(GPULabelFormatter): - @classmethod - def get_label_key(cls) -> str: - return 'k8s.amazonaws.com/accelerator' - - @classmethod - def get_label_value(cls, accelerator: str) -> str: - return get_k8s_accelerator_name(accelerator) - - class SkyPilotLabelFormatter(GPULabelFormatter): - @classmethod - def get_label_key(cls) -> str: - return 'skypilot.co/accelerator' - - @classmethod - def get_label_value(cls, accelerator: str) -> str: - return get_k8s_accelerator_name(accelerator) - - class NvidiaGFDLabelFormatter(GPULabelFormatter): - @classmethod - def get_label_key(cls) -> str: - return 'nvidia.com/gpu.product' - - @classmethod - def get_label_value(cls, accelerator: str) -> str: - raise NotImplementedError - - # has to be in order - LABEL_FORMATTER_REGISTRY = [SkyPilotLabelFormatter, GKELabelFormatter, EKSLabelFormatter, NvidiaGFDLabelFormatter] - def detect_gpu_label_formatter() -> [Optional[GPULabelFormatter], Set[str]]: # Get the set of labels across all nodes @@ -409,7 +352,7 @@ def detect_gpu_label_formatter() -> [Optional[GPULabelFormatter], suffix = '' if env_options.Options.SHOW_DEBUG_INFO.get(): suffix = ' Found node labels: {}'.format(node_labels) - raise KeyError( + raise exceptions.ResourcesUnavailableError( 'Could not detect GPU labels in Kubernetes cluster. 
' 'Please ensure at least one node in the cluster has ' 'node labels of the format ' diff --git a/sky/skylet/providers/kubernetes/__init__.py b/sky/skylet/providers/kubernetes/__init__.py index b09a3fe4183..0bb7afaea81 100644 --- a/sky/skylet/providers/kubernetes/__init__.py +++ b/sky/skylet/providers/kubernetes/__init__.py @@ -1,2 +1 @@ -from sky.skylet.providers.kubernetes.utils import get_head_ssh_port, get_port from sky.skylet.providers.kubernetes.node_provider import KubernetesNodeProvider diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index b60d5b612de..e861f88b9ca 100644 --- a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -4,7 +4,7 @@ import re from sky.adaptors import kubernetes -from sky.skylet.providers.kubernetes import utils +from sky.utils import kubernetes_utils logger = logging.getLogger(__name__) @@ -59,7 +59,7 @@ def not_provided_msg(resource_type): def bootstrap_kubernetes(config): - namespace = utils.get_current_kube_config_context_namespace() + namespace = kubernetes_utils.get_current_kube_config_context_namespace() _configure_services(namespace, config['provider']) @@ -122,11 +122,21 @@ def get_autodetected_resources(container_data): def get_resource(container_resources, resource_name): + request = _get_resource(container_resources, + resource_name, + field_name='requests') limit = _get_resource(container_resources, resource_name, field_name='limits') + # Use request if limit is not set, else use limit. # float('inf') means there's no limit set - return 0 if limit == float('inf') else int(limit) + res_count = request if limit == float('inf') else limit + # Convert to int since Ray autoscaler expects int. + # Cap the minimum resource to 1 because if resource count is set to 0, + # (e.g., when request=0.5), ray will not be able to schedule any tasks. 
+ # We also round up the resource count to the nearest integer to provide the + # user at least the amount of resource they requested. + return max(1, math.ceil(res_count)) def _get_resource(container_resources, resource_name, field_name): diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 3ab8414b2d2..4749c9d19b2 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -7,8 +7,7 @@ from sky.adaptors import kubernetes from sky.skylet.providers.kubernetes import config -from sky.skylet.providers.kubernetes import get_head_ssh_port -from sky.skylet.providers.kubernetes import utils +from sky.utils import kubernetes_utils from ray.autoscaler._private.command_runner import SSHCommandRunner from ray.autoscaler.node_provider import NodeProvider from ray.autoscaler.tags import NODE_KIND_HEAD, TAG_RAY_CLUSTER_NAME, TAG_RAY_NODE_KIND @@ -50,7 +49,7 @@ def __init__(self, provider_config, cluster_name): self.cluster_name = cluster_name # Kubernetes namespace to user - self.namespace = utils.get_current_kube_config_context_namespace() + self.namespace = kubernetes_utils.get_current_kube_config_context_namespace() # Timeout for resource provisioning. If it takes longer than this # timeout, the resource provisioning will be considered failed. @@ -116,7 +115,7 @@ def external_port(self, node_id): # TODO(romilb): Implement caching here for performance. # TODO(romilb): Multi-node would need more handling here. 
cluster_name = node_id.split('-ray-head')[0] - return get_head_ssh_port(cluster_name, self.namespace) + return kubernetes_utils.get_head_ssh_port(cluster_name, self.namespace) def internal_ip(self, node_id): pod = kubernetes.core_api().read_namespaced_pod(node_id, self.namespace) diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index fe799972d8d..a8e3ccb257c 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -189,7 +189,6 @@ available_node_types: memory: {{memory}}G nvidia.com/gpu: {{accelerator_count}} limits: - cpu: {{cpus}} # TODO(romilb): Ideally, we should not specify CPU limits here to allow jobs to use idle CPU cycles. However, CPUs limits are required for Ray CPU count initialization. nvidia.com/gpu: {{accelerator_count}} # Limits need to be defined for GPU requests ray_worker_default: # Minimum number of Ray workers of this Pod type. @@ -252,7 +251,6 @@ available_node_types: memory: {{memory}}G nvidia.com/gpu: {{accelerator_count}} limits: - cpu: {{cpus}} # TODO(romilb): Ideally, we should not specify CPU limits here to allow jobs to use idle CPU cycles. However, CPUs limits are required for Ray CPU count initialization. 
nvidia.com/gpu: {{accelerator_count}} # Limits need to be defined for GPU requests setup_commands: diff --git a/sky/utils/__init__.py b/sky/utils/__init__.py index eff27bdd65b..f5d3dc7a34a 100644 --- a/sky/utils/__init__.py +++ b/sky/utils/__init__.py @@ -1,2 +1 @@ """Utility functions.""" -from sky.skylet.providers.kubernetes import utils as kubernetes_utils diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/utils/kubernetes_utils.py similarity index 66% rename from sky/skylet/providers/kubernetes/utils.py rename to sky/utils/kubernetes_utils.py index 60bc99d0050..eae214461a9 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/utils/kubernetes_utils.py @@ -6,6 +6,70 @@ DEFAULT_NAMESPACE = 'default' +class GPULabelFormatter: + @classmethod + def get_label_key(cls) -> str: + raise NotImplementedError + + @classmethod + def get_label_value(cls, accelerator: str) -> str: + raise NotImplementedError + + +def get_k8s_accelerator_name(accelerator: str): + # Used by GKE and EKS + if accelerator in ('A100-80GB', 'L4'): + # A100-80GB and L4 have a different name pattern. 
+ return 'nvidia-{}'.format(accelerator.lower()) + else: + return 'nvidia-tesla-{}'.format( + accelerator.lower()) + + +class GKELabelFormatter(GPULabelFormatter): + @classmethod + def get_label_key(cls) -> str: + return 'cloud.google.com/gke-accelerator' + + @classmethod + def get_label_value(cls, accelerator: str) -> str: + return get_k8s_accelerator_name(accelerator) + + +class EKSLabelFormatter(GPULabelFormatter): + @classmethod + def get_label_key(cls) -> str: + return 'k8s.amazonaws.com/accelerator' + + @classmethod + def get_label_value(cls, accelerator: str) -> str: + return get_k8s_accelerator_name(accelerator) + + +class SkyPilotLabelFormatter(GPULabelFormatter): + @classmethod + def get_label_key(cls) -> str: + return 'skypilot.co/accelerator' + + @classmethod + def get_label_value(cls, accelerator: str) -> str: + return get_k8s_accelerator_name(accelerator) + + +class NvidiaGFDLabelFormatter(GPULabelFormatter): + @classmethod + def get_label_key(cls) -> str: + return 'nvidia.com/gpu.product' + + @classmethod + def get_label_value(cls, accelerator: str) -> str: + raise NotImplementedError + + +# has to be in order +LABEL_FORMATTER_REGISTRY = [SkyPilotLabelFormatter, GKELabelFormatter, + EKSLabelFormatter, NvidiaGFDLabelFormatter] + def get_head_ssh_port(cluster_name: str, namespace: str) -> int: svc_name = f'{cluster_name}-ray-head-ssh' return get_port(svc_name, namespace) From 552481c98be682e1d860fcacbbba48377a66bd2d Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 13 Aug 2023 23:24:26 +0530 Subject: [PATCH 120/183] lint and cleanup --- sky/backends/backend_utils.py | 13 ++- sky/backends/cloud_vm_ray_backend.py | 9 +- sky/clouds/kubernetes.py | 97 +++++++++---------- .../providers/kubernetes/node_provider.py | 3 +- sky/utils/kubernetes_utils.py | 52 ++++++---- tests/test_smoke.py | 7 +- 6 files changed, 97 insertions(+), 84 deletions(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 4b71ca4ac44..e00b8899496 
100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -833,15 +833,18 @@ def write_cluster_config( Raises: exceptions.ResourcesUnavailableError: if the region/zones requested does not appear in the catalog, or an ssh_proxy_command is specified but - not for the given region. + not for the given region, or GPUs are requested in a Kubernetes + cluster but the cluster does not have nodes labeled with GPU types. """ # task.best_resources may not be equal to to_provision if the user # is running a job with less resources than the cluster has. cloud = to_provision.cloud - # This can raise a ResourcesUnavailableError, when the region/zones - # requested does not appear in the catalog. It can be triggered when the - # user changed the catalog file, while there is a cluster in the removed - # region/zone. + # This can raise a ResourcesUnavailableError when: + # * The region/zones requested does not appear in the catalog. It can be + # triggered if the user changed the catalog file while there is a cluster + # in the removed region/zone. + # * GPUs are requested in a Kubernetes cluster but the cluster does not + # have nodes labeled with GPU types. # # TODO(zhwu): We should change the exception type to a more specific one, as # the ResourcesUnavailableError is overly used. Also, it would be better to diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index c5675d02dcc..a6df5bdaccf 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -1495,9 +1495,10 @@ def _retry_zones( dryrun=dryrun, keep_launch_fields_in_existing_config=cluster_exists) except exceptions.ResourcesUnavailableError as e: - # Failed due to catalog issue, e.g. image not found. - logger.info( - f'Failed to find catalog in region {region.name}: {e}') + # Failed due to catalog issue, e.g. 
image not found, or + # GPUs are requested in a Kubernetes cluster but the cluster + # does not have nodes labeled with GPU types. + logger.info(f'{e}') continue if dryrun: return config_dict @@ -2824,7 +2825,7 @@ def _sync_file_mounts( def _update_envs_for_k8s(self, handle: CloudVmRayResourceHandle, task: task_lib.Task) -> None: - """Update envs for a task with Kubernetes specific env vars if cloud is Kubernetes.""" + """Update envs with env vars from Kubernetes if cloud is Kubernetes.""" if isinstance(handle.launched_resources.cloud, clouds.Kubernetes): temp_envs = copy.deepcopy(task.envs) cloud_env_vars = handle.launched_resources.cloud.query_env_vars( diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 68b5c20f0f6..504e89b55da 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -4,7 +4,7 @@ import os import re import typing -from typing import Dict, Iterator, List, Optional, Set, Tuple, Union +from typing import Dict, Iterator, List, Optional, Tuple, Union from sky import clouds from sky import exceptions @@ -49,8 +49,8 @@ class KubernetesInstanceType: """ def __init__(self, - cpus: Optional[float] = None, - memory: Optional[float] = None, + cpus: float, + memory: float, accelerator_count: Optional[int] = None, accelerator_type: Optional[str] = None): self.cpus = cpus @@ -61,7 +61,9 @@ def __init__(self, @property def name(self) -> str: """Returns the name of the instance.""" - name = (f'{self._format_count(self.cpus)}CPU--' + assert self.cpus is not None + assert self.memory is not None + name = (f'{self._format_count(self.cpus)}CPU--' f'{self._format_count(self.memory)}GB') if self.accelerator_count: name += f'--{self.accelerator_count}{self.accelerator_type}' @@ -76,7 +78,7 @@ def is_valid_instance_type(name: str) -> bool: @classmethod def _parse_instance_type( cls, - name: str) -> Tuple[float, float, Optional[float], Optional[str]]: + name: str) -> Tuple[float, float, Optional[int], Optional[str]]: """Returns the cpus, 
memory, accelerator_count, and accelerator_type from the given name.""" pattern = re.compile( @@ -114,7 +116,7 @@ def from_instance_type(cls, name: str) -> 'KubernetesInstanceType': def from_resources(cls, cpus: float, memory: float, - accelerator_count: float = 0, + accelerator_count: int = 0, accelerator_type: str = '') -> 'KubernetesInstanceType': """Returns an instance name object from the given resources. @@ -239,11 +241,10 @@ def get_port(cls, svc_name) -> int: return kubernetes_utils.get_port(svc_name, ns) @classmethod - def get_default_instance_type( - cls, - cpus: Optional[str] = None, - memory: Optional[str] = None, - disk_tier: Optional[str] = None) -> Optional[str]: + def get_default_instance_type(cls, + cpus: Optional[str] = None, + memory: Optional[str] = None, + disk_tier: Optional[str] = None) -> str: del disk_tier # Unused. # TODO(romilb): We should check the maximum number of CPUs and memory # that can be requested, and return None if the requested resources @@ -251,7 +252,8 @@ def get_default_instance_type( # autoscaling clusters. # We strip '+' from resource requests since Kubernetes can provision # exactly the requested resources. - instance_cpus = float(cpus.strip('+')) if cpus is not None else cls._DEFAULT_NUM_VCPUS + instance_cpus = float( + cpus.strip('+')) if cpus is not None else cls._DEFAULT_NUM_VCPUS instance_mem = float(memory.strip('+')) if memory is not None else \ instance_cpus * cls._DEFAULT_MEMORY_CPU_RATIO virtual_instance_type = KubernetesInstanceType(instance_cpus, @@ -266,7 +268,7 @@ def get_accelerators_from_instance_type( inst = KubernetesInstanceType.from_instance_type(instance_type) return { inst.accelerator_type: inst.accelerator_count - } if inst.accelerator_count else None + } if (inst.accelerator_count and inst.accelerator_type) else None @classmethod def get_vcpus_mem_from_instance_type( @@ -320,51 +322,37 @@ def make_deploy_resources_variables( # Select image based on whether we are using GPUs or not. 
image = self.IMAGE_GPU if acc_count > 0 else self.IMAGE_CPU - # ==================== # - # If GPU, set up GPU env - # ==================== # - k8s_acc_label_key = None k8s_acc_label_value = None - if acc_count > 0: - def detect_gpu_label_formatter() -> [Optional[GPULabelFormatter], - Set[str]]: - # Get the set of labels across all nodes - # TODO(romilb): This is not efficient. We should cache the node labels - node_labels = set() - for node in kubernetes.core_api().list_node().items: - node_labels.update(node.metadata.labels.keys()) - - # Check if the node labels contain any of the GPU label prefixes - for label_formatter in LABEL_FORMATTER_REGISTRY: - if label_formatter.get_label_key() in node_labels: - return label_formatter, node_labels - return None, node_labels - - label_formatter, node_labels = detect_gpu_label_formatter() + # If GPUs are requested, set node label to match the GPU type. + if acc_count > 0 and acc_type is not None: + label_formatter, node_labels = \ + kubernetes_utils.detect_gpu_label_formatter() if label_formatter is None: # TODO(romilb): This will fail early for autoscaling clusters. # For AS clusters, we may need a way for users to specify the # GPULabelFormatter to use since the cluster may be scaling up # from zero nodes and may not have any GPU nodes yet. with ux_utils.print_exception_no_traceback(): + supported_formats = ', '.join([ + f.get_label_key() + for f in kubernetes_utils.LABEL_FORMATTER_REGISTRY + ]) suffix = '' if env_options.Options.SHOW_DEBUG_INFO.get(): suffix = ' Found node labels: {}'.format(node_labels) raise exceptions.ResourcesUnavailableError( 'Could not detect GPU labels in Kubernetes cluster. ' 'Please ensure at least one node in the cluster has ' - 'node labels of the format ' - f'{SkyPilotLabelFormatter.get_label_key()}, ' - f'{GKELabelFormatter.get_label_key()} or ' - f'{EKSLabelFormatter.get_label_key()}. Please refer to ' + 'node labels of either of these formats: ' + f'{supported_formats}. 
Please refer to ' 'the documentation on how to set up node labels.' f'{suffix}') k8s_acc_label_key = label_formatter.get_label_key() k8s_acc_label_value = label_formatter.get_label_value(acc_type) - vars = { + deploy_vars = { 'instance_type': resources.instance_type, 'custom_resources': custom_resources, 'region': region.name, @@ -378,9 +366,7 @@ def detect_gpu_label_formatter() -> [Optional[GPULabelFormatter], # TODO(romilb): Allow user to specify custom images 'image_id': image, } - return vars - - + return deploy_vars def _get_feasible_launchable_resources( self, resources: 'resources_lib.Resources'): @@ -417,11 +403,10 @@ def _make(instance_list): assert len(accelerators) == 1, resources # GPUs requested - build instance type. acc_type, acc_count = list(accelerators.items())[0] - default_inst = KubernetesInstanceType.from_instance_type(default_instance_type) - instance_type = KubernetesInstanceType.from_resources(default_inst.cpus, - default_inst.memory, - acc_count, - acc_type).name + default_inst = KubernetesInstanceType.from_instance_type( + default_instance_type) + instance_type = KubernetesInstanceType.from_resources( + default_inst.cpus, default_inst.memory, acc_count, acc_type).name # No fuzzy lists for Kubernetes return _make([instance_type]), [] @@ -507,8 +492,18 @@ def query_env_vars(cls, name: str) -> Dict[str, str]: stdout=True, tty=False, _request_timeout=kubernetes.API_TIMEOUT) - lines: List[List[str]] = [ - line.split('=', 1) for line in response.split('\n') if '=' in line - ] - return dict( - [line for line in lines if common_utils.is_valid_env_var(line[0])]) + # Split response by newline and filter lines containing '=' + raw_lines = response.split('\n') + filtered_lines = [line for line in raw_lines if '=' in line] + + # Split each line at the first '=' occurrence + lines = [line.split('=', 1) for line in filtered_lines] + + # Construct the dictionary using only valid environment variable names + env_vars = {} + for line in lines: + key = 
line[0] + if common_utils.is_valid_env_var(key): + env_vars[key] = line[1] + + return env_vars diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 4749c9d19b2..8aa00181c9a 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -49,7 +49,8 @@ def __init__(self, provider_config, cluster_name): self.cluster_name = cluster_name # Kubernetes namespace to user - self.namespace = kubernetes_utils.get_current_kube_config_context_namespace() + self.namespace = kubernetes_utils.get_current_kube_config_context_namespace( + ) # Timeout for resource provisioning. If it takes longer than this # timeout, the resource provisioning will be considered failed. diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index eae214461a9..75924e143b0 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -1,4 +1,5 @@ -from typing import Tuple, Optional +"""Kubernetes utilities for SkyPilot.""" +from typing import Optional, Set, Tuple from sky.utils import common_utils from sky.adaptors import kubernetes @@ -16,8 +17,12 @@ def get_label_value(cls, accelerator: str) -> str: raise NotImplementedError -def get_k8s_accelerator_name(accelerator: str): - # Used by GKE and EKS +def get_gke_eks_accelerator_name(accelerator: str) -> str: + """Returns the accelerator name for GKE and EKS clusters + + Both use the same format - nvidia-tesla-. + A100-80GB and L4 are an exception - they use nvidia-. + """ if accelerator in ('A100-80GB', 'L4'): # A100-80GB and L4 have a different name pattern. 
return 'nvidia-{}'.format(accelerator.lower()) @@ -33,7 +38,7 @@ def get_label_key(cls) -> str: @classmethod def get_label_value(cls, accelerator: str) -> str: - return get_k8s_accelerator_name(accelerator) + return get_gke_eks_accelerator_name(accelerator) class EKSLabelFormatter(GPULabelFormatter): @@ -43,7 +48,7 @@ def get_label_key(cls) -> str: @classmethod def get_label_value(cls, accelerator: str) -> str: - return get_k8s_accelerator_name(accelerator) + return get_gke_eks_accelerator_name(accelerator) class SkyPilotLabelFormatter(GPULabelFormatter): @@ -53,22 +58,31 @@ def get_label_key(cls) -> str: @classmethod def get_label_value(cls, accelerator: str) -> str: - return get_k8s_accelerator_name(accelerator) + # For SkyPilot formatter, we adopt GKE/EKS accelerator format. + return get_gke_eks_accelerator_name(accelerator) -class NvidiaGFDLabelFormatter(GPULabelFormatter): - @classmethod - def get_label_key(cls) -> str: - return 'nvidia.com/gpu.product' +# LABEL_FORMATTER_REGISTRY stores the label formats SkyPilot will try to +# discover the accelerator type from. The order of the list is important, as +# it will be used to determine the priority of the label formats. +LABEL_FORMATTER_REGISTRY = [SkyPilotLabelFormatter, GKELabelFormatter, + EKSLabelFormatter] - @classmethod - def get_label_value(cls, accelerator: str) -> str: - raise NotImplementedError +def detect_gpu_label_formatter() -> Tuple[Optional[GPULabelFormatter], +Set[str]]: + # Get the set of labels across all nodes + # TODO(romilb): This is not efficient. 
We should cache the node labels + node_labels: Set[str] = set() + for node in kubernetes.core_api().list_node().items: + node_labels.update(node.metadata.labels.keys()) + + # Check if the node labels contain any of the GPU label prefixes + for label_formatter in LABEL_FORMATTER_REGISTRY: + if label_formatter.get_label_key() in node_labels: + return label_formatter(), node_labels + return None, node_labels -# has to be in order -LABEL_FORMATTER_REGISTRY = [SkyPilotLabelFormatter, GKELabelFormatter, - EKSLabelFormatter, NvidiaGFDLabelFormatter] def get_head_ssh_port(cluster_name: str, namespace: str) -> int: svc_name = f'{cluster_name}-ray-head-ssh' @@ -108,8 +122,8 @@ def check_credentials(timeout: int = kubernetes.API_TIMEOUT) -> \ except ImportError: # TODO(romilb): Update these error strs to also include link to docs # when docs are ready. - return False, f'`kubernetes` package is not installed. ' \ - f'Install it with: pip install kubernetes' + return False, '`kubernetes` package is not installed. ' \ + 'Install it with: pip install kubernetes' except kubernetes.api_exception() as e: # Check if the error is due to invalid credentials if e.status == 401: @@ -125,7 +139,7 @@ def check_credentials(timeout: int = kubernetes.API_TIMEOUT) -> \ 'is stable.' except ValueError as e: return False, common_utils.format_exception(e) - except Exception as e: + except Exception as e: # pylint: disable=broad-except return False, f'An error occurred: {str(e)}' diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 012ec58d2c9..15a85cb7112 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -991,7 +991,6 @@ def test_scp_logs(): @pytest.mark.no_ibm # IBM Cloud does not have K80 gpus. run test_ibm_job_queue instead @pytest.mark.no_scp # SCP does not have K80 gpus. 
Run test_scp_job_queue instead @pytest.mark.no_oci # OCI does not have K80 gpus -@pytest.mark.no_kubernetes # Kubernetes not have gpus def test_job_queue(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -1093,7 +1092,7 @@ def test_scp_job_queue(): @pytest.mark.no_ibm # IBM Cloud does not have T4 gpus. run test_ibm_job_queue_multinode instead @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet @pytest.mark.no_oci # OCI Cloud does not have T4 gpus. -@pytest.mark.no_kubernetes # Kubernetes not have gpus +@pytest.mark.no_kubernetes # Kubernetes not support num_nodes > 1 yet def test_job_queue_multinode(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -1235,7 +1234,7 @@ def test_ibm_job_queue_multinode(): @pytest.mark.no_ibm # IBM Cloud does not have K80 gpus @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet @pytest.mark.no_oci # OCI Cloud does not have K80 gpus -@pytest.mark.no_kubernetes # Kubernetes not have gpus +@pytest.mark.no_kubernetes # Kubernetes does not support num_nodes > 1 def test_multi_echo(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -1555,7 +1554,7 @@ def test_autostop(generic_cloud: str): # ---------- Testing Autodowning ---------- @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet. Run test_scp_autodown instead. -@pytest.mark.no_kubernetes # Kubernetes does not support num_nodes > 1 yet. Run test_scp_kubernetes instead. +@pytest.mark.no_kubernetes # Kubernetes does not support num_nodes > 1 yet. Run test_kubernetes_autodown instead. 
def test_autodown(generic_cloud: str): name = _get_cluster_name() test = Test( From 33b29b88a4ed9719916de340508e32ec12cf2ece Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 13 Aug 2023 23:29:43 +0530 Subject: [PATCH 121/183] lint and cleanup --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 446ef965c0a..299ec455efe 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -24,7 +24,7 @@ 'aws', 'gcp', 'azure', 'lambda', 'cloudflare', 'ibm', 'scp', 'oci', 'kubernetes' ] -default_clouds_to_run = ['gcp', 'azure', 'kubernetes'] +default_clouds_to_run = ['gcp', 'azure'] # Translate cloud name to pytest keyword. We need this because # @pytest.mark.lambda is not allowed, so we use @pytest.mark.lambda_cloud From e65d3c1feb4f4d6b2cc5012e9e4d937d07e5b75c Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 13 Aug 2023 23:33:03 +0530 Subject: [PATCH 122/183] lint and cleanup --- sky/backends/cloud_vm_ray_backend.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index a6df5bdaccf..71f874b8d17 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2825,7 +2825,17 @@ def _sync_file_mounts( def _update_envs_for_k8s(self, handle: CloudVmRayResourceHandle, task: task_lib.Task) -> None: - """Update envs with env vars from Kubernetes if cloud is Kubernetes.""" + """Update envs with env vars from Kubernetes if cloud is Kubernetes. + + Kubernetes automatically populates containers with critical environment + variables, such as those for discovering services running in the + cluster and CUDA/nvidia environment variables. We need to update task + environment variables with these env vars. This is needed for GPU + support and service discovery. + + See https://github.com/skypilot-org/skypilot/issues/2287 for + more details. 
+ """ if isinstance(handle.launched_resources.cloud, clouds.Kubernetes): temp_envs = copy.deepcopy(task.envs) cloud_env_vars = handle.launched_resources.cloud.query_env_vars( From 22fc6ad0bf4aba8b724bd4419957fa87f623c622 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 13 Aug 2023 23:43:15 +0530 Subject: [PATCH 123/183] lint and cleanup --- sky/cli.py | 2 +- sky/clouds/kubernetes.py | 1 - sky/skylet/providers/kubernetes/__init__.py | 2 -- sky/task.py | 2 +- 4 files changed, 2 insertions(+), 5 deletions(-) diff --git a/sky/cli.py b/sky/cli.py index 91939a06c94..bfd3a55cca6 100644 --- a/sky/cli.py +++ b/sky/cli.py @@ -62,12 +62,12 @@ from sky.data import storage_utils from sky.skylet import constants from sky.skylet import job_lib -from sky.skylet.providers.kubernetes import utils as kubernetes_utils from sky.usage import usage_lib from sky.utils import command_runner from sky.utils import common_utils from sky.utils import dag_utils from sky.utils import env_options +from sky.utils import kubernetes_utils from sky.utils import log_utils from sky.utils import schemas from sky.utils import subprocess_utils diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 935c1a46f59..b457ad2e301 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -10,7 +10,6 @@ from sky import exceptions from sky import status_lib from sky.adaptors import kubernetes -from sky.skylet.providers.kubernetes import utils as kubernetes_utils from sky.utils import common_utils from sky.utils import env_options from sky.utils import kubernetes_utils diff --git a/sky/skylet/providers/kubernetes/__init__.py b/sky/skylet/providers/kubernetes/__init__.py index 278c1f11123..0bb7afaea81 100644 --- a/sky/skylet/providers/kubernetes/__init__.py +++ b/sky/skylet/providers/kubernetes/__init__.py @@ -1,3 +1 @@ from sky.skylet.providers.kubernetes.node_provider import KubernetesNodeProvider -from sky.skylet.providers.kubernetes.utils import get_head_ssh_port -from 
sky.skylet.providers.kubernetes.utils import get_port diff --git a/sky/task.py b/sky/task.py index 1467e25852c..e9a19558f9d 100644 --- a/sky/task.py +++ b/sky/task.py @@ -17,9 +17,9 @@ from sky.data import data_utils from sky.data import storage as storage_lib from sky.skylet import constants +from sky.utils import common_utils from sky.utils import schemas from sky.utils import ux_utils -from sky.utils import common_utils if typing.TYPE_CHECKING: from sky import resources as resources_lib From 3e9656acc8b05207aaff3703164b6a16e370dce5 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 13 Aug 2023 23:47:15 +0530 Subject: [PATCH 124/183] lint and cleanup --- examples/spot/resnet_ddp/resnet_ddp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/spot/resnet_ddp/resnet_ddp.py b/examples/spot/resnet_ddp/resnet_ddp.py index 89d6d37fc83..710ee47d0a1 100644 --- a/examples/spot/resnet_ddp/resnet_ddp.py +++ b/examples/spot/resnet_ddp/resnet_ddp.py @@ -10,6 +10,7 @@ from torch.utils.data.distributed import DistributedSampler import torchvision import torchvision.transforms as transforms + import wandb From be3d905136050e9c4736740a0e75b9345fb6d210 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 13 Aug 2023 23:52:19 +0530 Subject: [PATCH 125/183] lint and cleanup --- sky/clouds/kubernetes.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index b457ad2e301..0dc68dda911 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -332,6 +332,8 @@ def make_deploy_resources_variables( label_formatter, node_labels = \ kubernetes_utils.detect_gpu_label_formatter() if label_formatter is None: + # If GPU labels are not detected, trigger failover by + # raising ResourcesUnavailableError. # TODO(romilb): This will fail early for autoscaling clusters. 
# For AS clusters, we may need a way for users to specify the # GPULabelFormatter to use since the cluster may be scaling up From 277295ae842ca4c1444f5b5bede3253700a05888 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 14 Aug 2023 07:52:13 +0530 Subject: [PATCH 126/183] lint --- examples/spot/resnet_ddp/resnet_ddp.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/spot/resnet_ddp/resnet_ddp.py b/examples/spot/resnet_ddp/resnet_ddp.py index 710ee47d0a1..89d6d37fc83 100644 --- a/examples/spot/resnet_ddp/resnet_ddp.py +++ b/examples/spot/resnet_ddp/resnet_ddp.py @@ -10,7 +10,6 @@ from torch.utils.data.distributed import DistributedSampler import torchvision import torchvision.transforms as transforms - import wandb From 69168dd3f2d406b4818ad52bcccdeb89485c87eb Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 14 Aug 2023 07:55:52 +0530 Subject: [PATCH 127/183] lint --- examples/spot/resnet_ddp/resnet_ddp.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/spot/resnet_ddp/resnet_ddp.py b/examples/spot/resnet_ddp/resnet_ddp.py index 89d6d37fc83..710ee47d0a1 100644 --- a/examples/spot/resnet_ddp/resnet_ddp.py +++ b/examples/spot/resnet_ddp/resnet_ddp.py @@ -10,6 +10,7 @@ from torch.utils.data.distributed import DistributedSampler import torchvision import torchvision.transforms as transforms + import wandb From 30049516d83c0d56dc417f631bb8bbc44441aa15 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 14 Aug 2023 08:30:39 +0530 Subject: [PATCH 128/183] manual lint --- sky/utils/kubernetes_utils.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index 8bdca72b4d9..1076248b921 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -8,6 +8,7 @@ class GPULabelFormatter: + @classmethod def get_label_key(cls) -> str: raise NotImplementedError @@ -27,11 +28,11 @@ def get_gke_eks_accelerator_name(accelerator: 
str) -> str: # A100-80GB and L4 have a different name pattern. return 'nvidia-{}'.format(accelerator.lower()) else: - return 'nvidia-tesla-{}'.format( - accelerator.lower()) + return 'nvidia-tesla-{}'.format(accelerator.lower()) class GKELabelFormatter(GPULabelFormatter): + @classmethod def get_label_key(cls) -> str: return 'cloud.google.com/gke-accelerator' @@ -42,6 +43,7 @@ def get_label_value(cls, accelerator: str) -> str: class EKSLabelFormatter(GPULabelFormatter): + @classmethod def get_label_key(cls) -> str: return 'k8s.amazonaws.com/accelerator' @@ -52,6 +54,7 @@ def get_label_value(cls, accelerator: str) -> str: class SkyPilotLabelFormatter(GPULabelFormatter): + @classmethod def get_label_key(cls) -> str: return 'skypilot.co/accelerator' @@ -65,12 +68,13 @@ def get_label_value(cls, accelerator: str) -> str: # LABEL_FORMATTER_REGISTRY stores the label formats SkyPilot will try to # discover the accelerator type from. The order of the list is important, as # it will be used to determine the priority of the label formats. -LABEL_FORMATTER_REGISTRY = [SkyPilotLabelFormatter, GKELabelFormatter, - EKSLabelFormatter] +LABEL_FORMATTER_REGISTRY = [ + SkyPilotLabelFormatter, GKELabelFormatter, EKSLabelFormatter +] -def detect_gpu_label_formatter() -> Tuple[Optional[GPULabelFormatter], -Set[str]]: +def detect_gpu_label_formatter( +) -> Tuple[Optional[GPULabelFormatter], Set[str]]: # Get the set of labels across all nodes # TODO(romilb): This is not efficient. We should cache the node labels node_labels: Set[str] = set() @@ -139,7 +143,7 @@ def check_credentials(timeout: int = kubernetes.API_TIMEOUT) -> \ 'is stable.' 
except ValueError as e: return False, common_utils.format_exception(e) - except Exception as e: # pylint: disable=broad-except + except Exception as e: # pylint: disable=broad-except return False, f'An error occurred: {str(e)}' From b76b3a6d454608cb7300e9f2254218f29bf13516 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 14 Aug 2023 08:39:15 +0530 Subject: [PATCH 129/183] manual isort --- examples/spot/resnet_ddp/resnet_ddp.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/spot/resnet_ddp/resnet_ddp.py b/examples/spot/resnet_ddp/resnet_ddp.py index 710ee47d0a1..89d6d37fc83 100644 --- a/examples/spot/resnet_ddp/resnet_ddp.py +++ b/examples/spot/resnet_ddp/resnet_ddp.py @@ -10,7 +10,6 @@ from torch.utils.data.distributed import DistributedSampler import torchvision import torchvision.transforms as transforms - import wandb From 7207c34f555341fc18bd8038cbadb7fa395d5649 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 14 Aug 2023 08:57:59 +0530 Subject: [PATCH 130/183] test readme update --- tests/kubernetes/README.md | 73 ++++++++++++++------------------------ 1 file changed, 26 insertions(+), 47 deletions(-) diff --git a/tests/kubernetes/README.md b/tests/kubernetes/README.md index 8863988223a..9e45a1a7911 100644 --- a/tests/kubernetes/README.md +++ b/tests/kubernetes/README.md @@ -13,69 +13,48 @@ To build this image locally and optionally push to the SkyPilot registry, run: ./build_image.sh # Build and push image (CAREFUL - this will push to the SkyPilot registry!) ./build_image.sh -p +# Build and push GPU image (CAREFUL - this will push to the SkyPilot registry!) +./build_image.sh -p -g ``` ## Running a local development cluster We use (kind)[https://kind.sigs.k8s.io/] to run a local Kubernetes cluster -for development. +for development. To create a local development cluster, run: ```bash sky local up ``` ## Running a GKE cluster -1. Make sure ports 30000-32767 are open in your node pool VPC's firewall. -2. 
Create a GKE cluster with at least 1 node. We recommend creating nodes with at least 4 vCPUs. +1. Create a GKE cluster with at least 1 node. We recommend creating nodes with at least 4 vCPUs. * Note - only GKE standard clusters are supported. GKE autopilot clusters are not supported. + * Tip - to create an example GPU cluster for testing, this command will create a 2 node cluster with 1x T4 and another with 1x V100: + ```bash + gcloud beta container --project "skypilot-375900" clusters create "testcluster" --zone "us-central1-c" --no-enable-basic-auth --cluster-version "1.27.2-gke.1200" --release-channel "regular" --machine-type "n1-standard-8" --accelerator "type=nvidia-tesla-t4,count=1" --image-type "COS_CONTAINERD" --disk-type "pd-balanced" --disk-size "100" --metadata disable-legacy-endpoints=true --scopes "https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" --num-nodes "1" --logging=SYSTEM,WORKLOAD --monitoring=SYSTEM --enable-ip-alias --network "projects/skypilot-375900/global/networks/default" --subnetwork "projects/skypilot-375900/regions/us-central1/subnetworks/default" --no-enable-intra-node-visibility --default-max-pods-per-node "110" --security-posture=standard --workload-vulnerability-scanning=disabled --no-enable-master-authorized-networks --addons HorizontalPodAutoscaling,HttpLoadBalancing,GcePersistentDiskCsiDriver --enable-autoupgrade --enable-autorepair --max-surge-upgrade 1 --max-unavailable-upgrade 0 --enable-managed-prometheus --enable-shielded-nodes --node-locations "us-central1-c" && gcloud beta container --project "skypilot-375900" node-pools create "v100" --cluster "testcluster" --zone "us-central1-c" --machine-type "n1-standard-8" --accelerator "type=nvidia-tesla-v100,count=1" --image-type 
"COS_CONTAINERD" --disk-type "pd-balanced" --disk-size "100" --metadata disable-legacy-endpoints=true --scopes "https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" --num-nodes "1" --enable-autoupgrade --enable-autorepair --max-surge-upgrade 1 --max-unavailable-upgrade 0 --node-locations "us-central1-c" + ``` +2. Make sure ports 30000-32767 are open in your node pool VPC's firewall. 3. Get the kubeconfig for your cluster and place it in `~/.kube/config`: -```bash -gcloud container clusters get-credentials --region -# Example: -# gcloud container clusters get-credentials testcluster --region us-central1-c -``` + ```bash + gcloud container clusters get-credentials --region + # Example: + # gcloud container clusters get-credentials testcluster --region us-central1-c + ``` 4. Verify by running `kubectl get nodes`. You should see your nodes. +5. **If you want GPU support**, make sure you install GPU drivers by running: + ```bash + # If using COS based nodes (e.g., in the example above): + kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/cos/daemonset-preloaded.yaml + + # If using Ubuntu based nodes: + kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/ubuntu/daemonset-preloaded.yaml + ``` + This will create a resource like `nvidia.com/gpu: 1`. You can verify this resource is available by running: + ```bash + kubectl describe nodes + ``` 5. You can run SkyPilot tasks now. ## Other useful scripts `scripts` directory contains other useful scripts for development, including Kubernetes dashboard, ray yaml for testing the SkyPilot Kubernetes node provider and more. 
- -# GKE GPU support guide - -Create a GKE cluster using the cloud console. Use standard cluster, not autopilot. - - -## Creating a GPU GKE Cluster from the command line - -This command will create a GKE cluster with 2 nodes - one with 1x T4 and another with 1x V100. - -``` -gcloud beta container --project "skypilot-375900" clusters create "testcluster" --zone "us-central1-c" --no-enable-basic-auth --cluster-version "1.27.2-gke.1200" --release-channel "regular" --machine-type "n1-standard-8" --accelerator "type=nvidia-tesla-t4,count=1" --image-type "COS_CONTAINERD" --disk-type "pd-balanced" --disk-size "100" --metadata disable-legacy-endpoints=true --scopes "https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" --num-nodes "1" --logging=SYSTEM,WORKLOAD --monitoring=SYSTEM --enable-ip-alias --network "projects/skypilot-375900/global/networks/default" --subnetwork "projects/skypilot-375900/regions/us-central1/subnetworks/default" --no-enable-intra-node-visibility --default-max-pods-per-node "110" --security-posture=standard --workload-vulnerability-scanning=disabled --no-enable-master-authorized-networks --addons HorizontalPodAutoscaling,HttpLoadBalancing,GcePersistentDiskCsiDriver --enable-autoupgrade --enable-autorepair --max-surge-upgrade 1 --max-unavailable-upgrade 0 --enable-managed-prometheus --enable-shielded-nodes --node-locations "us-central1-c" && gcloud beta container --project "skypilot-375900" node-pools create "v100" --cluster "testcluster" --zone "us-central1-c" --machine-type "n1-standard-8" --accelerator "type=nvidia-tesla-v100,count=1" --image-type "COS_CONTAINERD" --disk-type "pd-balanced" --disk-size "100" --metadata disable-legacy-endpoints=true --scopes 
"https://www.googleapis.com/auth/devstorage.read_only","https://www.googleapis.com/auth/logging.write","https://www.googleapis.com/auth/monitoring","https://www.googleapis.com/auth/servicecontrol","https://www.googleapis.com/auth/service.management.readonly","https://www.googleapis.com/auth/trace.append" --num-nodes "1" --enable-autoupgrade --enable-autorepair --max-surge-upgrade 1 --max-unavailable-upgrade 0 --node-locations "us-central1-c" -``` - -## Setup local kubectl -```bash -gcloud container clusters get-credentials testcluster --region us-central1-c -``` - -## Install nvidia drivers (if needed) -If you're using GKE and running GKE < 1.27.2-gke.1200, you'll need to manually install nvidia drivers. -```bash -# For COS image: -kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/cos/daemonset-preloaded.yaml - -# For ubuntu image: -kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/master/nvidia-driver-installer/ubuntu/daemonset-preloaded.yaml -``` - -[Not sure] This will create a resource like `nvidia.com/gpu: 1`. However, we still need labels for GPU type (e.g., A100). - -## Install GPU feature discovery -NOTE - GFD does not work on GKE! https://github.com/NVIDIA/gpu-feature-discovery/issues/44 -We can use Nvidia [gpu-feature-discovery](https://github.com/NVIDIA/gpu-feature-discovery/blob/main/README.md) to detect GPUs on the nodes and automatically label the nodes. 
- -```bash -kubectl apply -f https://raw.githubusercontent.com/NVIDIA/gpu-feature-discovery/v0.8.1/deployments/static/nfd.yaml -kubectl apply -f https://raw.githubusercontent.com/NVIDIA/gpu-feature-discovery/v0.8.1/deployments/static/gpu-feature-discovery-daemonset.yaml -``` \ No newline at end of file From 56ac60f89ca892b19a31f9dfd00b358d86013bd4 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 14 Aug 2023 10:12:18 +0530 Subject: [PATCH 131/183] Remove EKS --- sky/utils/kubernetes_utils.py | 25 +++++++------------------ 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index 1076248b921..3b4ec40456d 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -18,10 +18,10 @@ def get_label_value(cls, accelerator: str) -> str: raise NotImplementedError -def get_gke_eks_accelerator_name(accelerator: str) -> str: - """Returns the accelerator name for GKE and EKS clusters +def get_gke_accelerator_name(accelerator: str) -> str: + """Returns the accelerator name for GKE clusters - Both use the same format - nvidia-tesla-. + Uses the format - nvidia-tesla-. A100-80GB and L4 are an exception - they use nvidia-. """ if accelerator in ('A100-80GB', 'L4'): @@ -39,18 +39,7 @@ def get_label_key(cls) -> str: @classmethod def get_label_value(cls, accelerator: str) -> str: - return get_gke_eks_accelerator_name(accelerator) - - -class EKSLabelFormatter(GPULabelFormatter): - - @classmethod - def get_label_key(cls) -> str: - return 'k8s.amazonaws.com/accelerator' - - @classmethod - def get_label_value(cls, accelerator: str) -> str: - return get_gke_eks_accelerator_name(accelerator) + return get_gke_accelerator_name(accelerator) class SkyPilotLabelFormatter(GPULabelFormatter): @@ -61,15 +50,15 @@ def get_label_key(cls) -> str: @classmethod def get_label_value(cls, accelerator: str) -> str: - # For SkyPilot formatter, we adopt GKE/EKS accelerator format. 
- return get_gke_eks_accelerator_name(accelerator) + # For SkyPilot formatter, we adopt GKE accelerator format. + return get_gke_accelerator_name(accelerator) # LABEL_FORMATTER_REGISTRY stores the label formats SkyPilot will try to # discover the accelerator type from. The order of the list is important, as # it will be used to determine the priority of the label formats. LABEL_FORMATTER_REGISTRY = [ - SkyPilotLabelFormatter, GKELabelFormatter, EKSLabelFormatter + SkyPilotLabelFormatter, GKELabelFormatter ] From d9883075ca554efb0a2a1304fbf261bd57a16e0c Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 14 Aug 2023 10:13:12 +0530 Subject: [PATCH 132/183] lint --- sky/utils/kubernetes_utils.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index 3b4ec40456d..8f83ad7643e 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -57,9 +57,7 @@ def get_label_value(cls, accelerator: str) -> str: # LABEL_FORMATTER_REGISTRY stores the label formats SkyPilot will try to # discover the accelerator type from. The order of the list is important, as # it will be used to determine the priority of the label formats. 
-LABEL_FORMATTER_REGISTRY = [ - SkyPilotLabelFormatter, GKELabelFormatter -] +LABEL_FORMATTER_REGISTRY = [SkyPilotLabelFormatter, GKELabelFormatter] def detect_gpu_label_formatter( From a208d911d86216108c3ca7739d81178a39656c06 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 15 Aug 2023 17:56:32 +0530 Subject: [PATCH 133/183] add gpu labeler --- sky/utils/kubernetes/gpu_labeler.py | 128 ++++++++++++++++++ sky/utils/kubernetes/k8s_gpu_labeler_job.yaml | 41 ++++++ .../kubernetes/k8s_gpu_labeler_setup.yaml | 105 ++++++++++++++ tests/kubernetes/eks_test_cluster.yaml | 18 +++ 4 files changed, 292 insertions(+) create mode 100644 sky/utils/kubernetes/gpu_labeler.py create mode 100644 sky/utils/kubernetes/k8s_gpu_labeler_job.yaml create mode 100644 sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml create mode 100644 tests/kubernetes/eks_test_cluster.yaml diff --git a/sky/utils/kubernetes/gpu_labeler.py b/sky/utils/kubernetes/gpu_labeler.py new file mode 100644 index 00000000000..1ff0b644cc5 --- /dev/null +++ b/sky/utils/kubernetes/gpu_labeler.py @@ -0,0 +1,128 @@ +"""Script to label GPU nodes in a Kubernetes cluster for use with SkyPilot""" +import argparse +import os +import subprocess +from typing import Tuple + +import click +from kubernetes import client +from kubernetes import config +import yaml + +import sky +from sky.utils import log_utils + + +def prerequisite_check() -> Tuple[bool, str]: + """Checks if kubectl is installed and kubeconfig is set up""" + reason = '' + prereq_ok = False + try: + subprocess.check_output(['kubectl', 'get', 'pods']) + prereq_ok = True + except FileNotFoundError: + reason = 'kubectl not found. Please install kubectl and try again.' 
+ except subprocess.CalledProcessError as e: + output = e.output.decode('utf-8') + reason = 'Error running kubectl: ' + output + return prereq_ok, reason + + +def cleanup() -> Tuple[bool, str]: + """Deletes all Kubernetes resources created by this script + + Used to provide idempotency when the script is run multiple times. Also + invoked if --cleanup is passed to the script. + """ + # Delete any existing GPU labeler Kubernetes resources: + del_command = ('kubectl delete pods,services,deployments,jobs,daemonsets,' + 'replicasets,configmaps,secrets,pv,pvc,clusterrole,' + 'serviceaccount,clusterrolebinding -n kube-system ' + '-l job=sky-gpu-labeler') + + success = False + reason = '' + try: + subprocess.check_output(del_command.split()) + success = True + except subprocess.CalledProcessError as e: + output = e.output.decode('utf-8') + reason = 'Error deleting existing GPU labeler resources: ' + output + return success, reason + + +def label(): + # Check if kubectl is installed and kubeconfig is set up + prereq_ok, reason = prerequisite_check() + if not prereq_ok: + click.echo(reason) + return + + deletion_success, reason = cleanup() + if not deletion_success: + click.echo(reason) + return + + sky_dir = os.path.dirname(sky.__file__) + manifest_dir = os.path.join(sky_dir, 'utils/kubernetes') + + # Apply the RBAC manifest using kubectl since it contains multiple resources + with log_utils.safe_rich_status('Setting up GPU labeling'): + rbac_manifest_path = os.path.join(manifest_dir, + 'k8s_gpu_labeler_setup.yaml') + try: + subprocess.check_output( + ['kubectl', 'apply', '-f', rbac_manifest_path]) + except subprocess.CalledProcessError as e: + output = e.output.decode('utf-8') + click.echo('Error setting up GPU labeling: ' + output) + return + + with log_utils.safe_rich_status('Creating GPU labeler jobs'): + config.load_kube_config() + + v1 = client.CoreV1Api() + batch_v1 = client.BatchV1Api() + # Load the job manifest + job_manifest_path = os.path.join(manifest_dir, + 
'k8s_gpu_labeler_job.yaml') + + with open(job_manifest_path, 'r') as file: + job_manifest = yaml.safe_load(file) + + # Iterate over nodes + nodes = v1.list_node().items + for node in nodes: + node_name = node.metadata.name + + # Modify the job manifest for the current node + job_manifest['metadata']['name'] = f'sky-gpu-labeler-{node_name}' + job_manifest['spec']['template']['spec']['nodeSelector'] = { + 'kubernetes.io/hostname': node_name + } + namespace = job_manifest['metadata']['namespace'] + + # Create the job for this node` + batch_v1.create_namespaced_job(namespace, job_manifest) + print(f'Created GPU labeler job for node {node_name}') + + +if __name__ == '__main__': + parser = argparse.ArgumentParser( + prog='sky-gpu-labeler', + description='Labels GPU nodes in a Kubernetes cluster for use with ' + 'SkyPilot. Operates by running a job on each node that ' + 'parses nvidia-smi and patches the node with new labels. ' + 'Labels created are of the format ' + 'skypilot.co/accelerators: . 
Automatically ' + 'creates a service account and cluster role binding with ' + 'permissions to list nodes and create labels.') + parser.add_argument('--cleanup', + action='store_true', + help='Delete all GPU labeler resources in the ' + 'Kubernetes cluster.') + args = parser.parse_args() + if args.cleanup: + cleanup() + else: + label() diff --git a/sky/utils/kubernetes/k8s_gpu_labeler_job.yaml b/sky/utils/kubernetes/k8s_gpu_labeler_job.yaml new file mode 100644 index 00000000000..db506579eb1 --- /dev/null +++ b/sky/utils/kubernetes/k8s_gpu_labeler_job.yaml @@ -0,0 +1,41 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: sky-gpu-labeler # We append the node name in the script + namespace: kube-system +spec: + template: + metadata: + labels: + job: sky-gpu-labeler + spec: + serviceAccountName: gpu-labeler-sa + containers: + - name: gpu-labeler + image: us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot-gpu:latest # Using this image also serves as a way to "pre-pull" the image onto nodes + command: + - "python" + - "/label_gpus.py" + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - name: label-script + mountPath: /label_gpus.py + subPath: label_gpus.py + resources: + requests: + cpu: "0.1" + memory: "100Mi" + nvidia.com/gpu: "1" + limits: + nvidia.com/gpu: "1" + volumes: + - name: label-script + configMap: + name: gpu-labeler-script + defaultMode: 0744 + restartPolicy: Never + nodeSelector: {} # We set the node name in the script \ No newline at end of file diff --git a/sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml b/sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml new file mode 100644 index 00000000000..97764284d4b --- /dev/null +++ b/sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml @@ -0,0 +1,105 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + name: gpu-labeler-sa + namespace: kube-system + labels: + job: sky-gpu-labeler + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole 
+metadata: + name: node-patcher-role + namespace: kube-system + labels: + job: sky-gpu-labeler +rules: +- apiGroups: [""] + resources: ["nodes"] + verbs: ["patch"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: node-patcher-rolebinding + labels: + job: sky-gpu-labeler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: node-patcher-role +subjects: +- kind: ServiceAccount + name: gpu-labeler-sa + namespace: kube-system + +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: gpu-labeler-script + namespace: kube-system + labels: + job: sky-gpu-labeler +data: + label_gpus.py: | + #!/usr/bin/env python3 + import os + import subprocess + from typing import Optional + + from kubernetes import client + from kubernetes import config + + allowed_gpu_names = [ + 'A100', 'A10G', 'K80', 'M60', 'T4', 'T4g', 'V100', 'A10', 'A100-80GB', + 'P100', 'P40', 'P4' + ] + + + def get_gpu_name() -> Optional[str]: + try: + result = subprocess.run( + ['nvidia-smi', '--query-gpu=name', '--format=csv,noheader,nounits'], + stdout=subprocess.PIPE) + gpu_name = result.stdout.decode('utf-8').strip() + return gpu_name.lower() + except Exception as e: + print(f'Error getting GPU name: {e}') + return None + + + def label_node(gpu_name: str) -> None: + try: + config.load_incluster_config() # Load in-cluster configuration + v1 = client.CoreV1Api() + + # Fetch the current node's name from the environment variable + node_name = os.environ.get('MY_NODE_NAME') + if not node_name: + raise ValueError('Failed to get node name from environment') + + # Label the node with the GPU name + body = {'metadata': {'labels': {'skypilot.co/accelerator': gpu_name}}} + v1.patch_node(node_name, body) + + print(f'Labeled node {node_name} with GPU {gpu_name}') + + except Exception as e: + print(f'Error labeling node: {e}') + + + def main(): + gpu_name = get_gpu_name() + if gpu_name: + for allowed_name in allowed_gpu_names: + if allowed_name.lower() in 
gpu_name.lower(): + label_node(allowed_name) + else: + print('No supported GPU detected.') + + + if __name__ == '__main__': + main() diff --git a/tests/kubernetes/eks_test_cluster.yaml b/tests/kubernetes/eks_test_cluster.yaml new file mode 100644 index 00000000000..d8d8a95ad52 --- /dev/null +++ b/tests/kubernetes/eks_test_cluster.yaml @@ -0,0 +1,18 @@ +# Usage: +# eksctl create cluster -f eks_test_cluster.yaml +# eksctl delete cluster -f eks_test_cluster.yaml +apiVersion: eksctl.io/v1alpha5 +kind: ClusterConfig + +metadata: + name: my-cluster + region: us-west-2 + +managedNodeGroups: + - name: v100-nodes + instanceType: p3.2xlarge # This instance type provides 1 NVIDIA V100 GPU. + desiredCapacity: 1 + + - name: k80-nodes + instanceType: p2.xlarge # This instance type provides 1 NVIDIA K80 GPU. + desiredCapacity: 1 \ No newline at end of file From c857a9d4f73a3948d0445e7fb56ce1b7b703c578 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 15 Aug 2023 18:06:28 +0530 Subject: [PATCH 134/183] updates --- sky/utils/kubernetes/gpu_labeler.py | 61 ++++++++++++++++------------- 1 file changed, 34 insertions(+), 27 deletions(-) diff --git a/sky/utils/kubernetes/gpu_labeler.py b/sky/utils/kubernetes/gpu_labeler.py index 1ff0b644cc5..df5d84e7b3e 100644 --- a/sky/utils/kubernetes/gpu_labeler.py +++ b/sky/utils/kubernetes/gpu_labeler.py @@ -4,7 +4,6 @@ import subprocess from typing import Tuple -import click from kubernetes import client from kubernetes import config import yaml @@ -18,7 +17,7 @@ def prerequisite_check() -> Tuple[bool, str]: reason = '' prereq_ok = False try: - subprocess.check_output(['kubectl', 'get', 'pods']) + subprocess.run(['kubectl', 'get', 'pods'], capture_output=True) prereq_ok = True except FileNotFoundError: reason = 'kubectl not found. Please install kubectl and try again.' 
@@ -42,25 +41,23 @@ def cleanup() -> Tuple[bool, str]: success = False reason = '' - try: - subprocess.check_output(del_command.split()) - success = True - except subprocess.CalledProcessError as e: - output = e.output.decode('utf-8') - reason = 'Error deleting existing GPU labeler resources: ' + output - return success, reason + with log_utils.safe_rich_status('Cleaning up existing GPU labeling ' + 'resources'): + try: + subprocess.run(del_command.split(), + capture_output=True, + text=True) + success = True + except subprocess.CalledProcessError as e: + output = e.output.decode('utf-8') + reason = 'Error deleting existing GPU labeler resources: ' + output + return success, reason def label(): - # Check if kubectl is installed and kubeconfig is set up - prereq_ok, reason = prerequisite_check() - if not prereq_ok: - click.echo(reason) - return - deletion_success, reason = cleanup() if not deletion_success: - click.echo(reason) + print(reason) return sky_dir = os.path.dirname(sky.__file__) @@ -75,7 +72,7 @@ def label(): ['kubectl', 'apply', '-f', rbac_manifest_path]) except subprocess.CalledProcessError as e: output = e.output.decode('utf-8') - click.echo('Error setting up GPU labeling: ' + output) + print('Error setting up GPU labeling: ' + output) return with log_utils.safe_rich_status('Creating GPU labeler jobs'): @@ -106,23 +103,33 @@ def label(): batch_v1.create_namespaced_job(namespace, job_manifest) print(f'Created GPU labeler job for node {node_name}') - -if __name__ == '__main__': +def main(): parser = argparse.ArgumentParser( prog='sky-gpu-labeler', description='Labels GPU nodes in a Kubernetes cluster for use with ' - 'SkyPilot. Operates by running a job on each node that ' - 'parses nvidia-smi and patches the node with new labels. ' - 'Labels created are of the format ' - 'skypilot.co/accelerators: . Automatically ' - 'creates a service account and cluster role binding with ' - 'permissions to list nodes and create labels.') + 'SkyPilot. 
Operates by running a job on each node that ' + 'parses nvidia-smi and patches the node with new labels. ' + 'Labels created are of the format ' + 'skypilot.co/accelerators: . Automatically ' + 'creates a service account and cluster role binding with ' + 'permissions to list nodes and create labels.') parser.add_argument('--cleanup', action='store_true', - help='Delete all GPU labeler resources in the ' - 'Kubernetes cluster.') + help='delete all GPU labeler resources in the ' + 'Kubernetes cluster.') args = parser.parse_args() + + # Check if kubectl is installed and kubeconfig is set up + prereq_ok, reason = prerequisite_check() + if not prereq_ok: + print(reason) + return + if args.cleanup: cleanup() else: label() + + +if __name__ == '__main__': + main() From ee89f65bc2dcb6fd5b1aabc89bc2c814f699debe Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 15 Aug 2023 18:07:37 +0530 Subject: [PATCH 135/183] lint --- sky/utils/kubernetes/gpu_labeler.py | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/sky/utils/kubernetes/gpu_labeler.py b/sky/utils/kubernetes/gpu_labeler.py index df5d84e7b3e..bcc161b3296 100644 --- a/sky/utils/kubernetes/gpu_labeler.py +++ b/sky/utils/kubernetes/gpu_labeler.py @@ -17,7 +17,9 @@ def prerequisite_check() -> Tuple[bool, str]: reason = '' prereq_ok = False try: - subprocess.run(['kubectl', 'get', 'pods'], capture_output=True) + subprocess.run(['kubectl', 'get', 'pods'], + check=True, + capture_output=True) prereq_ok = True except FileNotFoundError: reason = 'kubectl not found. Please install kubectl and try again.' 
@@ -44,9 +46,7 @@ def cleanup() -> Tuple[bool, str]: with log_utils.safe_rich_status('Cleaning up existing GPU labeling ' 'resources'): try: - subprocess.run(del_command.split(), - capture_output=True, - text=True) + subprocess.run(del_command.split(), check=True, capture_output=True) success = True except subprocess.CalledProcessError as e: output = e.output.decode('utf-8') @@ -103,20 +103,21 @@ def label(): batch_v1.create_namespaced_job(namespace, job_manifest) print(f'Created GPU labeler job for node {node_name}') + def main(): parser = argparse.ArgumentParser( prog='sky-gpu-labeler', description='Labels GPU nodes in a Kubernetes cluster for use with ' - 'SkyPilot. Operates by running a job on each node that ' - 'parses nvidia-smi and patches the node with new labels. ' - 'Labels created are of the format ' - 'skypilot.co/accelerators: . Automatically ' - 'creates a service account and cluster role binding with ' - 'permissions to list nodes and create labels.') + 'SkyPilot. Operates by running a job on each node that ' + 'parses nvidia-smi and patches the node with new labels. ' + 'Labels created are of the format ' + 'skypilot.co/accelerators: . 
Automatically ' + 'creates a service account and cluster role binding with ' + 'permissions to list nodes and create labels.') parser.add_argument('--cleanup', action='store_true', help='delete all GPU labeler resources in the ' - 'Kubernetes cluster.') + 'Kubernetes cluster.') args = parser.parse_args() # Check if kubectl is installed and kubeconfig is set up From 8934b22e0a20ead07b701cfe844da3ea26d6bee6 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 15 Aug 2023 19:04:22 +0530 Subject: [PATCH 136/183] update script --- sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml b/sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml index 97764284d4b..e9abed088ce 100644 --- a/sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml +++ b/sky/utils/kubernetes/k8s_gpu_labeler_setup.yaml @@ -96,7 +96,7 @@ data: if gpu_name: for allowed_name in allowed_gpu_names: if allowed_name.lower() in gpu_name.lower(): - label_node(allowed_name) + label_node(allowed_name.lower()) else: print('No supported GPU detected.') From 9b5019b9c4e7da09fbabcd69fe093b1b5bd1182f Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 15 Aug 2023 19:10:10 +0530 Subject: [PATCH 137/183] ux --- sky/utils/kubernetes/gpu_labeler.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/sky/utils/kubernetes/gpu_labeler.py b/sky/utils/kubernetes/gpu_labeler.py index bcc161b3296..e8d75413322 100644 --- a/sky/utils/kubernetes/gpu_labeler.py +++ b/sky/utils/kubernetes/gpu_labeler.py @@ -102,6 +102,12 @@ def label(): # Create the job for this node` batch_v1.create_namespaced_job(namespace, job_manifest) print(f'Created GPU labeler job for node {node_name}') + print('GPU labeling started - this may take a few minutes to complete.' 
+ '\nTo check the status of GPU labeling jobs, run ' + '`kubectl get jobs --namespace=kube-system -l job=sky-gpu-labeler`' + '\nYou can check if nodes have been labeled by running ' + '`kubectl describe nodes` and looking for labels of the format ' + '`skypilot.co/accelerators: `. ') def main(): From 53e5d80a13c4b833dc672bf00c3a4939fbf9012a Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 15 Aug 2023 19:27:02 +0530 Subject: [PATCH 138/183] fix formatter --- sky/utils/kubernetes_utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index 8f83ad7643e..6f28282bdd2 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -50,8 +50,9 @@ def get_label_key(cls) -> str: @classmethod def get_label_value(cls, accelerator: str) -> str: - # For SkyPilot formatter, we adopt GKE accelerator format. - return get_gke_accelerator_name(accelerator) + # For SkyPilot formatter, we use the accelerator str directly. + # See sky.utils.kubernetes.gpu_labeler. 
+ return accelerator.lower() # LABEL_FORMATTER_REGISTRY stores the label formats SkyPilot will try to From f806aed3c4ff1c92b3fea0e16efdfac05437c65f Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 15 Aug 2023 19:41:36 +0530 Subject: [PATCH 139/183] test update --- tests/test_smoke.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index 57d44db6dfd..eb88e751c79 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -1780,7 +1780,7 @@ def test_cancel_azure(): @pytest.mark.no_lambda_cloud # Lambda Cloud does not have V100 gpus @pytest.mark.no_ibm # IBM cloud currently doesn't provide public image with CUDA @pytest.mark.no_scp # SCP does not support num_nodes > 1 yet -@pytest.mark.no_kubernetes # Kubernetes does not support GPU yet +@pytest.mark.no_kubernetes # Kubernetes does not support num_nodes > 1 yet def test_cancel_pytorch(generic_cloud: str): name = _get_cluster_name() test = Test( From 4bf43eeebd44a5dae1f9a2e6516f976c4d8d5567 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 15 Aug 2023 19:54:11 +0530 Subject: [PATCH 140/183] test update --- tests/test_smoke.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_smoke.py b/tests/test_smoke.py index eb88e751c79..1807d5fbfe6 100644 --- a/tests/test_smoke.py +++ b/tests/test_smoke.py @@ -1024,6 +1024,7 @@ def test_job_queue(generic_cloud: str): @pytest.mark.no_ibm # Doesn't support IBM Cloud for now @pytest.mark.no_scp # Doesn't support SCP for now @pytest.mark.no_oci # Doesn't support OCI for now +@pytest.mark.no_kubernetes # Doesn't support Kubernetes for now def test_job_queue_with_docker(generic_cloud: str): name = _get_cluster_name() test = Test( @@ -1267,6 +1268,7 @@ def test_ibm_job_queue_multinode(): @pytest.mark.no_ibm # Doesn't support IBM Cloud for now @pytest.mark.no_scp # Doesn't support SCP for now @pytest.mark.no_oci # Doesn't support OCI for now +@pytest.mark.no_kubernetes # Doesn't support Kubernetes for 
now def test_docker_preinstalled_package(generic_cloud: str): name = _get_cluster_name() test = Test( From 429eed4f9d59c377cf9f8e0b2437e1f5d8ae779d Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 15 Aug 2023 21:15:18 +0530 Subject: [PATCH 141/183] fix test_optimizer_dryruns --- tests/test_optimizer_dryruns.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/test_optimizer_dryruns.py b/tests/test_optimizer_dryruns.py index a673ac5694e..262787fbae8 100644 --- a/tests/test_optimizer_dryruns.py +++ b/tests/test_optimizer_dryruns.py @@ -7,6 +7,7 @@ import sky from sky import clouds from sky import exceptions +from sky.utils import kubernetes_utils def _test_parse_task_yaml(spec: str, test_fn: Optional[Callable] = None): @@ -80,6 +81,12 @@ def _make_resources( 'sky.clouds.gcp.GCP._list_reservations_for_instance_type', lambda *_args, **_kwargs: []) + # Monkey patch detect_gpu_label_formatter for k8s since it queries + # the cluster to detect available GPU labels. + monkeypatch.setattr( + 'sky.utils.kubernetes_utils.detect_gpu_label_formatter', + lambda *_args, **_kwargs: [kubernetes_utils.SkyPilotLabelFormatter, []]) + # Should create Resources here, since it uses the enabled clouds. return sky.Resources(*resources_args, **resources_kwargs) From 8dd1a768956f20b40a5a2cb1d7be959f32cd310f Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 15 Aug 2023 21:24:41 +0530 Subject: [PATCH 142/183] docs --- sky/utils/kubernetes_utils.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index 6f28282bdd2..01a8239f399 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -8,13 +8,21 @@ class GPULabelFormatter: + """Base class to define a GPU label formatter for a Kubernetes cluster + + A GPU label formatter is a class that defines how to use GPU type labels in + a Kubernetes cluster. 
It is used by the Kubernetes cloud class to pick the + key:value pair to use as node selector for GPU nodes. + """ @classmethod def get_label_key(cls) -> str: + """Returns the label key for GPU type used by the Kubernetes cluster""" raise NotImplementedError @classmethod def get_label_value(cls, accelerator: str) -> str: + """Given a GPU type, returns the label value to be used""" raise NotImplementedError @@ -32,10 +40,17 @@ def get_gke_accelerator_name(accelerator: str) -> str: class GKELabelFormatter(GPULabelFormatter): + """GKE label formatter + + GKE nodes by default are populated with `cloud.google.com/gke-accelerator` + label, which is used to identify the GPU type. + """ + + LABEL_KEY = 'cloud.google.com/gke-accelerator' @classmethod def get_label_key(cls) -> str: - return 'cloud.google.com/gke-accelerator' + return cls.LABEL_KEY @classmethod def get_label_value(cls, accelerator: str) -> str: @@ -43,10 +58,17 @@ def get_label_value(cls, accelerator: str) -> str: class SkyPilotLabelFormatter(GPULabelFormatter): + """Custom label formatter for SkyPilot + + Uses skypilot.co/accelerator as the key, and SkyPilot accelerator str as the + value. 
+ """ + + LABEL_KEY = 'skypilot.co/accelerator' @classmethod def get_label_key(cls) -> str: - return 'skypilot.co/accelerator' + return cls.LABEL_KEY @classmethod def get_label_value(cls, accelerator: str) -> str: From df10bc60340b83d9f2a910733922d06b552891f4 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 15 Aug 2023 21:38:51 +0530 Subject: [PATCH 143/183] cleanup --- sky/utils/kubernetes/gpu_labeler.py | 1 - 1 file changed, 1 deletion(-) diff --git a/sky/utils/kubernetes/gpu_labeler.py b/sky/utils/kubernetes/gpu_labeler.py index e8d75413322..00478646a8d 100644 --- a/sky/utils/kubernetes/gpu_labeler.py +++ b/sky/utils/kubernetes/gpu_labeler.py @@ -112,7 +112,6 @@ def label(): def main(): parser = argparse.ArgumentParser( - prog='sky-gpu-labeler', description='Labels GPU nodes in a Kubernetes cluster for use with ' 'SkyPilot. Operates by running a job on each node that ' 'parses nvidia-smi and patches the node with new labels. ' From 512d9fbfd1897426a7bc0cce85b6713e37e07991 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 16 Aug 2023 00:11:11 +0530 Subject: [PATCH 144/183] test readme update --- sky/clouds/kubernetes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 0dc68dda911..5c28e1cceb0 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -438,7 +438,7 @@ def accelerator_in_region_or_zone(self, acc_count: int, region: Optional[str] = None, zone: Optional[str] = None) -> bool: - # TODO(romilb): All accelerators are marked as not available for now. + # TODO(romilb): All accelerators are marked as available for now. # In the future, we should return false for accelerators that we know # are not supported by the cluster. 
return True From 858eb51369723a470e2ad348019592820bbe4b33 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 16 Aug 2023 06:25:38 +0530 Subject: [PATCH 145/183] lint --- sky/clouds/kubernetes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 5c28e1cceb0..549f904a7d1 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -420,8 +420,8 @@ def check_credentials(cls) -> Tuple[bool, Optional[str]]: # Test using python API return kubernetes_utils.check_credentials() else: - return False, 'Credentials not found - ' \ - f'check if {_CREDENTIAL_PATH} exists.' + return False, ('Credentials not found - ', + 'check if {_CREDENTIAL_PATH} exists.') def get_credential_file_mounts(self) -> Dict[str, str]: return {_CREDENTIAL_PATH: _CREDENTIAL_PATH} From 96647bfa998bc003b603ee1428899982314e2639 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 16 Aug 2023 06:44:21 +0530 Subject: [PATCH 146/183] lint --- sky/clouds/kubernetes.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 549f904a7d1..f7a2bd2510c 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -420,8 +420,8 @@ def check_credentials(cls) -> Tuple[bool, Optional[str]]: # Test using python API return kubernetes_utils.check_credentials() else: - return False, ('Credentials not found - ', - 'check if {_CREDENTIAL_PATH} exists.') + return (False, 'Credentials not found - ' + 'check if {_CREDENTIAL_PATH} exists.') def get_credential_file_mounts(self) -> Dict[str, str]: return {_CREDENTIAL_PATH: _CREDENTIAL_PATH} From fdff1a6a937d1ac2b79b1674f5fc0220c65f397c Mon Sep 17 00:00:00 2001 From: Avi Weit Date: Thu, 17 Aug 2023 07:24:35 +0300 Subject: [PATCH 147/183] [k8s_cloud_beta1] Add sshjump host support. 
(#2369) --- sky/authentication.py | 99 +++++++++++++++- sky/backends/backend_utils.py | 13 +-- sky/backends/cloud_vm_ray_backend.py | 3 +- sky/clouds/kubernetes.py | 6 + .../providers/kubernetes/node_provider.py | 13 +-- sky/skylet/providers/kubernetes/utils.py | 15 +++ sky/templates/kubernetes-ray.yml.j2 | 5 +- sky/templates/kubernetes-sshjump.yml.j2 | 86 ++++++++++++++ sky/utils/kubernetes/sshjump_lcm.py | 109 ++++++++++++++++++ 9 files changed, 324 insertions(+), 25 deletions(-) create mode 100644 sky/templates/kubernetes-sshjump.yml.j2 create mode 100644 sky/utils/kubernetes/sshjump_lcm.py diff --git a/sky/authentication.py b/sky/authentication.py index d5aa2ff1787..acac9db0cd3 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -33,14 +33,17 @@ from cryptography.hazmat.primitives import serialization from cryptography.hazmat.primitives.asymmetric import rsa from cryptography.hazmat.backends import default_backend +import jinja2 import yaml +import sky from sky import clouds from sky import sky_logging -from sky.adaptors import gcp, ibm +from sky.adaptors import gcp, ibm, kubernetes from sky.utils import common_utils from sky.utils import subprocess_utils from sky.utils import ux_utils +from sky.skylet.providers.kubernetes import utils as kubernetes_utils from sky.skylet.providers.lambda_cloud import lambda_utils logger = sky_logging.init_logger(__name__) @@ -402,4 +405,98 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: logger.error(suffix) raise + sshjump_name = clouds.Kubernetes.SKY_SSH_JUMP_NAME + sshjump_image = clouds.Kubernetes.IMAGE + namespace = kubernetes_utils.get_current_kube_config_context_namespace() + + template_path = os.path.join(sky.__root_dir__, 'templates', + 'kubernetes-sshjump.yml.j2') + if not os.path.exists(template_path): + raise FileNotFoundError( + 'Template "kubernetes-sshjump.j2" does not exist.') + with open(template_path) as fin: + template = fin.read() + j2_template = 
jinja2.Template(template) + cont = j2_template.render(name=sshjump_name, + image=sshjump_image, + secret=key_label) + + content = yaml.safe_load(cont) + + # ServiceAccount + try: + kubernetes.core_api().create_namespaced_service_account( + namespace, content['service_account']) + except kubernetes.api_exception() as e: + if e.status == 409: + logger.warning( + 'SSH Jump ServiceAcount already exists in the cluster, using ' + 'it...') + else: + raise + else: + logger.info('Creating SSH Jump ServiceAcount in the cluster...') + # Role + try: + kubernetes.auth_api().create_namespaced_role(namespace, content['role']) + except kubernetes.api_exception() as e: + if e.status == 409: + logger.warning( + 'SSH Jump Role already exists in the cluster, using it...') + else: + raise + else: + logger.info('Creating SSH Jump Role in the cluster...') + # RoleBinding + try: + kubernetes.auth_api().create_namespaced_role_binding( + namespace, content['role_binding']) + except kubernetes.api_exception() as e: + if e.status == 409: + logger.warning( + 'SSH Jump RoleBinding already exists in the cluster, using ' + 'it...') + else: + raise + else: + logger.info('Creating SSH Jump RoleBinding in the cluster...') + + # Pod + try: + kubernetes.core_api().create_namespaced_pod(namespace, + content['pod_spec']) + except kubernetes.api_exception() as e: + if e.status == 409: + logger.warning( + f'SSH Jump Host {sshjump_name} already exists in the cluster, ' + 'using it...') + else: + raise + else: + logger.info(f'Creating SSH Jump Host {sshjump_name} in the cluster...') + # Service + try: + kubernetes.core_api().create_namespaced_service(namespace, + content['service_spec']) + except kubernetes.api_exception() as e: + if e.status == 409: + logger.warning( + f'SSH Jump Service {sshjump_name} already exists in the ' + 'cluster, using it...') + else: + raise + else: + logger.info( + f'Creating SSH Jump Service {sshjump_name} in the cluster...') + + ssh_jump_port = 
clouds.Kubernetes.get_port(sshjump_name) + ssh_jump_ip = clouds.Kubernetes.get_external_ip() + + ssh_jump_proxy_command = f'ssh -tt -i {PRIVATE_SSH_KEY_PATH} ' + \ + '-o StrictHostKeyChecking=no ' + \ + '-o UserKnownHostsFile=/dev/null -o IdentitiesOnly=yes ' + \ + f'-p {ssh_jump_port} -W %h:%p sky@{ssh_jump_ip}' + + config['auth']['ssh_proxy_command'] = ssh_jump_proxy_command + return config diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 4b71ca4ac44..a62caf5c3a6 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -1616,18 +1616,13 @@ def get_head_ssh_port( max_attempts: int = 1, ) -> int: """Returns the ip of the head node.""" + del handle # Unused. + del use_cache # Unused. del max_attempts # Unused. - # Use port 22 for everything except Kubernetes + # Use port 22 for everything including Kubernetes. + # Note: for Kubernetes we use ssh jump host to access ray head # TODO(romilb): Add a get port method to the cloud classes. 
head_ssh_port = 22 - if not isinstance(handle.launched_resources.cloud, clouds.Kubernetes): - return head_ssh_port - else: - if use_cache and handle.head_ssh_port is not None: - head_ssh_port = handle.head_ssh_port - else: - svc_name = f'{handle.get_cluster_name()}-ray-head-ssh' - head_ssh_port = clouds.Kubernetes.get_port(svc_name) return head_ssh_port diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index c5675d02dcc..f4b91baebb0 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2824,7 +2824,8 @@ def _sync_file_mounts( def _update_envs_for_k8s(self, handle: CloudVmRayResourceHandle, task: task_lib.Task) -> None: - """Update envs for a task with Kubernetes specific env vars if cloud is Kubernetes.""" + """Update envs for a task with Kubernetes specific env vars if cloud is + Kubernetes.""" if isinstance(handle.launched_resources.cloud, clouds.Kubernetes): temp_envs = copy.deepcopy(task.envs) cloud_env_vars = handle.launched_resources.cloud.query_env_vars( diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 41d266b8c81..c233c9213d1 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -129,6 +129,7 @@ class Kubernetes(clouds.Cloud): """Kubernetes.""" SKY_SSH_KEY_SECRET_NAME = f'sky-ssh-{common_utils.get_user_hash()}' + SKY_SSH_JUMP_NAME = f'sshjump-{common_utils.get_user_hash()}' # Timeout for resource provisioning. This timeout determines how long to # wait for pod to be in pending status before giving up. 
@@ -217,6 +218,10 @@ def get_port(cls, svc_name) -> int: ns = kubernetes_utils.get_current_kube_config_context_namespace() return kubernetes_utils.get_port(svc_name, ns) + @classmethod + def get_external_ip(cls) -> str: + return kubernetes_utils.get_external_ip() + @classmethod def get_default_instance_type( cls, @@ -303,6 +308,7 @@ def make_deploy_resources_variables( 'k8s_ssh_key_secret_name': self.SKY_SSH_KEY_SECRET_NAME, # TODO(romilb): Allow user to specify custom images 'image_id': self.IMAGE, + 'sshjump': self.SKY_SSH_JUMP_NAME } def _get_feasible_launchable_resources( diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 3ab8414b2d2..986346c47a6 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -2,7 +2,6 @@ import logging import time from typing import Dict -from urllib.parse import urlparse from uuid import uuid4 from sky.adaptors import kubernetes @@ -97,17 +96,7 @@ def node_tags(self, node_id): return pod.metadata.labels def external_ip(self, node_id): - # Return the IP address of the first node with an external IP - nodes = kubernetes.core_api().list_node().items - for node in nodes: - if node.status.addresses: - for address in node.status.addresses: - if address.type == 'ExternalIP': - return address.address - # If no external IP is found, use the API server IP - api_host = kubernetes.core_api().api_client.configuration.host - parsed_url = urlparse(api_host) - return parsed_url.hostname + return utils.get_external_ip() def external_port(self, node_id): # Extract the NodePort of the head node's SSH service diff --git a/sky/skylet/providers/kubernetes/utils.py b/sky/skylet/providers/kubernetes/utils.py index 60bc99d0050..3b5be700fc4 100644 --- a/sky/skylet/providers/kubernetes/utils.py +++ b/sky/skylet/providers/kubernetes/utils.py @@ -1,5 +1,6 @@ from typing import Tuple, Optional +from urllib.parse import urlparse from 
sky.utils import common_utils from sky.adaptors import kubernetes @@ -25,6 +26,20 @@ def get_port(svc_name: str, namespace: str) -> int: return head_service.spec.ports[0].node_port +def get_external_ip(): + # Return the IP address of the first node with an external IP + nodes = kubernetes.core_api().list_node().items + for node in nodes: + if node.status.addresses: + for address in node.status.addresses: + if address.type == 'ExternalIP': + return address.address + # If no external IP is found, use the API server IP + api_host = kubernetes.core_api().api_client.configuration.host + parsed_url = urlparse(api_host) + return parsed_url.hostname + + def check_credentials(timeout: int = kubernetes.API_TIMEOUT) -> \ Tuple[bool, Optional[str]]: """ diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 9a2e97f551d..dccdc461e28 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -17,7 +17,7 @@ provider: module: sky.skylet.providers.kubernetes.KubernetesNodeProvider # Use False if running from outside of k8s cluster - use_internal_ips: false + use_internal_ips: true timeout: {{timeout}} @@ -78,7 +78,6 @@ provider: skypilot-cluster: {{cluster_name}} name: {{cluster_name}}-ray-head-ssh spec: - type: NodePort selector: component: {{cluster_name}}-ray-head ports: @@ -126,6 +125,8 @@ available_node_types: parent: skypilot component: {{cluster_name}}-ray-head skypilot-cluster: {{cluster_name}} + # This label is being used by the life cycle management of the ssh jump pod + skypilot-sshjump: {{sshjump}} spec: # Change this if you altered the autoscaler_service_account above # or want to provide your own. 
diff --git a/sky/templates/kubernetes-sshjump.yml.j2 b/sky/templates/kubernetes-sshjump.yml.j2 new file mode 100644 index 00000000000..c27b32cb24b --- /dev/null +++ b/sky/templates/kubernetes-sshjump.yml.j2 @@ -0,0 +1,86 @@ +pod_spec: + apiVersion: v1 + kind: Pod + metadata: + name: {{ name }} + labels: + component: {{ name }} + spec: + serviceAccountName: sshjump + volumes: + - name: secret-volume + secret: + secretName: {{ secret }} + containers: + - name: {{ name }} + imagePullPolicy: IfNotPresent + image: {{ image }} + command: ["python3", "-u", "/skypilot/sky/utils/kubernetes/sshjump_lcm.py"] + ports: + - containerPort: 22 + volumeMounts: + - name: secret-volume + readOnly: true + mountPath: /etc/secret-volume + lifecycle: + postStart: + exec: + command: ["/bin/bash", "-c", "mkdir -p ~/.ssh && cp /etc/secret-volume/ssh-publickey ~/.ssh/authorized_keys && sudo service ssh restart"] + env: + - name: MY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: MY_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: ALERT_THRESHOLD + # seconds + value: "600" + - name: RETRY_INTERVAL + # seconds + value: "60" + terminationGracePeriodSeconds: 0 +service_spec: + apiVersion: v1 + kind: Service + metadata: + name: {{ name }} + spec: + type: NodePort + selector: + component: {{ name }} + ports: + - protocol: TCP + port: 22 + targetPort: 22 + +# The following ServiceAccount/Role/RoleBinding sets up an RBAC for life cycle +# management of the jump pod/service +service_account: + apiVersion: v1 + kind: ServiceAccount + metadata: + name: sshjump +role: + kind: Role + apiVersion: rbac.authorization.k8s.io/v1 + metadata: + name: sshjump + rules: + - apiGroups: [""] + resources: ["pods", "pods/status", "pods/exec", "services"] + verbs: ["get", "list", "create", "delete"] +role_binding: + apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + name: sshjump + subjects: + - kind: ServiceAccount + name: sshjump + 
roleRef: + kind: Role + name: sshjump + apiGroup: rbac.authorization.k8s.io diff --git a/sky/utils/kubernetes/sshjump_lcm.py b/sky/utils/kubernetes/sshjump_lcm.py new file mode 100644 index 00000000000..51c61b6cc43 --- /dev/null +++ b/sky/utils/kubernetes/sshjump_lcm.py @@ -0,0 +1,109 @@ +"""Manages lifecycle of sshjump pod. + +This script runs inside sshjump pod as the main process (PID 1). + +It terminates itself (by removing sshjump service and pod via a call to +kubeapi), if it does not see ray pods in the duration of 10 minutes. If the +user re-launches a task before the duration is over, then sshjump pod is being +reused and will terminate itself when it sees that no ray cluster exist in that +amount duration. +""" +import datetime +import os +import sys +import time + +from kubernetes import client, config + +# Load kube config +config.load_incluster_config() + +v1 = client.CoreV1Api() + +current_name = os.getenv('MY_POD_NAME') +current_namespace = os.getenv('MY_POD_NAMESPACE') + +# The amount of time in seconds where no Ray pods exist in which after that time +# sshjump pod terminates itself +alert_threshold = int(os.getenv('ALERT_THRESHOLD', '600')) +# The amount of time in seconds to wait between Ray pods existence checks +retry_interval = int(os.getenv('RETRY_INTERVAL', '60')) + +# Ray pods are labeled with this value i.e sshjump name which is unique per user +#(based on userhash) +label_selector = f'skypilot-sshjump={current_name}' + + +def poll(): + sys.stdout.write('enter poll()\n') + + alert_delta = datetime.timedelta(seconds=alert_threshold) + + # Set delay for each retry + retry_interval_delta = datetime.timedelta(seconds=retry_interval) + + # Accumulated time of where no Ray pod exist. 
Used to compare against + # alert_threshold + noray_delta = datetime.timedelta() + + while True: + sys.stdout.write(f'Sleep {retry_interval} seconds..\n') + time.sleep(retry_interval) + + # List the pods in the current namespace + try: + ret = v1.list_namespaced_pod(current_namespace, + label_selector=label_selector) + except Exception as e: + sys.stdout.write(f'[ERROR] exit poll() with error: {e}\n') + raise + + if len(ret.items) == 0: + sys.stdout.write( + f'DID NOT FIND pods with label "{label_selector}" in ' + f'namespace: "{current_namespace}"\n') + noray_delta = noray_delta + retry_interval_delta + sys.stdout.write( + f'noray_delta after time increment: {noray_delta}, alert ' + f'threshold: {alert_delta}\n') + else: + sys.stdout.write( + f'FOUND pods with label "{label_selector}" in namespace: ' + f'"{current_namespace}"\n') + # reset .. + noray_delta = datetime.timedelta() + sys.stdout.write(f'noray_delta is reset: {noray_delta}\n') + + if noray_delta >= alert_delta: + sys.stdout.write( + f'noray_delta: {noray_delta} crossed alert threshold: ' + f'{alert_delta}. 
Time to terminate myself\n') + try: + # sshjump resources created under same name + v1.delete_namespaced_service(current_name, current_namespace) + v1.delete_namespaced_pod(current_name, current_namespace) + except Exception as e: + sys.stdout.write(f'[ERROR] exit poll() with error: {e}\n') + raise + + break + + sys.stdout.write('exit poll()\n') + + +def main(): + sys.stdout.write('enter main()\n') + sys.stdout.write(f'*** current_name {current_name}\n') + sys.stdout.write(f'*** current_namespace {current_namespace}\n') + sys.stdout.write(f'*** alert_threshold time {alert_threshold}\n') + sys.stdout.write(f'*** retry_interval time {retry_interval}\n') + sys.stdout.write(f'*** label_selector {label_selector}\n') + + if not current_name or not current_namespace: + raise Exception('[ERROR] One or more environment variables is missing ' + 'with an actual value.') + poll() + + +if __name__ == '__main__': + main() From 3ef135a28425793a3a4a2d76336c3541ff845d91 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 17 Aug 2023 17:38:36 +0530 Subject: [PATCH 148/183] Update build image --- tests/kubernetes/build_image.sh | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/tests/kubernetes/build_image.sh b/tests/kubernetes/build_image.sh index 0bcc6aead69..4ee6ac6b582 100644 --- a/tests/kubernetes/build_image.sh +++ b/tests/kubernetes/build_image.sh @@ -10,6 +10,9 @@ TAG=us-central1-docker.pkg.dev/skypilot-375900/skypilotk8s/skypilot +push=false +gpu=false + # Parse command line arguments while getopts ":pg" opt; do case ${opt} in @@ -28,6 +31,9 @@ while getopts ":pg" opt; do esac done +# Shift off the options +shift $((OPTIND-1)) + # Add -gpu to the tag if the GPU image is being built if [[ $gpu ]]; then TAG=$TAG-gpu:latest @@ -38,14 +44,15 @@ fi # Navigate to the root of the project (inferred from git) cd "$(git rev-parse --show-toplevel)" +echo $push +echo $gpu + # If push is used, build the image for both amd64 and arm64 -if [[ $push ]]; 
then +if [[ $push == "true" ]]; then # If gpu is used, build the GPU image - if [[ $gpu ]]; then + if [[ $gpu == "true" ]]; then echo "Building and pushing GPU image for amd64" docker buildx build --push --platform linux/amd64 -t $TAG -f Dockerfile_k8s_gpu ./sky - fi - # Else, build the CPU image else echo "Building and pushing CPU image for amd64 and arm64" docker buildx build --push --platform linux/arm64,linux/amd64 -t $TAG -f Dockerfile_k8s ./sky From 7b638ccff1d518a7a808f1b9ac220d49f95819db Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 17 Aug 2023 17:39:44 +0530 Subject: [PATCH 149/183] fix image path --- sky/authentication.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/authentication.py b/sky/authentication.py index f68445c58ac..61c67c334cc 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -408,7 +408,7 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: raise sshjump_name = clouds.Kubernetes.SKY_SSH_JUMP_NAME - sshjump_image = clouds.Kubernetes.IMAGE + sshjump_image = clouds.Kubernetes.IMAGE_CPU namespace = kubernetes_utils.get_current_kube_config_context_namespace() template_path = os.path.join(sky.__root_dir__, 'templates', From 7da33e4ee1dcede8ef99448debbd214f69dae8ba Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 17 Aug 2023 17:45:59 +0530 Subject: [PATCH 150/183] fix merge --- sky/clouds/kubernetes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 4c4ed79e3c3..f1033cecaf7 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -373,7 +373,7 @@ def make_deploy_resources_variables( 'k8s_acc_label_key': k8s_acc_label_key, 'k8s_acc_label_value': k8s_acc_label_value, # TODO(romilb): Allow user to specify custom images - 'image_id': self.IMAGE, + 'image_id': image, 'sshjump': self.SKY_SSH_JUMP_NAME } return deploy_vars From 5d4d27c4002d4ab1f4eca98e7634057a3faebf1b Mon Sep 17 
00:00:00 2001 From: Romil Bhardwaj Date: Thu, 17 Aug 2023 21:37:44 +0530 Subject: [PATCH 151/183] cleanup --- Dockerfile_k8s | 3 + Dockerfile_k8s_gpu | 3 + sky/authentication.py | 97 +++------------------- sky/clouds/kubernetes.py | 2 +- sky/templates/kubernetes-sshjump.yml.j2 | 18 ++-- sky/utils/kubernetes/sshjump_lcm.py | 30 +++---- sky/utils/kubernetes_utils.py | 106 ++++++++++++++++++++++++ tests/kubernetes/build_image.sh | 13 ++- 8 files changed, 154 insertions(+), 118 deletions(-) diff --git a/Dockerfile_k8s b/Dockerfile_k8s index 12dbaa9006c..cf6ff86cbed 100644 --- a/Dockerfile_k8s +++ b/Dockerfile_k8s @@ -45,6 +45,9 @@ RUN cd /skypilot/ && \ sudo mv -v sky/setup_files/* . && \ pip install ".[aws]" +# Set PYTHONUNBUFFERED=1 to have Python print to stdout/stderr immediately +ENV PYTHONUNBUFFERED=1 + # Set WORKDIR and initialize conda for sky user WORKDIR /home/sky RUN conda init diff --git a/Dockerfile_k8s_gpu b/Dockerfile_k8s_gpu index 0908d320cff..ba610277ef1 100644 --- a/Dockerfile_k8s_gpu +++ b/Dockerfile_k8s_gpu @@ -51,6 +51,9 @@ RUN cd /skypilot/ && \ sudo mv -v sky/setup_files/* . 
&& \ pip install ".[aws]" +# Set PYTHONUNBUFFERED=1 to have Python print to stdout/stderr immediately +ENV PYTHONUNBUFFERED=1 + # Set WORKDIR and initialize conda for sky user WORKDIR /home/sky RUN conda init diff --git a/sky/authentication.py b/sky/authentication.py index 61c67c334cc..147353fea09 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -33,15 +33,12 @@ from cryptography.hazmat.backends import default_backend from cryptography.hazmat.primitives import serialization from cryptography.hazmat.primitives.asymmetric import rsa -import jinja2 import yaml -import sky from sky import clouds from sky import sky_logging from sky.adaptors import gcp from sky.adaptors import ibm -from sky.adaptors import kubernetes from sky.skylet.providers.lambda_cloud import lambda_utils from sky.utils import common_utils from sky.utils import kubernetes_utils @@ -70,7 +67,7 @@ def _generate_rsa_key_pair() -> Tuple[str, str]: encoding=serialization.Encoding.PEM, format=serialization.PrivateFormat.TraditionalOpenSSL, encryption_algorithm=serialization.NoEncryption()).decode( - 'utf-8').strip() + 'utf-8').strip() public_key = key.public_key().public_bytes( serialization.Encoding.OpenSSH, @@ -411,93 +408,19 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: sshjump_image = clouds.Kubernetes.IMAGE_CPU namespace = kubernetes_utils.get_current_kube_config_context_namespace() - template_path = os.path.join(sky.__root_dir__, 'templates', - 'kubernetes-sshjump.yml.j2') - if not os.path.exists(template_path): - raise FileNotFoundError( - 'Template "kubernetes-sshjump.j2" does not exist.') - with open(template_path) as fin: - template = fin.read() - j2_template = jinja2.Template(template) - cont = j2_template.render(name=sshjump_name, - image=sshjump_image, - secret=key_label) - - content = yaml.safe_load(cont) - - # ServiceAccount - try: - kubernetes.core_api().create_namespaced_service_account( - namespace, content['service_account']) - except 
kubernetes.api_exception() as e: - if e.status == 409: - logger.warning( - 'SSH Jump ServiceAcount already exists in the cluster, using ' - 'it...') - else: - raise - else: - logger.info('Creating SSH Jump ServiceAcount in the cluster...') - # Role - try: - kubernetes.auth_api().create_namespaced_role(namespace, content['role']) - except kubernetes.api_exception() as e: - if e.status == 409: - logger.warning( - 'SSH Jump Role already exists in the cluster, using it...') - else: - raise - else: - logger.info('Creating SSH Jump Role in the cluster...') - # RoleBinding - try: - kubernetes.auth_api().create_namespaced_role_binding( - namespace, content['role_binding']) - except kubernetes.api_exception() as e: - if e.status == 409: - logger.warning( - 'SSH Jump RoleBinding already exists in the cluster, using ' - 'it...') - else: - raise - else: - logger.info('Creating SSH Jump RoleBinding in the cluster...') - - # Pod - try: - kubernetes.core_api().create_namespaced_pod(namespace, - content['pod_spec']) - except kubernetes.api_exception() as e: - if e.status == 409: - logger.warning( - f'SSH Jump Host {sshjump_name} already exists in the cluster, ' - 'using it...') - else: - raise - else: - logger.info(f'Creating SSH Jump Host {sshjump_name} in the cluster...') - # Service - try: - kubernetes.core_api().create_namespaced_service(namespace, - content['service_spec']) - except kubernetes.api_exception() as e: - if e.status == 409: - logger.warning( - f'SSH Jump Service {sshjump_name} already exists in the ' - 'cluster, using it...') - else: - raise - else: - logger.info( - f'Creating SSH Jump Service {sshjump_name} in the cluster...') + kubernetes_utils.setup_sshjump(sshjump_name, + sshjump_image, + key_label, + namespace) ssh_jump_port = clouds.Kubernetes.get_port(sshjump_name) ssh_jump_ip = clouds.Kubernetes.get_external_ip() - ssh_jump_proxy_command = f'ssh -tt -i {PRIVATE_SSH_KEY_PATH} ' + \ - '-o StrictHostKeyChecking=no ' + \ - '-o UserKnownHostsFile=/dev/null -o 
IdentitiesOnly=yes ' + \ - f'-p {ssh_jump_port} -W %h:%p sky@{ssh_jump_ip}' + ssh_jump_proxy_command = (f'ssh -tt -i {PRIVATE_SSH_KEY_PATH} ' + '-o StrictHostKeyChecking=no ' + '-o UserKnownHostsFile=/dev/null ' + '-o IdentitiesOnly=yes ' + f'-p {ssh_jump_port} -W %h:%p sky@{ssh_jump_ip}') config['auth']['ssh_proxy_command'] = ssh_jump_proxy_command diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index f1033cecaf7..e499ce88781 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -150,7 +150,7 @@ class Kubernetes(clouds.Cloud): """Kubernetes.""" SKY_SSH_KEY_SECRET_NAME = f'sky-ssh-{common_utils.get_user_hash()}' - SKY_SSH_JUMP_NAME = f'sshjump-{common_utils.get_user_hash()}' + SKY_SSH_JUMP_NAME = f'sky-sshjump-{common_utils.get_user_hash()}' # Timeout for resource provisioning. This timeout determines how long to # wait for pod to be in pending status before giving up. diff --git a/sky/templates/kubernetes-sshjump.yml.j2 b/sky/templates/kubernetes-sshjump.yml.j2 index c27b32cb24b..bc3d35f9595 100644 --- a/sky/templates/kubernetes-sshjump.yml.j2 +++ b/sky/templates/kubernetes-sshjump.yml.j2 @@ -5,15 +5,16 @@ pod_spec: name: {{ name }} labels: component: {{ name }} + app: skypilot spec: - serviceAccountName: sshjump + serviceAccountName: sky-sshjump-sa volumes: - name: secret-volume secret: secretName: {{ secret }} containers: - name: {{ name }} - imagePullPolicy: IfNotPresent + imagePullPolicy: Always image: {{ image }} command: ["python3", "-u", "/skypilot/sky/utils/kubernetes/sshjump_lcm.py"] ports: @@ -47,6 +48,7 @@ service_spec: kind: Service metadata: name: {{ name }} + app: skypilot spec: type: NodePort selector: @@ -62,12 +64,13 @@ service_account: apiVersion: v1 kind: ServiceAccount metadata: - name: sshjump + name: sky-sshjump-sa + app: skypilot role: kind: Role apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: sshjump + name: sky-sshjump-role rules: - apiGroups: [""] resources: ["pods", "pods/status", 
"pods/exec", "services"] @@ -76,11 +79,12 @@ role_binding: apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: - name: sshjump + name: sky-sshjump-rb + app: skypilot subjects: - kind: ServiceAccount - name: sshjump + name: sky-sshjump-sa roleRef: kind: Role - name: sshjump + name: sky-sshjump-role apiGroup: rbac.authorization.k8s.io diff --git a/sky/utils/kubernetes/sshjump_lcm.py b/sky/utils/kubernetes/sshjump_lcm.py index 51c61b6cc43..2998e724675 100644 --- a/sky/utils/kubernetes/sshjump_lcm.py +++ b/sky/utils/kubernetes/sshjump_lcm.py @@ -1,12 +1,12 @@ -"""Manages lifecycle of sshjump pod. +"""Manages lifecycle of ssh jump pod. -This script runs inside sshjump pod as the main process (PID 1). +This script runs inside ssh jump pod as the main process (PID 1). It terminates itself (by removing sshjump service and pod via a call to kubeapi), if it does not see ray pods in the duration of 10 minutes. If the -user re-launches a task before the duration is over, then sshjump pod is being +user re-launches a task before the duration is over, then ssh jump pod is being reused and will terminate itself when it sees that no ray cluster exist in that -amount duration. +duration. 
""" import datetime import os @@ -29,13 +29,13 @@ # The amount of time in seconds to wait between Ray pods existence checks retry_interval = int(os.getenv('RETRY_INTERVAL', '60')) -# Ray pods are labeled with this value i.e sshjump name which is unique per user -#(based on userhash) +# Ray pods are labeled with this value i.e., sshjump name which is unique per +# user (based on user hash) label_selector = f'skypilot-sshjump={current_name}' def poll(): - sys.stdout.write('enter poll()\n') + logging.info('enter poll()\n') alert_delta = datetime.timedelta(seconds=alert_threshold) @@ -55,7 +55,7 @@ def poll(): ret = v1.list_namespaced_pod(current_namespace, label_selector=label_selector) except Exception as e: - sys.stdout.write(f'[ERROR] exit poll() with error: {e}\n') + sys.stdout.write(f'Error: listing pods failed with error: {e}\n') raise if len(ret.items) == 0: @@ -92,15 +92,15 @@ def poll(): def main(): - sys.stdout.write('enter main()\n') - sys.stdout.write(f'*** current_name {current_name}\n') - sys.stdout.write(f'*** current_namespace {current_namespace}\n') - sys.stdout.write(f'*** alert_threshold time {alert_threshold}\n') - sys.stdout.write(f'*** retry_interval time {retry_interval}\n') - sys.stdout.write(f'*** label_selector {label_selector}\n') + sys.stdout.write('SkyPilot SSH Jump Pod Lifecycle Manager\n') + sys.stdout.write(f'current_name: {current_name}\n') + sys.stdout.write(f'current_namespace: {current_namespace}\n') + sys.stdout.write(f'alert_threshold time: {alert_threshold}\n') + sys.stdout.write(f'retry_interval time: {retry_interval}\n') + sys.stdout.write(f'label_selector: {label_selector}\n') if not current_name or not current_namespace: - raise Exception('[ERROR] One or more environment variables is missing ' + raise Exception('One or more environment variables is missing ' 'with an actual value.') poll() diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index 5b732491e61..5b5da464229 100644 --- 
a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -1,11 +1,20 @@ """Kubernetes utilities for SkyPilot.""" +import os from typing import Optional, Set, Tuple +from urllib.parse import urlparse +import jinja2 +import yaml + +import sky +from sky import sky_logging from sky.adaptors import kubernetes from sky.utils import common_utils DEFAULT_NAMESPACE = 'default' +logger = sky_logging.init_logger(__name__) + class GPULabelFormatter: """Base class to define a GPU label formatter for a Kubernetes cluster @@ -203,3 +212,100 @@ def get_current_kube_config_context_namespace() -> str: return DEFAULT_NAMESPACE except k8s.config.config_exception.ConfigException: return DEFAULT_NAMESPACE + + +def setup_sshjump(sshjump_name: str, + sshjump_image: str, + ssh_key_secret: str, + namespace: str): + """ + Sets up Kubernetes resources (RBAC and pod) for SSH jump host. + + Our Kubernetes implementation uses a SSH jump pod to reach SkyPilot clusters + running inside a cluster. This function sets up the resources needed for + the SSH jump pod. This includes a service account which grants the jump pod + permission to watch for other SkyPilot pods and terminate itself if there + are no SkyPilot pods running. 
+ + Args: + sshjump_image: Container image to use for the SSH jump pod + sshjump_name: Name to use for the SSH jump pod + ssh_key_secret: Secret name for the SSH key stored in the cluster + namespace: Namespace to create the SSH jump pod in + """ + template_path = os.path.join(sky.__root_dir__, 'templates', + 'kubernetes-sshjump.yml.j2') + if not os.path.exists(template_path): + raise FileNotFoundError( + 'Template "kubernetes-sshjump.j2" does not exist.') + with open(template_path) as fin: + template = fin.read() + j2_template = jinja2.Template(template) + cont = j2_template.render(name=sshjump_name, + image=sshjump_image, + secret=ssh_key_secret) + content = yaml.safe_load(cont) + # ServiceAccount + try: + kubernetes.core_api().create_namespaced_service_account( + namespace, content['service_account']) + except kubernetes.api_exception() as e: + if e.status == 409: + logger.warning( + 'SSH Jump ServiceAcount already exists in the cluster, using ' + 'it.') + else: + raise + else: + logger.info('Creating SSH Jump ServiceAcount in the cluster.') + # Role + try: + kubernetes.auth_api().create_namespaced_role(namespace, content['role']) + except kubernetes.api_exception() as e: + if e.status == 409: + logger.warning( + 'SSH Jump Role already exists in the cluster, using it.') + else: + raise + else: + logger.info('Creating SSH Jump Role in the cluster.') + # RoleBinding + try: + kubernetes.auth_api().create_namespaced_role_binding( + namespace, content['role_binding']) + except kubernetes.api_exception() as e: + if e.status == 409: + logger.warning( + 'SSH Jump RoleBinding already exists in the cluster, using ' + 'it.') + else: + raise + else: + logger.info('Creating SSH Jump RoleBinding in the cluster.') + # Pod + try: + kubernetes.core_api().create_namespaced_pod(namespace, + content['pod_spec']) + except kubernetes.api_exception() as e: + if e.status == 409: + logger.warning( + f'SSH Jump Host {sshjump_name} already exists in the cluster, ' + 'using it.') + else: + 
raise + else: + logger.info(f'Creating SSH Jump Host {sshjump_name} in the cluster.') + # Service + try: + kubernetes.core_api().create_namespaced_service(namespace, + content['service_spec']) + except kubernetes.api_exception() as e: + if e.status == 409: + logger.warning( + f'SSH Jump Service {sshjump_name} already exists in the ' + 'cluster, using it.') + else: + raise + else: + logger.info( + f'Creating SSH Jump Service {sshjump_name} in the cluster.') diff --git a/tests/kubernetes/build_image.sh b/tests/kubernetes/build_image.sh index 4ee6ac6b582..1f887ef9b80 100644 --- a/tests/kubernetes/build_image.sh +++ b/tests/kubernetes/build_image.sh @@ -35,7 +35,7 @@ done shift $((OPTIND-1)) # Add -gpu to the tag if the GPU image is being built -if [[ $gpu ]]; then +if [[ $gpu == "true" ]]; then TAG=$TAG-gpu:latest else TAG=$TAG:latest @@ -44,27 +44,24 @@ fi # Navigate to the root of the project (inferred from git) cd "$(git rev-parse --show-toplevel)" -echo $push -echo $gpu - # If push is used, build the image for both amd64 and arm64 if [[ $push == "true" ]]; then # If gpu is used, build the GPU image if [[ $gpu == "true" ]]; then - echo "Building and pushing GPU image for amd64" + echo "Building and pushing GPU image for amd64: $TAG" docker buildx build --push --platform linux/amd64 -t $TAG -f Dockerfile_k8s_gpu ./sky else - echo "Building and pushing CPU image for amd64 and arm64" + echo "Building and pushing CPU image for amd64 and arm64: $TAG" docker buildx build --push --platform linux/arm64,linux/amd64 -t $TAG -f Dockerfile_k8s ./sky fi fi # Load the right image depending on the architecture of the host machine (Apple Silicon or Intel) if [[ $(uname -m) == "arm64" ]]; then - echo "Loading image for arm64 (Apple Silicon etc.)" + echo "Loading image for arm64 (Apple Silicon etc.): $TAG" docker buildx build --load --platform linux/arm64 -t $TAG -f Dockerfile_k8s ./sky elif [[ $(uname -m) == "x86_64" ]]; then - echo "Building for amd64 (Intel CPUs)" + echo 
"Building for amd64 (Intel CPUs): $TAG" docker buildx build --load --platform linux/amd64 -t $TAG -f Dockerfile_k8s ./sky else echo "Unsupported architecture: $(uname -m)" From e9d0ed1033094130ca6ca846f5a73de4fc0ff38b Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 17 Aug 2023 21:43:55 +0530 Subject: [PATCH 152/183] lint --- sky/authentication.py | 6 ++---- sky/utils/kubernetes/sshjump_lcm.py | 5 +++-- sky/utils/kubernetes_utils.py | 7 ++----- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/sky/authentication.py b/sky/authentication.py index 147353fea09..80ef8c07569 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -67,7 +67,7 @@ def _generate_rsa_key_pair() -> Tuple[str, str]: encoding=serialization.Encoding.PEM, format=serialization.PrivateFormat.TraditionalOpenSSL, encryption_algorithm=serialization.NoEncryption()).decode( - 'utf-8').strip() + 'utf-8').strip() public_key = key.public_key().public_bytes( serialization.Encoding.OpenSSH, @@ -408,9 +408,7 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: sshjump_image = clouds.Kubernetes.IMAGE_CPU namespace = kubernetes_utils.get_current_kube_config_context_namespace() - kubernetes_utils.setup_sshjump(sshjump_name, - sshjump_image, - key_label, + kubernetes_utils.setup_sshjump(sshjump_name, sshjump_image, key_label, namespace) ssh_jump_port = clouds.Kubernetes.get_port(sshjump_name) diff --git a/sky/utils/kubernetes/sshjump_lcm.py b/sky/utils/kubernetes/sshjump_lcm.py index 2998e724675..055d6717595 100644 --- a/sky/utils/kubernetes/sshjump_lcm.py +++ b/sky/utils/kubernetes/sshjump_lcm.py @@ -13,7 +13,8 @@ import sys import time -from kubernetes import client, config +from kubernetes import client +from kubernetes import config # Load kube config config.load_incluster_config() @@ -35,7 +36,7 @@ def poll(): - logging.info('enter poll()\n') + sys.stdout.write('enter poll()\n') alert_delta = datetime.timedelta(seconds=alert_threshold) diff --git 
a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index 5b5da464229..fb7b3ab4302 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -214,9 +214,7 @@ def get_current_kube_config_context_namespace() -> str: return DEFAULT_NAMESPACE -def setup_sshjump(sshjump_name: str, - sshjump_image: str, - ssh_key_secret: str, +def setup_sshjump(sshjump_name: str, sshjump_image: str, ssh_key_secret: str, namespace: str): """ Sets up Kubernetes resources (RBAC and pod) for SSH jump host. @@ -307,5 +305,4 @@ def setup_sshjump(sshjump_name: str, else: raise else: - logger.info( - f'Creating SSH Jump Service {sshjump_name} in the cluster.') + logger.info(f'Creating SSH Jump Service {sshjump_name} in the cluster.') From f7362363d7cef6641a0b3c424837e5589690f1af Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 20 Aug 2023 07:58:45 +0530 Subject: [PATCH 153/183] fix utils ref --- sky/skylet/providers/kubernetes/node_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 4429df187e8..49b1d1243b3 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -99,7 +99,7 @@ def node_tags(self, node_id): return pod.metadata.labels def external_ip(self, node_id): - return utils.get_external_ip() + return kubernetes_utils.get_external_ip() def external_port(self, node_id): # Extract the NodePort of the head node's SSH service From 7b5d0b5dc47833ed90c58c48ef20816be053009b Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 20 Aug 2023 07:59:22 +0530 Subject: [PATCH 154/183] typo --- sky/skylet/providers/kubernetes/node_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 49b1d1243b3..c16039d691b 100644 --- 
a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -50,7 +50,7 @@ def __init__(self, provider_config, cluster_name): NodeProvider.__init__(self, provider_config, cluster_name) self.cluster_name = cluster_name - # Kubernetes namespace to user + # Kubernetes namespace to use self.namespace = kubernetes_utils.get_current_kube_config_context_namespace( ) From 8a3d5a7b5deb361bebe9d29ce8bb14e0454e04c2 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 20 Aug 2023 10:00:15 +0530 Subject: [PATCH 155/183] refactor pod creation --- sky/authentication.py | 23 ++--- sky/skylet/providers/kubernetes/config.py | 31 ++++++ sky/utils/kubernetes_utils.py | 120 +++++++++++++++------- 3 files changed, 124 insertions(+), 50 deletions(-) diff --git a/sky/authentication.py b/sky/authentication.py index 80ef8c07569..adb69126af0 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -404,22 +404,17 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: logger.error(suffix) raise - sshjump_name = clouds.Kubernetes.SKY_SSH_JUMP_NAME - sshjump_image = clouds.Kubernetes.IMAGE_CPU + # Setup service for SSH jump pod. We create the SSH jump service here + # because we need to know the service IP address and port to set the + # ssh_proxy_command in the autoscaler config. 
namespace = kubernetes_utils.get_current_kube_config_context_namespace() + sshjump_name = clouds.Kubernetes.SKY_SSH_JUMP_NAME - kubernetes_utils.setup_sshjump(sshjump_name, sshjump_image, key_label, - namespace) - - ssh_jump_port = clouds.Kubernetes.get_port(sshjump_name) - ssh_jump_ip = clouds.Kubernetes.get_external_ip() - - ssh_jump_proxy_command = (f'ssh -tt -i {PRIVATE_SSH_KEY_PATH} ' - '-o StrictHostKeyChecking=no ' - '-o UserKnownHostsFile=/dev/null ' - '-o IdentitiesOnly=yes ' - f'-p {ssh_jump_port} -W %h:%p sky@{ssh_jump_ip}') + kubernetes_utils.setup_sshjump_svc(sshjump_name, namespace) - config['auth']['ssh_proxy_command'] = ssh_jump_proxy_command + ssh_proxy_cmd = kubernetes_utils.get_ssh_proxy_command(PRIVATE_SSH_KEY_PATH, + sshjump_name, + namespace) + config['auth']['ssh_proxy_command'] = ssh_proxy_cmd return config diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index e861f88b9ca..3e85061faf2 100644 --- a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -3,6 +3,7 @@ import math import re +from sky import clouds from sky.adaptors import kubernetes from sky.utils import kubernetes_utils @@ -63,6 +64,8 @@ def bootstrap_kubernetes(config): _configure_services(namespace, config['provider']) + config = _configure_ssh_jump(namespace, config) + if not config['provider'].get('_operator'): # These steps are unecessary when using the Operator. _configure_autoscaler_service_account(namespace, config['provider']) @@ -280,6 +283,34 @@ def _configure_autoscaler_role_binding(namespace, provider_config): logger.info(log_prefix + created_msg(binding_field, name)) +def _configure_ssh_jump(namespace, config): + """Creates a SSH jump pod to connect to the cluster. + + Also updates config['auth']['ssh_proxy_command'] to use the newly created + jump pod. 
+ """ + # TODO(romilb): These variables should be moved and fetched from config + sshjump_name = clouds.Kubernetes.SKY_SSH_JUMP_NAME + sshjump_image = clouds.Kubernetes.IMAGE_CPU + key_label = clouds.Kubernetes.SKY_SSH_KEY_SECRET_NAME + + # TODO(romilb): We currently split SSH jump pod and svc creation. Service + # is first created in authentication.py::setup_kubernetes_authentication + # and then SSH jump pod creation happens here. This is because we need to + # set the ssh_proxy_command in the ray YAML before we pass it to the + # autoscaler. If in the future if we can write the ssh_proxy_command to the + # cluster yaml through this method, then we should move the service + # creation here. + + # TODO(romilb): We should add a check here to make sure the service is up + # and available before we create the SSH jump pod. If for any reason the + # service is missing, we should raise an error. + + kubernetes_utils.setup_sshjump_pod(sshjump_name, sshjump_image, key_label, + namespace) + return config + + def _configure_services(namespace, provider_config): service_field = 'services' if service_field not in provider_config: diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index fb7b3ab4302..f017541cb73 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -1,6 +1,6 @@ """Kubernetes utilities for SkyPilot.""" import os -from typing import Optional, Set, Tuple +from typing import Dict, Optional, Set, Tuple from urllib.parse import urlparse import jinja2 @@ -214,10 +214,63 @@ def get_current_kube_config_context_namespace() -> str: return DEFAULT_NAMESPACE -def setup_sshjump(sshjump_name: str, sshjump_image: str, ssh_key_secret: str, +def get_ssh_proxy_command(private_key_path: str, + sshjump_name: str, + namespace: str) -> str: + """Generates the SSH proxy command to connect through the SSH jump pod. + + Args: + private_key_path: Path to the private key to use for SSH. 
This key must + be authorized to access the SSH jump pod. + sshjump_name: Name of the SSH jump service to use + namespace: Kubernetes namespace to use + """ + # Fetch service port and IP to connect to for the jump svc + ssh_jump_port = get_port(sshjump_name, namespace) + ssh_jump_ip = get_external_ip() + + ssh_jump_proxy_command = (f'ssh -tt -i {private_key_path} ' + '-o StrictHostKeyChecking=no ' + '-o UserKnownHostsFile=/dev/null ' + '-o IdentitiesOnly=yes ' + f'-p {ssh_jump_port} -W %h:%p sky@{ssh_jump_ip}') + + return ssh_jump_proxy_command + + +def setup_sshjump_svc(sshjump_name: str, namespace: str): + """ + Sets up Kubernetes service resource to access for SSH jump pod. + + This method acts as a necessary complement to be run along with + setup_sshjump_pod(...) method. This service ensures the pod is accessible. + + Args: + sshjump_name: Name to use for the SSH jump service + namespace: Namespace to create the SSH jump service in + """ + # Fill in template - ssh_key_secret and sshjump_image are not required for + # the service spec, so we pass in None. + content = fill_sshjump_template(None, None, sshjump_name) + # Create service + try: + kubernetes.core_api().create_namespaced_service(namespace, + content['service_spec']) + except kubernetes.api_exception() as e: + if e.status == 409: + logger.warning( + f'SSH Jump Service {sshjump_name} already exists in the ' + 'cluster, using it.') + else: + raise + else: + logger.info(f'Created SSH Jump Service {sshjump_name}.') + + +def setup_sshjump_pod(sshjump_name: str, sshjump_image: str, ssh_key_secret: str, namespace: str): """ - Sets up Kubernetes resources (RBAC and pod) for SSH jump host. + Sets up Kubernetes RBAC and pod for SSH jump host. Our Kubernetes implementation uses a SSH jump pod to reach SkyPilot clusters running inside a cluster. 
This function sets up the resources needed for @@ -225,84 +278,79 @@ def setup_sshjump(sshjump_name: str, sshjump_image: str, ssh_key_secret: str, permission to watch for other SkyPilot pods and terminate itself if there are no SkyPilot pods running. + setup_sshjump_service must also be run to ensure that the SSH jump pod is + reachable. + Args: sshjump_image: Container image to use for the SSH jump pod sshjump_name: Name to use for the SSH jump pod ssh_key_secret: Secret name for the SSH key stored in the cluster namespace: Namespace to create the SSH jump pod in """ - template_path = os.path.join(sky.__root_dir__, 'templates', - 'kubernetes-sshjump.yml.j2') - if not os.path.exists(template_path): - raise FileNotFoundError( - 'Template "kubernetes-sshjump.j2" does not exist.') - with open(template_path) as fin: - template = fin.read() - j2_template = jinja2.Template(template) - cont = j2_template.render(name=sshjump_name, - image=sshjump_image, - secret=ssh_key_secret) - content = yaml.safe_load(cont) + content = fill_sshjump_template(ssh_key_secret, sshjump_image, sshjump_name) # ServiceAccount try: kubernetes.core_api().create_namespaced_service_account( namespace, content['service_account']) except kubernetes.api_exception() as e: if e.status == 409: - logger.warning( + logger.info( 'SSH Jump ServiceAcount already exists in the cluster, using ' 'it.') else: raise else: - logger.info('Creating SSH Jump ServiceAcount in the cluster.') + logger.info('Created SSH Jump ServiceAcount.') # Role try: kubernetes.auth_api().create_namespaced_role(namespace, content['role']) except kubernetes.api_exception() as e: if e.status == 409: - logger.warning( + logger.info( 'SSH Jump Role already exists in the cluster, using it.') else: raise else: - logger.info('Creating SSH Jump Role in the cluster.') + logger.info('Created SSH Jump Role.') # RoleBinding try: kubernetes.auth_api().create_namespaced_role_binding( namespace, content['role_binding']) except 
kubernetes.api_exception() as e: if e.status == 409: - logger.warning( + logger.info( 'SSH Jump RoleBinding already exists in the cluster, using ' 'it.') else: raise else: - logger.info('Creating SSH Jump RoleBinding in the cluster.') + logger.info('Created SSH Jump RoleBinding.') # Pod try: kubernetes.core_api().create_namespaced_pod(namespace, content['pod_spec']) except kubernetes.api_exception() as e: if e.status == 409: - logger.warning( + logger.info( f'SSH Jump Host {sshjump_name} already exists in the cluster, ' 'using it.') else: raise else: - logger.info(f'Creating SSH Jump Host {sshjump_name} in the cluster.') - # Service - try: - kubernetes.core_api().create_namespaced_service(namespace, - content['service_spec']) - except kubernetes.api_exception() as e: - if e.status == 409: - logger.warning( - f'SSH Jump Service {sshjump_name} already exists in the ' - 'cluster, using it.') - else: - raise - else: - logger.info(f'Creating SSH Jump Service {sshjump_name} in the cluster.') + logger.info(f'Created SSH Jump Host {sshjump_name}.') + + +def fill_sshjump_template(ssh_key_secret: str, sshjump_image: str, sshjump_name: str) -> Dict: + template_path = os.path.join(sky.__root_dir__, 'templates', + 'kubernetes-sshjump.yml.j2') + if not os.path.exists(template_path): + raise FileNotFoundError( + 'Template "kubernetes-sshjump.j2" does not exist.') + with open(template_path) as fin: + template = fin.read() + j2_template = jinja2.Template(template) + cont = j2_template.render(name=sshjump_name, + image=sshjump_image, + secret=ssh_key_secret) + content = yaml.safe_load(cont) + return content From 58b81263326624652fb3b1c8a7f05f2d4d985cd9 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Sun, 20 Aug 2023 10:02:10 +0530 Subject: [PATCH 156/183] lint --- sky/authentication.py | 5 ++--- sky/utils/kubernetes_utils.py | 32 +++++++++++++------------------- 2 files changed, 15 insertions(+), 22 deletions(-) diff --git a/sky/authentication.py b/sky/authentication.py index 
adb69126af0..aa9f336c27a 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -412,9 +412,8 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: kubernetes_utils.setup_sshjump_svc(sshjump_name, namespace) - ssh_proxy_cmd = kubernetes_utils.get_ssh_proxy_command(PRIVATE_SSH_KEY_PATH, - sshjump_name, - namespace) + ssh_proxy_cmd = kubernetes_utils.get_ssh_proxy_command( + PRIVATE_SSH_KEY_PATH, sshjump_name, namespace) config['auth']['ssh_proxy_command'] = ssh_proxy_cmd return config diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index f017541cb73..57220d68f67 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -113,8 +113,7 @@ def get_head_ssh_port(cluster_name: str, namespace: str) -> int: def get_port(svc_name: str, namespace: str) -> int: - """ - Gets the nodeport of the specified service. + """Gets the nodeport of the specified service. Args: svc_name (str): Name of the kubernetes service. 
Note that this may be @@ -142,8 +141,7 @@ def get_external_ip(): def check_credentials(timeout: int = kubernetes.API_TIMEOUT) -> \ Tuple[bool, Optional[str]]: - """ - Check if the credentials in kubeconfig file are valid + """Check if the credentials in kubeconfig file are valid Args: timeout (int): Timeout in seconds for the test API call @@ -181,8 +179,7 @@ def check_credentials(timeout: int = kubernetes.API_TIMEOUT) -> \ def get_current_kube_config_context_name() -> Optional[str]: - """ - Get the current kubernetes context from the kubeconfig file + """Get the current kubernetes context from the kubeconfig file Returns: str | None: The current kubernetes context if it exists, None otherwise @@ -196,8 +193,7 @@ def get_current_kube_config_context_name() -> Optional[str]: def get_current_kube_config_context_namespace() -> str: - """ - Get the current kubernetes context namespace from the kubeconfig file + """Get the current kubernetes context namespace from the kubeconfig file Returns: str | None: The current kubernetes context namespace if it exists, else @@ -214,8 +210,7 @@ def get_current_kube_config_context_namespace() -> str: return DEFAULT_NAMESPACE -def get_ssh_proxy_command(private_key_path: str, - sshjump_name: str, +def get_ssh_proxy_command(private_key_path: str, sshjump_name: str, namespace: str) -> str: """Generates the SSH proxy command to connect through the SSH jump pod. @@ -239,8 +234,7 @@ def get_ssh_proxy_command(private_key_path: str, def setup_sshjump_svc(sshjump_name: str, namespace: str): - """ - Sets up Kubernetes service resource to access for SSH jump pod. + """Sets up Kubernetes service resource to access for SSH jump pod. This method acts as a necessary complement to be run along with setup_sshjump_pod(...) method. This service ensures the pod is accessible. 
@@ -250,8 +244,8 @@ def setup_sshjump_svc(sshjump_name: str, namespace: str): namespace: Namespace to create the SSH jump service in """ # Fill in template - ssh_key_secret and sshjump_image are not required for - # the service spec, so we pass in None. - content = fill_sshjump_template(None, None, sshjump_name) + # the service spec, so we pass in empty strs. + content = fill_sshjump_template('', '', sshjump_name) # Create service try: kubernetes.core_api().create_namespaced_service(namespace, @@ -267,10 +261,9 @@ def setup_sshjump_svc(sshjump_name: str, namespace: str): logger.info(f'Created SSH Jump Service {sshjump_name}.') -def setup_sshjump_pod(sshjump_name: str, sshjump_image: str, ssh_key_secret: str, - namespace: str): - """ - Sets up Kubernetes RBAC and pod for SSH jump host. +def setup_sshjump_pod(sshjump_name: str, sshjump_image: str, + ssh_key_secret: str, namespace: str): + """Sets up Kubernetes RBAC and pod for SSH jump host. Our Kubernetes implementation uses a SSH jump pod to reach SkyPilot clusters running inside a cluster. 
This function sets up the resources needed for @@ -340,7 +333,8 @@ def setup_sshjump_pod(sshjump_name: str, sshjump_image: str, ssh_key_secret: str logger.info(f'Created SSH Jump Host {sshjump_name}.') -def fill_sshjump_template(ssh_key_secret: str, sshjump_image: str, sshjump_name: str) -> Dict: +def fill_sshjump_template(ssh_key_secret: str, sshjump_image: str, + sshjump_name: str) -> Dict: template_path = os.path.join(sky.__root_dir__, 'templates', 'kubernetes-sshjump.yml.j2') if not os.path.exists(template_path): From 950de005af9cd29c9f34a9c8432027dae2ca2ec0 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 25 Aug 2023 08:50:20 +0530 Subject: [PATCH 157/183] merge fixes --- sky/utils/kubernetes_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index 1e1341c7da2..6302d43904a 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -2,12 +2,13 @@ import os import math import re -from typing import Any, List, Optional, Set, Tuple, Union +from typing import Any, Dict, List, Optional, Set, Tuple, Union from urllib.parse import urlparse import jinja2 import yaml +import sky from sky import exceptions from sky import sky_logging from sky.adaptors import kubernetes From 292a3508187e02cd8be68cfd3ecb56ebe91b2295 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 25 Aug 2023 09:18:16 +0530 Subject: [PATCH 158/183] portfix --- sky/backends/cloud_vm_ray_backend.py | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index b51c99550db..7695c6117ee 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2300,22 +2300,11 @@ def _update_cluster_region(self): self.launched_resources = self.launched_resources.copy(region=region) def update_ssh_ports(self, max_attempts: int = 1) -> None: - # TODO(romilb): 
Replace this with a call to the cloud class to get ports - # Use port 22 for everything except Kubernetes - if not isinstance(self.launched_resources.cloud, clouds.Kubernetes): - head_ssh_port = 22 - else: - svc_name = f'{self.cluster_name}-ray-head-ssh' - retry_cnt = 0 - while True: - try: - head_ssh_port = clouds.Kubernetes.get_port(svc_name) - break - except Exception: # pylint: disable=broad-except - retry_cnt += 1 - if retry_cnt >= max_attempts: - raise - # TODO(romilb): Multinode doesn't work with Kubernetes yet. + """Fetches and sets the SSH ports for the cluster nodes. + + Use this method to use any cloud-specific port fetching logic. + """ + head_ssh_port = 22 self.stable_ssh_ports = ([head_ssh_port] + [22] * (self.num_node_ips - 1)) From c17f8546f54f21aee5afa7bd636980286dbab786 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 25 Aug 2023 09:21:58 +0530 Subject: [PATCH 159/183] merge fixes --- sky/backends/cloud_vm_ray_backend.py | 1 + sky/clouds/kubernetes.py | 4 +--- sky/utils/kubernetes_utils.py | 4 ++-- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 7695c6117ee..ebc853f107e 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2304,6 +2304,7 @@ def update_ssh_ports(self, max_attempts: int = 1) -> None: Use this method to use any cloud-specific port fetching logic. """ + del max_attempts # Unused. 
head_ssh_port = 22 self.stable_ssh_ports = ([head_ssh_port] + [22] * (self.num_node_ips - 1)) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 238e9642c50..5c8f12c1798 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -1,9 +1,8 @@ """Kubernetes.""" import json -import math import os import typing -from typing import Dict, Iterator, List, Optional, Tuple, Union +from typing import Dict, Iterator, List, Optional, Tuple from sky import clouds from sky import exceptions @@ -11,7 +10,6 @@ from sky import status_lib from sky.adaptors import kubernetes from sky.utils import common_utils -from sky.utils import env_options from sky.utils import kubernetes_utils from sky.utils import ux_utils diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index 6302d43904a..e3fbe3a00b2 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -1,6 +1,6 @@ """Kubernetes utilities for SkyPilot.""" -import os import math +import os import re from typing import Any, Dict, List, Optional, Set, Tuple, Union from urllib.parse import urlparse @@ -12,9 +12,9 @@ from sky import exceptions from sky import sky_logging from sky.adaptors import kubernetes +from sky.utils import common_utils from sky.utils import env_options from sky.utils import ux_utils -from sky.utils import common_utils DEFAULT_NAMESPACE = 'default' From 330c3b4f2fa5b23189ecebd6313c5c5f84072d26 Mon Sep 17 00:00:00 2001 From: Avi Weit Date: Sun, 27 Aug 2023 14:01:16 +0300 Subject: [PATCH 160/183] [k8s_cloud_beta1] Sky down for a cluster deployed in Kubernetes to possibly remove sshjump pod. (#2425) * Sky down for a kubernetes cluster to possibly terminate sshjump pod. - If the related sshjump pod is being reported as its main container not have been started, then remove its pod and service. This is to minimize the chances for remaining with dangling sshjump pod. * Remove sshjump service in case of an failure to analyze sshjump. 
- remove _request_timeout as it might not be needed due to terminationGracePeriodSeconds being set in sshjump template. * Move sshjump analysis to kubernetes_utils. * Apply changes per ./format.sh. * Minor comment rephrase. * Use sshjump_name from ray pod label. - rather than from clouds.Kubernetes --- .../providers/kubernetes/node_provider.py | 5 ++ sky/utils/kubernetes_utils.py | 56 +++++++++++++++++++ 2 files changed, 61 insertions(+) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 49b1d1243b3..7ec675978eb 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -243,6 +243,11 @@ def create_node(self, node_config, tags, count): def terminate_node(self, node_id): logger.info(config.log_prefix + 'calling delete_namespaced_pod') + try: + kubernetes_utils.analyze_sshjump_pod(self.namespace, node_id) + except Exception as e: + logger.warning(config.log_prefix + + f'Error occurred when analyzing SSH Jump pod: {e}') try: kubernetes.core_api().delete_namespaced_pod( node_id, diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index e3fbe3a00b2..c60487f2433 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -726,6 +726,62 @@ def setup_sshjump_pod(sshjump_name: str, sshjump_image: str, logger.info(f'Created SSH Jump Host {sshjump_name}.') +def analyze_sshjump_pod(namespace: str, node_id: str): + """Analyzes SSH jump pod to check its readiness. + + Prevents the existence of a dangling SSH jump pod. This could happen + in case the pod main container did not start properly (or failed) and SSH + jump pod LCM will not function properly to take care of removing the pod + and service when needed. 
+ + Args: + namespace: Namespace to remove the SSH jump pod and service from + node_id: Name of ray head pod + """ + + def find(l, predicate): + """Utility function to find element in given list + """ + results = [x for x in l if predicate(x)] + return results[0] if len(results) > 0 else None + + try: + pod = kubernetes.core_api().read_namespaced_pod(node_id, namespace) + except kubernetes.api_exception() as e: + if e.status == 404: + logger.warning(f'Tried to retrieve pod {node_id},' + ' but the pod was not found (404).') + raise + else: + sshjump_name = pod.metadata.labels.get('skypilot-sshjump') + try: + sshjump_pod = kubernetes.core_api().read_namespaced_pod( + sshjump_name, namespace) + cont_ready_cond = find(sshjump_pod.status.conditions, + lambda c: c.type == 'ContainersReady') + if cont_ready_cond and \ + cont_ready_cond.status == 'False': + # The main container is not ready. To be on the safe-side + # and prevent a dangling sshjump pod - lets remove it and + # the service. Otherwise main container is ready and its lcm + # takes care of the cleaning + kubernetes.core_api().delete_namespaced_pod(sshjump_name, namespace) + kubernetes.core_api().delete_namespaced_service( + sshjump_name, namespace) + + # only warn and proceed as usual + except kubernetes.api_exception() as e: + logger.warning(f'Tried to analyze sshjump pod {sshjump_name},' + f' but got error {e}\n') + # we encountered an issue while analyzing sshjump pod. 
To be on + # the safe side, lets remove its service so the port is freed + try: + kubernetes.core_api().delete_namespaced_service( + sshjump_name, namespace) + except kubernetes.api_exception(): + pass + + def fill_sshjump_template(ssh_key_secret: str, sshjump_image: str, sshjump_name: str) -> Dict: template_path = os.path.join(sky.__root_dir__, 'templates', From f2ea761b891a3276c7835b7860632aa5448321b6 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 28 Aug 2023 08:47:23 +0530 Subject: [PATCH 161/183] cleanup --- .../providers/kubernetes/node_provider.py | 2 +- sky/utils/kubernetes_utils.py | 33 ++++++++++--------- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 7ec675978eb..502689cd657 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -244,7 +244,7 @@ def create_node(self, node_config, tags, count): def terminate_node(self, node_id): logger.info(config.log_prefix + 'calling delete_namespaced_pod') try: - kubernetes_utils.analyze_sshjump_pod(self.namespace, node_id) + kubernetes_utils.clean_zombie_sshjump_pod(self.namespace, node_id) except Exception as e: logger.warning(config.log_prefix + f'Error occurred when analyzing SSH Jump pod: {e}') diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index c60487f2433..5fdcfd5831a 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -726,30 +726,30 @@ def setup_sshjump_pod(sshjump_name: str, sshjump_image: str, logger.info(f'Created SSH Jump Host {sshjump_name}.') -def analyze_sshjump_pod(namespace: str, node_id: str): - """Analyzes SSH jump pod to check its readiness. +def clean_zombie_sshjump_pod(namespace: str, node_id: str): + """Analyzes SSH jump pod and removes if it is in a bad state Prevents the existence of a dangling SSH jump pod. 
This could happen in case the pod main container did not start properly (or failed) and SSH - jump pod LCM will not function properly to take care of removing the pod - and service when needed. + jump pod lifecycle management (LCM) will not function properly to take care + of removing the pod and service when needed. Args: namespace: Namespace to remove the SSH jump pod and service from - node_id: Name of ray head pod + node_id: Name of head pod """ def find(l, predicate): - """Utility function to find element in given list - """ + """Utility function to find element in given list""" results = [x for x in l if predicate(x)] return results[0] if len(results) > 0 else None + # Get the SSH jump pod name from the head pod try: pod = kubernetes.core_api().read_namespaced_pod(node_id, namespace) except kubernetes.api_exception() as e: if e.status == 404: - logger.warning(f'Tried to retrieve pod {node_id},' + logger.warning(f'Failed to get pod {node_id},' ' but the pod was not found (404).') raise else: @@ -761,19 +761,20 @@ def find(l, predicate): lambda c: c.type == 'ContainersReady') if cont_ready_cond and \ cont_ready_cond.status == 'False': - # The main container is not ready. To be on the safe-side - # and prevent a dangling sshjump pod - lets remove it and - # the service. Otherwise main container is ready and its lcm - # takes care of the cleaning + # The main container is not ready. To be on the safe side + # and prevent a dangling sshjump pod, lets remove it and + # the service. Otherwise main container is ready and its lifecycle + # management script takes care of the cleaning. kubernetes.core_api().delete_namespaced_pod(sshjump_name, namespace) kubernetes.core_api().delete_namespaced_service( sshjump_name, namespace) - # only warn and proceed as usual except kubernetes.api_exception() as e: - logger.warning(f'Tried to analyze sshjump pod {sshjump_name},' - f' but got error {e}\n') - # we encountered an issue while analyzing sshjump pod. 
To be on + logger.warning(f'Tried to check sshjump pod {sshjump_name},' + f' but got error {e}\n. Consider running `kubectl ' + f'delete pod {sshjump_name} -n {namespace}` to manually ' + 'remove the pod if it has crashed.') + # We encountered an issue while checking sshjump pod. To be on # the safe side, lets remove its service so the port is freed try: kubernetes.core_api().delete_namespaced_service( From 9d34ff72ba30f10ddcced8e4e4ff640024c1af3b Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Mon, 28 Aug 2023 13:02:50 +0530 Subject: [PATCH 162/183] Add networking benchmarks --- .../k8s_network_benchmarks.md | 55 ++++++++++++++ .../networking_benchmarks/rsync_bench.sh | 76 +++++++++++++++++++ .../networking_benchmarks/skylaunch_bench.sh | 65 ++++++++++++++++ 3 files changed, 196 insertions(+) create mode 100644 tests/kubernetes/networking_benchmarks/k8s_network_benchmarks.md create mode 100644 tests/kubernetes/networking_benchmarks/rsync_bench.sh create mode 100644 tests/kubernetes/networking_benchmarks/skylaunch_bench.sh diff --git a/tests/kubernetes/networking_benchmarks/k8s_network_benchmarks.md b/tests/kubernetes/networking_benchmarks/k8s_network_benchmarks.md new file mode 100644 index 00000000000..050444ac94d --- /dev/null +++ b/tests/kubernetes/networking_benchmarks/k8s_network_benchmarks.md @@ -0,0 +1,55 @@ +# Kubernetes Networking Benchmarking + +A SkyPilot pod in Kubernetes can be accessed via three methods: +1. `direct`: NodePort service directly exposing the pod's SSH port +2. `sshjump`: NodePort service exposing a SSH jump pod that connects to the SkyPilot pod +3. `port-forward`: Uses `kubectl port-forward` to connect to ClusterIP service pointing to a SSH jump pod that connects to the SkyPilot pod + +`direct` requires opening a large range of ports on the cluster's firewall. +`sshjump` requires opening only one port on the cluster's firewall, but requires an additional SSH connection to the jump pod. 
+`port-forward` does not require opening any ports on the cluster's firewall, but routes all traffic over the kubernetes control plane. + +This document benchmarks the three approaches on a Kind cluster and a GKE cluster. + +We run two kinds of benchmarks: +1. `sky launch` benchmarks: how long does it take to launch a SkyPilot pod +2. Rsync benchmarks: how long does it take to copy a directory containing 1000 1MB files to the SkyPilot pod + +In summary, we find that `direct` is only marginally faster (~10%) than `sshjump` and `port-forward` for both `sky launch` and rsync benchmarks. + +Given these results, this document recommends using `port-forward` for all SkyPilot deployments because of its significant ease of use and security benefits. + +## Benchmark environment +These benchmarks were run on a 2023 M2 Max Macbook Pro with 32GB of RAM. Each benchmark was run on a GKE cluster and a local kind cluster (`sky local up`). Kubernetes v1.27 was used. This is on a 100mbit home connection. + +Note that GKE benchmarks, particularly rsync, are sensitive to the network connection between the benchmarking machine and the GKE cluster. + +## `sky launch` benchmarks + +Runs `sky launch` 5 times and reports the average of the last four runs. + +Usage: +``` +./skylaunch_bench.sh +# e.g., `./skylaunch_bench.sh gkedirect` will create a file called skylaunch_results_gkedirect.txt +``` + +| | Direct | SSHJump | port-forward | +|-------------|---------|---------|--------------| +| **GKE** | 64.51s | 62.51s | 69.75s | +| **Kind** | 26.65s | 28.37s | 28.75s | + +## Rsync benchmarks + +Creates a directory with 1000 1MB files and copies it to the SkyPilot pod. Runs rsync 5 times and reports the average of the last four runs.
+ +Usage: +``` +./rsync_bench.sh +# e.g., `./rsync_bench.sh gkedirect` will create a file called rsync_results_gkedirect.txt +``` + +| | Direct | SSHJump | port-forward | +|-------------|---------|---------|--------------| +| **GKE** | 337.49s | 347.49s | 361.49s | +| **Kind** | 31.49s | 31.71s | 33.21s | diff --git a/tests/kubernetes/networking_benchmarks/rsync_bench.sh b/tests/kubernetes/networking_benchmarks/rsync_bench.sh new file mode 100644 index 00000000000..de0b4bf0845 --- /dev/null +++ b/tests/kubernetes/networking_benchmarks/rsync_bench.sh @@ -0,0 +1,76 @@ +#!/bin/bash + +average=0 +# Check if the command line argument (suffix) is provided +if [ "$#" -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +suffix="$1" + +# Declare a file to store the output of the time command with the given suffix +output_file="rsync_results_${suffix}.txt" + +runexpt=1 + +# Check if the output file exists and ask if it should be overwritten +if [ -f "$output_file" ]; then + read -p "The output file $output_file already exists. Do you want to overwrite it? (y/n) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Analyzing existing results..." 
+ runexpt=0 + fi +fi + +if [ "$runexpt" -eq 1 ]; then + # Delete existing results + rm -f "$output_file" + + # Setup 1000 1MB (total 1GB) files to copy + mkdir -p $HOME/tmp/uploadtest + for i in {1..1000}; do dd if=/dev/urandom of=$HOME/tmp/uploadtest/$i bs=1M count=1; done + + # Create the cluster + sky launch -y -c test + + for i in {1..5}; do + ( + # Use the `time` command in a subshell to capture its output + time rsync -avz ~/tmp/uploadtest/ test:~/sky_workdir + ) 2>> "$output_file" + ssh test 'rm -rf ~/sky_workdir/' + done + + # Delete cluster after done + sky down -y test +fi + +# Process the results from the 2nd to 5th run +count=0 +while read -r line; do + # Check for the real time output from the time command + if [[ $line == real* ]]; then + if [ "$count" -eq 0 ]; then + # Skip first result + count=$((count+1)) + continue + fi + count=$((count+1)) + # Extract the minutes and seconds and convert to seconds + minutes=$(echo $line | cut -d'm' -f1 | sed 's/real //') + seconds=$(echo $line | cut -d'm' -f2 | cut -d's' -f1) + total_seconds=$(echo "$minutes*60 + $seconds" | bc) + # Accumulate the total time + average=$(echo "$average + $total_seconds" | bc) + fi +done < <(cat "$output_file") # start reading from the 2nd run + +# Subtract one from the count to account for the skipped first result +count=$((count-1)) +# Compute the average time +average=$(echo "scale=2; $average/$count" | bc) + +# Print the average time +echo "Average total time (from 2nd to 5th run): $average seconds" diff --git a/tests/kubernetes/networking_benchmarks/skylaunch_bench.sh b/tests/kubernetes/networking_benchmarks/skylaunch_bench.sh new file mode 100644 index 00000000000..ee67bc7a233 --- /dev/null +++ b/tests/kubernetes/networking_benchmarks/skylaunch_bench.sh @@ -0,0 +1,65 @@ +#!/bin/bash + +average=0 +# Check if the command line argument (suffix) is provided +if [ "$#" -ne 1 ]; then + echo "Usage: $0 " + exit 1 +fi + +suffix="$1" + +# Declare a file to store the output of the time 
command with the given suffix +output_file="skylaunch_results_${suffix}.txt" + +runexpt=1 + +# Check if the output file exists and ask if it should be overwritten +if [ -f "$output_file" ]; then + read -p "The output file $output_file already exists. Do you want to overwrite it? (y/n) " -n 1 -r + echo + if [[ ! $REPLY =~ ^[Yy]$ ]]; then + echo "Analyzing existing results..." + runexpt=0 + fi +fi + +if [ "$runexpt" -eq 1 ]; then + # Delete existing results + rm -f "$output_file" + for i in {1..5}; do + ( + # Use the `time` command in a subshell to capture its output + time sky launch -y -c test + ) 2>> "$output_file" + sky down -y test + done +fi + +# Process the results from the 2nd to 5th run +count=0 +while read -r line; do + # Check for the real time output from the time command + if [[ $line == real* ]]; then + if [ "$count" -eq 0 ]; then + # Skip first result + count=$((count+1)) + continue + fi + count=$((count+1)) + # Extract the minutes and seconds and convert to seconds + minutes=$(echo $line | cut -d'm' -f1 | sed 's/real //') + seconds=$(echo $line | cut -d'm' -f2 | cut -d's' -f1) + total_seconds=$(echo "$minutes*60 + $seconds" | bc) + # Accumulate the total time + average=$(echo "$average + $total_seconds" | bc) + fi +done < <(cat "$output_file") # start reading from the 2nd run + +# Subtract one from the count to account for the skipped first result +count=$((count-1)) +# Compute the average time +average=$(echo "scale=2; $average/$count" | bc) + +# Print the average time +echo "Average total time (from 2nd to 5th run): $average seconds" From 5b5aacdc5834878df2d387c71cdaa92be894f400 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 29 Aug 2023 10:02:16 +0530 Subject: [PATCH 163/183] comment --- sky/templates/kubernetes-ray.yml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 7f9026fbeee..3142f5d5715 100644 --- a/sky/templates/kubernetes-ray.yml.j2 
+++ b/sky/templates/kubernetes-ray.yml.j2 @@ -125,7 +125,7 @@ available_node_types: parent: skypilot component: {{cluster_name_on_cloud}}-ray-head skypilot-cluster: {{cluster_name_on_cloud}} - # This label is being used by the life cycle management of the ssh jump pod + # Identifies the SSH jump pod used by this pod. Used in life cycle management of the ssh jump pod. skypilot-sshjump: {{sshjump}} spec: # Change this if you altered the autoscaler_service_account above From aae46767e7093565cb92f6c44ddba6d5b12219a5 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 29 Aug 2023 10:07:43 +0530 Subject: [PATCH 164/183] comment --- sky/utils/kubernetes/sshjump_lcm.py | 42 +++++++++++++++-------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/sky/utils/kubernetes/sshjump_lcm.py b/sky/utils/kubernetes/sshjump_lcm.py index 055d6717595..d6fbec352ce 100644 --- a/sky/utils/kubernetes/sshjump_lcm.py +++ b/sky/utils/kubernetes/sshjump_lcm.py @@ -36,19 +36,19 @@ def poll(): - sys.stdout.write('enter poll()\n') + sys.stdout.write('Starting polling.\n') alert_delta = datetime.timedelta(seconds=alert_threshold) # Set delay for each retry retry_interval_delta = datetime.timedelta(seconds=retry_interval) - # Accumulated time of where no Ray pod exist. Used to compare against - # alert_threshold - noray_delta = datetime.timedelta() + # Accumulated time of where no SkyPilot cluster exists. 
Used to compare + # against alert_threshold + nocluster_delta = datetime.timedelta() while True: - sys.stdout.write(f'Sleep {retry_interval} seconds..\n') + sys.stdout.write(f'Sleeping {retry_interval} seconds..\n') time.sleep(retry_interval) # List the pods in the current namespace @@ -61,35 +61,36 @@ def poll(): if len(ret.items) == 0: sys.stdout.write( - f'DID NOT FIND pods with label "{label_selector}" in ' - f'namespace: "{current_namespace}"\n') - noray_delta = noray_delta + retry_interval_delta + f'Did not pods with label "{label_selector}" in ' + f'namespace {current_namespace}\n') + nocluster_delta = nocluster_delta + retry_interval_delta sys.stdout.write( - f'noray_delta after time increment: {noray_delta}, alert ' + f'Time since no pods found: {nocluster_delta}, alert ' f'threshold: {alert_delta}\n') else: sys.stdout.write( - f'FOUND pods with label "{label_selector}" in namespace: ' - f'"{current_namespace}"\n') + f'Found pods with label "{label_selector}" in namespace ' + f'{current_namespace}\n') # reset .. - noray_delta = datetime.timedelta() - sys.stdout.write(f'noray_delta is reset: {noray_delta}\n') + nocluster_delta = datetime.timedelta() + sys.stdout.write(f'noray_delta is reset: {nocluster_delta}\n') - if noray_delta >= alert_delta: + if nocluster_delta >= alert_delta: sys.stdout.write( - f'noray_delta: {noray_delta} crossed alert threshold: ' - f'{alert_delta}. Time to terminate myself\n') + f'nocluster_delta: {nocluster_delta} crossed alert threshold: ' + f'{alert_delta}. Time to terminate myself and my service.\n') try: # sshjump resources created under same name v1.delete_namespaced_service(current_name, current_namespace) v1.delete_namespaced_pod(current_name, current_namespace) except Exception as e: - sys.stdout.write(f'[ERROR] exit poll() with error: {e}\n') + sys.stdout.write('[ERROR] Deletion failed. 
Exiting ' + f'poll() with error: {e}\n') raise break - sys.stdout.write('exit poll()\n') + sys.stdout.write('Done polling.\n') def main(): @@ -101,8 +102,9 @@ def main(): sys.stdout.write(f'label_selector: {label_selector}\n') if not current_name or not current_namespace: - raise Exception('One or more environment variables is missing ' - 'with an actual value.') + # Raise Exception with message to terminate pod + raise Exception('Missing environment variables MY_POD_NAME or ' + 'MY_POD_NAMESPACE') poll() From 2eedca697643b06e1cfc6c2a054c68edf2df2094 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 29 Aug 2023 10:08:43 +0530 Subject: [PATCH 165/183] lint --- sky/utils/kubernetes/sshjump_lcm.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/sky/utils/kubernetes/sshjump_lcm.py b/sky/utils/kubernetes/sshjump_lcm.py index d6fbec352ce..e76672bf2f8 100644 --- a/sky/utils/kubernetes/sshjump_lcm.py +++ b/sky/utils/kubernetes/sshjump_lcm.py @@ -60,9 +60,8 @@ def poll(): raise if len(ret.items) == 0: - sys.stdout.write( - f'Did not pods with label "{label_selector}" in ' - f'namespace {current_namespace}\n') + sys.stdout.write(f'Did not pods with label "{label_selector}" in ' + f'namespace {current_namespace}\n') nocluster_delta = nocluster_delta + retry_interval_delta sys.stdout.write( f'Time since no pods found: {nocluster_delta}, alert ' From b07748d8ae9ec9ca7e75452add144e11477b7990 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 29 Aug 2023 20:18:05 +0530 Subject: [PATCH 166/183] autodown fixes --- sky/clouds/kubernetes.py | 4 ++- sky/skylet/providers/kubernetes/config.py | 18 ++++++++----- .../providers/kubernetes/node_provider.py | 25 +++++++++++-------- sky/templates/kubernetes-ray.yml.j2 | 4 ++- sky/templates/kubernetes-sshjump.yml.j2 | 8 +++--- sky/utils/kubernetes_utils.py | 4 +-- 6 files changed, 38 insertions(+), 25 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 5c8f12c1798..9855b5410ac 
100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -224,9 +224,11 @@ def make_deploy_resources_variables( 'k8s_ssh_key_secret_name': self.SKY_SSH_KEY_SECRET_NAME, 'k8s_acc_label_key': k8s_acc_label_key, 'k8s_acc_label_value': k8s_acc_label_value, + 'k8s_sshjump_name': self.SKY_SSH_JUMP_NAME, + # TODO(romilb): Create a lightweight image for SSH jump host + 'k8s_sshjump_image': self.IMAGE_CPU, # TODO(romilb): Allow user to specify custom images 'image_id': image, - 'sshjump': self.SKY_SSH_JUMP_NAME } return deploy_vars diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index 505d0269a2c..d77bf88220b 100644 --- a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -266,10 +266,16 @@ def _configure_ssh_jump(namespace, config): Also updates config['auth']['ssh_proxy_command'] to use the newly created jump pod. """ - # TODO(romilb): These variables should be moved and fetched from config - sshjump_name = clouds.Kubernetes.SKY_SSH_JUMP_NAME - sshjump_image = clouds.Kubernetes.IMAGE_CPU - key_label = clouds.Kubernetes.SKY_SSH_KEY_SECRET_NAME + pod_cfg = config['available_node_types']['ray_head_default']['node_config'] + + sshjump_name = pod_cfg['metadata']['labels']['skypilot-sshjump'] + sshjump_image = config['provider']['sshjump_image'] + + volumes = pod_cfg['spec']['volumes'] + # find 'secret-volume' and get the secret name + secret_volume = next( + filter(lambda x: x['name'] == 'secret-volume', volumes)) + ssh_key_secret_name = secret_volume['secret']['secretName'] # TODO(romilb): We currently split SSH jump pod and svc creation. Service # is first created in authentication.py::setup_kubernetes_authentication @@ -283,8 +289,8 @@ def _configure_ssh_jump(namespace, config): # and available before we create the SSH jump pod. If for any reason the # service is missing, we should raise an error. 
- kubernetes_utils.setup_sshjump_pod(sshjump_name, sshjump_image, key_label, - namespace) + kubernetes_utils.setup_sshjump_pod(sshjump_name, sshjump_image, + ssh_key_secret_name, namespace) return config diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 502689cd657..82fab8c9c02 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -248,6 +248,20 @@ def terminate_node(self, node_id): except Exception as e: logger.warning(config.log_prefix + f'Error occurred when analyzing SSH Jump pod: {e}') + try: + kubernetes.core_api().delete_namespaced_service( + node_id, + self.namespace, + _request_timeout=config.DELETION_TIMEOUT) + kubernetes.core_api().delete_namespaced_service( + f'{node_id}-ssh', + self.namespace, + _request_timeout=config.DELETION_TIMEOUT) + except kubernetes.api_exception(): + pass + # Note - delete pod after all other resources are deleted. + # This is to ensure there are no leftover resources if this down is run + # from within the pod, e.g., for autodown. 
try: kubernetes.core_api().delete_namespaced_pod( node_id, @@ -260,17 +274,6 @@ def terminate_node(self, node_id): ' but the pod was not found (404).') else: raise - try: - kubernetes.core_api().delete_namespaced_service( - node_id, - self.namespace, - _request_timeout=config.DELETION_TIMEOUT) - kubernetes.core_api().delete_namespaced_service( - f'{node_id}-ssh', - self.namespace, - _request_timeout=config.DELETION_TIMEOUT) - except kubernetes.api_exception(): - pass def terminate_nodes(self, node_ids): # TODO(romilb): terminate_nodes should be include optimizations for diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 3142f5d5715..7e46915b434 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -21,6 +21,8 @@ provider: timeout: {{timeout}} + sshjump_image: {{k8s_sshjump_image}} + # ServiceAccount created by the autoscaler for the head node pod that it # runs in. If this field isn't provided, the head pod config below must # contain a user-created service account with the proper permissions. @@ -126,7 +128,7 @@ available_node_types: component: {{cluster_name_on_cloud}}-ray-head skypilot-cluster: {{cluster_name_on_cloud}} # Identifies the SSH jump pod used by this pod. Used in life cycle management of the ssh jump pod. - skypilot-sshjump: {{sshjump}} + skypilot-sshjump: {{k8s_sshjump_name}} spec: # Change this if you altered the autoscaler_service_account above # or want to provide your own. 
diff --git a/sky/templates/kubernetes-sshjump.yml.j2 b/sky/templates/kubernetes-sshjump.yml.j2 index bc3d35f9595..d2844034263 100644 --- a/sky/templates/kubernetes-sshjump.yml.j2 +++ b/sky/templates/kubernetes-sshjump.yml.j2 @@ -5,7 +5,7 @@ pod_spec: name: {{ name }} labels: component: {{ name }} - app: skypilot + parent: skypilot spec: serviceAccountName: sky-sshjump-sa volumes: @@ -48,7 +48,7 @@ service_spec: kind: Service metadata: name: {{ name }} - app: skypilot + parent: skypilot spec: type: NodePort selector: @@ -65,7 +65,7 @@ service_account: kind: ServiceAccount metadata: name: sky-sshjump-sa - app: skypilot + parent: skypilot role: kind: Role apiVersion: rbac.authorization.k8s.io/v1 @@ -80,7 +80,7 @@ role_binding: kind: RoleBinding metadata: name: sky-sshjump-rb - app: skypilot + parent: skypilot subjects: - kind: ServiceAccount name: sky-sshjump-sa diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index 5fdcfd5831a..78771d4cc5f 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -681,12 +681,12 @@ def setup_sshjump_pod(sshjump_name: str, sshjump_image: str, except kubernetes.api_exception() as e: if e.status == 409: logger.info( - 'SSH Jump ServiceAcount already exists in the cluster, using ' + 'SSH Jump ServiceAccount already exists in the cluster, using ' 'it.') else: raise else: - logger.info('Created SSH Jump ServiceAcount.') + logger.info('Created SSH Jump ServiceAccount.') # Role try: kubernetes.auth_api().create_namespaced_role(namespace, content['role']) From e379291d0ac73405034c77a98b76e5a9f28ebf66 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 29 Aug 2023 20:18:05 +0530 Subject: [PATCH 167/183] lint --- sky/skylet/providers/kubernetes/config.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index d77bf88220b..715e71b7e4f 100644 --- 
a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -273,8 +273,8 @@ def _configure_ssh_jump(namespace, config): volumes = pod_cfg['spec']['volumes'] # find 'secret-volume' and get the secret name - secret_volume = next( - filter(lambda x: x['name'] == 'secret-volume', volumes)) + secret_volume = next(filter(lambda x: x['name'] == 'secret-volume', + volumes)) ssh_key_secret_name = secret_volume['secret']['secretName'] # TODO(romilb): We currently split SSH jump pod and svc creation. Service From 482a69df5adcef44e26b6709613579702aaaa071 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Wed, 30 Aug 2023 22:26:15 +0530 Subject: [PATCH 168/183] fix label --- sky/templates/kubernetes-sshjump.yml.j2 | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sky/templates/kubernetes-sshjump.yml.j2 b/sky/templates/kubernetes-sshjump.yml.j2 index d2844034263..490891548b9 100644 --- a/sky/templates/kubernetes-sshjump.yml.j2 +++ b/sky/templates/kubernetes-sshjump.yml.j2 @@ -48,7 +48,8 @@ service_spec: kind: Service metadata: name: {{ name }} - parent: skypilot + labels: + parent: skypilot spec: type: NodePort selector: From fb09398bbc118a9bb6b07bb73ae9f513e60fa4e0 Mon Sep 17 00:00:00 2001 From: Doyoung Kim <34902420+landscapepainter@users.noreply.github.com> Date: Thu, 31 Aug 2023 09:21:38 -0700 Subject: [PATCH 169/183] [k8s_cloud_beta1] Adding support for ssh using kubectl port-forward to access k8s instance (#2412) * Add sshjump support. * Update lcm script. - add comments - rename variables - typo * Set imagePullPolicy to IfNotPresent. 
* add support for port-forward * remove unused * comments * Disable ControlMaster for ssh_options_list * nit * update to disable rest of the ControlMaster * command runner rsync update * relocating run_on_k8s * relocate run_on_k8s * Make Kubernetes specific env variables available when joining a cluster via SSH * merge k8s_cloud_beta1 * format * remove redundant utils.py * format and comments * update with proxy_to_k8s * Update sky/authentication.py Co-authored-by: Romil Bhardwaj * resolving comments on structures * Update sky/utils/command_runner.py Co-authored-by: Romil Bhardwaj * document on nodeport/port-forward proxycommand * error handling when socat is not installed * removing KUBECONFIG from port-forward shell script * nit * nit * Add suport for nodeport * Update sky/utils/kubernetes_utils.py Co-authored-by: Romil Bhardwaj * update * switch svc when conflicting jump pod svc exist * format * Update sky/utils/kubernetes_utils.py Co-authored-by: Romil Bhardwaj * refactoring check for socat * resolve comments * add ServiceType enum and port-forward proxy script * update k8s env var access * add check for container status remove unused func * nit * update get_external_ip for portforward mode * conditionally use sudo and quote values of env var --------- Co-authored-by: Avi Weit Co-authored-by: hemildesai Co-authored-by: Romil Bhardwaj --- sky/authentication.py | 36 +++- sky/backends/backend_utils.py | 6 +- sky/backends/cloud_vm_ray_backend.py | 26 --- sky/clouds/kubernetes.py | 45 +--- .../providers/kubernetes/node_provider.py | 40 +++- ...ubernetes-port-forward-proxy-command.sh.j2 | 43 ++++ sky/templates/kubernetes-sshjump.yml.j2 | 3 +- sky/utils/command_runner.py | 42 ++-- sky/utils/command_runner.pyi | 30 ++- sky/utils/kubernetes_utils.py | 192 +++++++++++++++--- tests/test_config.py | 14 +- 11 files changed, 353 insertions(+), 124 deletions(-) create mode 100644 sky/templates/kubernetes-port-forward-proxy-command.sh.j2 diff --git a/sky/authentication.py 
b/sky/authentication.py index aa9f336c27a..53fc27fd1c1 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -37,6 +37,7 @@ from sky import clouds from sky import sky_logging +from sky import skypilot_config from sky.adaptors import gcp from sky.adaptors import ibm from sky.skylet.providers.lambda_cloud import lambda_utils @@ -378,8 +379,13 @@ def setup_scp_authentication(config: Dict[str, Any]) -> Dict[str, Any]: def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: + # Default ssh session is established with kubectl port-forwarding with + # ClusterIP service + nodeport_mode = kubernetes_utils.KubernetesNetworkingMode.NODEPORT + port_forward_mode = kubernetes_utils.KubernetesNetworkingMode.PORT_FORWARD + ssh_setup_mode = skypilot_config.get_nested(('kubernetes', 'networking'), + port_forward_mode.value) get_or_generate_keys() - # Run kubectl command to add the public key to the cluster. public_key_path = os.path.expanduser(PUBLIC_SSH_KEY_PATH) key_label = clouds.Kubernetes.SKY_SSH_KEY_SECRET_NAME @@ -404,16 +410,36 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: logger.error(suffix) raise + ssh_jump_name = clouds.Kubernetes.SKY_SSH_JUMP_NAME + if ssh_setup_mode.lower() == nodeport_mode.value.lower(): + network_mode = nodeport_mode + service_type = kubernetes_utils.KubernetesServiceType.NODEPORT + + elif ssh_setup_mode.lower() == port_forward_mode.value.lower(): + kubernetes_utils.check_port_forward_mode_dependencies() + network_mode = port_forward_mode + # Using `kubectl port-forward` creates a direct tunnel to jump pod and + # does not require opening any ports on Kubernetes nodes. As a result, + # the service can be a simple ClusterIP service which we access with + # `kubectl port-forward`. + service_type = kubernetes_utils.KubernetesServiceType.CLUSTERIP + else: + raise ValueError(f'Unsupported kubernetes networking mode: ' + f'{ssh_setup_mode}. 
The mode has to be either ' + f'\'{port_forward_mode.value}\' or ' + f'\'{nodeport_mode.value}\'. ' + 'Please check: ~/.sky/config.yaml') # Setup service for SSH jump pod. We create the SSH jump service here # because we need to know the service IP address and port to set the # ssh_proxy_command in the autoscaler config. namespace = kubernetes_utils.get_current_kube_config_context_namespace() - sshjump_name = clouds.Kubernetes.SKY_SSH_JUMP_NAME - - kubernetes_utils.setup_sshjump_svc(sshjump_name, namespace) + kubernetes_utils.setup_sshjump_svc(ssh_jump_name, namespace, service_type) ssh_proxy_cmd = kubernetes_utils.get_ssh_proxy_command( - PRIVATE_SSH_KEY_PATH, sshjump_name, namespace) + PRIVATE_SSH_KEY_PATH, ssh_jump_name, network_mode, namespace, + clouds.Kubernetes.PORT_FORWARD_PROXY_CMD_PATH, + clouds.Kubernetes.PORT_FORWARD_PROXY_CMD_TEMPLATE) config['auth']['ssh_proxy_command'] = ssh_proxy_cmd + return config diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 1d793496cc4..f41c45e545f 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -1355,7 +1355,7 @@ def wait_until_ray_cluster_ready( def ssh_credential_from_yaml(cluster_yaml: str, docker_user: Optional[str] = None - ) -> Dict[str, str]: + ) -> Dict[str, Any]: """Returns ssh_user, ssh_private_key and ssh_control name.""" config = common_utils.read_yaml(cluster_yaml) auth_section = config['auth'] @@ -1371,6 +1371,10 @@ def ssh_credential_from_yaml(cluster_yaml: str, } if docker_user is not None: credentials['docker_user'] = docker_user + ssh_provider_module = config['provider']['module'] + # If we are running ssh command on kubernetes node. 
+ if 'kubernetes' in ssh_provider_module: + credentials['disable_control_master'] = True return credentials diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 30377f26881..0a034261dbf 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2988,37 +2988,12 @@ def _sync_file_mounts( self._execute_file_mounts(handle, all_file_mounts) self._execute_storage_mounts(handle, storage_mounts) - def _update_envs_for_k8s(self, handle: CloudVmRayResourceHandle, - task: task_lib.Task) -> None: - """Update envs with env vars from Kubernetes if cloud is Kubernetes. - - Kubernetes automatically populates containers with critical environment - variables, such as those for discovering services running in the - cluster and CUDA/nvidia environment variables. We need to update task - environment variables with these env vars. This is needed for GPU - support and service discovery. - - See https://github.com/skypilot-org/skypilot/issues/2287 for - more details. - """ - if isinstance(handle.launched_resources.cloud, clouds.Kubernetes): - temp_envs = copy.deepcopy(task.envs) - cloud_env_vars = handle.launched_resources.cloud.query_env_vars( - handle.cluster_name_on_cloud) - task.update_envs(cloud_env_vars) - - # Re update the envs with the original envs to give priority to - # the original envs. - task.update_envs(temp_envs) - def _setup(self, handle: CloudVmRayResourceHandle, task: task_lib.Task, detach_setup: bool) -> None: start = time.time() style = colorama.Style fore = colorama.Fore - self._update_envs_for_k8s(handle, task) - if task.setup is None: return @@ -3327,7 +3302,6 @@ def _execute( # Check the task resources vs the cluster resources. 
Since `sky exec` # will not run the provision and _check_existing_cluster self.check_resources_fit_cluster(handle, task) - self._update_envs_for_k8s(handle, task) resources_str = backend_utils.get_task_resources_str(task) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 9855b5410ac..0963215452d 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -19,7 +19,7 @@ logger = sky_logging.init_logger(__name__) -_CREDENTIAL_PATH = '~/.kube/config' +CREDENTIAL_PATH = '~/.kube/config' @clouds.CLOUD_REGISTRY.register @@ -28,7 +28,9 @@ class Kubernetes(clouds.Cloud): SKY_SSH_KEY_SECRET_NAME = f'sky-ssh-{common_utils.get_user_hash()}' SKY_SSH_JUMP_NAME = f'sky-sshjump-{common_utils.get_user_hash()}' - + PORT_FORWARD_PROXY_CMD_TEMPLATE = \ + 'kubernetes-port-forward-proxy-command.sh.j2' + PORT_FORWARD_PROXY_CMD_PATH = '~/.sky/port-forward-proxy-cmd.sh' # Timeout for resource provisioning. This timeout determines how long to # wait for pod to be in pending status before giving up. 
# Larger timeout may be required for autoscaling clusters, since autoscaler @@ -296,7 +298,7 @@ def _make(instance_list): @classmethod def check_credentials(cls) -> Tuple[bool, Optional[str]]: - if os.path.exists(os.path.expanduser(_CREDENTIAL_PATH)): + if os.path.exists(os.path.expanduser(CREDENTIAL_PATH)): # Test using python API try: return kubernetes_utils.check_credentials() @@ -305,10 +307,10 @@ def check_credentials(cls) -> Tuple[bool, Optional[str]]: f'{common_utils.format_exception(e)}') else: return (False, 'Credentials not found - ' - f'check if {_CREDENTIAL_PATH} exists.') + f'check if {CREDENTIAL_PATH} exists.') def get_credential_file_mounts(self) -> Dict[str, str]: - return {_CREDENTIAL_PATH: _CREDENTIAL_PATH} + return {CREDENTIAL_PATH: CREDENTIAL_PATH} def instance_type_exists(self, instance_type: str) -> bool: return kubernetes_utils.KubernetesInstanceType.is_valid_instance_type( @@ -365,36 +367,3 @@ def query_status(cls, name: str, tag_filters: Dict[str, str], cluster_status.append(status_lib.ClusterStatus.INIT) # If pods are not found, we don't add them to the return list return cluster_status - - @classmethod - def query_env_vars(cls, name: str) -> Dict[str, str]: - namespace = kubernetes_utils.get_current_kube_config_context_namespace() - pod = kubernetes.core_api().list_namespaced_pod( - namespace, - label_selector=f'skypilot-cluster={name},ray-node-type=head' - ).items[0] - response = kubernetes.stream()( - kubernetes.core_api().connect_get_namespaced_pod_exec, - pod.metadata.name, - namespace, - command=['env'], - stderr=True, - stdin=False, - stdout=True, - tty=False, - _request_timeout=kubernetes.API_TIMEOUT) - # Split response by newline and filter lines containing '=' - raw_lines = response.split('\n') - filtered_lines = [line for line in raw_lines if '=' in line] - - # Split each line at the first '=' occurrence - lines = [line.split('=', 1) for line in filtered_lines] - - # Construct the dictionary using only valid environment variable 
names - env_vars = {} - for line in lines: - key = line[0] - if common_utils.is_valid_env_var(key): - env_vars[key] = line[1] - - return env_vars diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 82fab8c9c02..8e44379d19e 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -214,10 +214,12 @@ def create_node(self, node_config, tags, count): 'Cluster may be out of resources or ' 'may be too slow to autoscale.') all_ready = True - + pods_and_containers_running = False + pods = [] for node in new_nodes: pod = kubernetes.core_api().read_namespaced_pod( node.metadata.name, self.namespace) + pods.append(pod) if pod.status.phase == 'Pending': # Iterate over each pod to check their status if pod.status.container_statuses is not None: @@ -237,10 +239,44 @@ def create_node(self, node_config, tags, count): # If container_statuses is None, then the pod hasn't # been scheduled yet. all_ready = False - if all_ready: + + # check if all the pods and containers within the pods are running + if all([ pod.status.phase == "Running" for pod in pods]) \ + and all([container.state.running for pod in pods for container in pod.status.container_statuses]): + pods_and_containers_running = True + + if all_ready and pods_and_containers_running: break time.sleep(1) + # Kubernetes automatically populates containers with critical + # environment variables, such as those for discovering services running + # in the cluster and CUDA/nvidia environment variables. We need to + # update task environment variables with these env vars. This is needed + # for GPU support and service discovery. + # See https://github.com/skypilot-org/skypilot/issues/2287 for + # more details. + # Capturing env. var. from the pod's runtime and writes them to + # /etc/profile.d/ making them available for all users in future + # shell sessions. 
+ set_k8s_env_var_cmd = [ + '/bin/sh', '-c', + ('printenv | awk -F "=" \'{print "export " $1 "=\\047" $2 "\\047"}\' > ~/k8s_env_var.sh;' + 'mv ~/k8s_env_var.sh /etc/profile.d/k8s_env_var.sh || ' + 'sudo mv ~/k8s_env_var.sh /etc/profile.d/k8s_env_var.sh') + ] + for new_node in new_nodes: + kubernetes.stream()( + kubernetes.core_api().connect_get_namespaced_pod_exec, + new_node.metadata.name, + self.namespace, + command=set_k8s_env_var_cmd, + stderr=True, + stdin=False, + stdout=True, + tty=False, + _request_timeout=kubernetes.API_TIMEOUT) + def terminate_node(self, node_id): logger.info(config.log_prefix + 'calling delete_namespaced_pod') try: diff --git a/sky/templates/kubernetes-port-forward-proxy-command.sh.j2 b/sky/templates/kubernetes-port-forward-proxy-command.sh.j2 new file mode 100644 index 00000000000..fa71df3a0ec --- /dev/null +++ b/sky/templates/kubernetes-port-forward-proxy-command.sh.j2 @@ -0,0 +1,43 @@ +#!/usr/bin/env bash +set -uo pipefail + +# Checks if socat is installed +if ! command -v socat > /dev/null; then + echo "Using 'port-forward' mode to run ssh session on Kubernetes instances requires 'socat' to be installed. Please install 'socat'" >&2 + exit +fi + +# Checks if lsof is installed +if ! command -v lsof > /dev/null; then + echo "Checking port availability for 'port-forward' mode requires 'lsof' to be installed. Please install 'lsof'" >&2 + exit 1 +fi + +# Function to check if port is in use +is_port_in_use() { + local port="$1" + lsof -i :${port} > /dev/null 2>&1 +} + +# Start from a fixed local port and increment if in use +local_port={{ local_port }} +while is_port_in_use "${local_port}"; do + local_port=$((local_port + 1)) +done + +# Establishes connection between local port and the ssh jump pod +kubectl port-forward svc/{{ ssh_jump_name }} "${local_port}":22 & + +# Terminate the port-forward process when this script exits. +K8S_PORT_FWD_PID=$! 
+trap "kill $K8S_PORT_FWD_PID" EXIT + +# checks if a connection to local_port of 127.0.0.1:[local_port] is established +while ! nc -z 127.0.0.1 "${local_port}"; do + sleep 0.1 +done + +# Establishes two directional byte streams to handle stdin/stdout between +# terminal and the jump pod. +# socat process terminates when port-forward terminates. +socat - tcp:127.0.0.1:"${local_port}" \ No newline at end of file diff --git a/sky/templates/kubernetes-sshjump.yml.j2 b/sky/templates/kubernetes-sshjump.yml.j2 index 490891548b9..7c7c3fbc877 100644 --- a/sky/templates/kubernetes-sshjump.yml.j2 +++ b/sky/templates/kubernetes-sshjump.yml.j2 @@ -51,14 +51,13 @@ service_spec: labels: parent: skypilot spec: - type: NodePort + type: {{ service_type }} selector: component: {{ name }} ports: - protocol: TCP port: 22 targetPort: 22 - # The following ServiceAccount/Role/RoleBinding sets up an RBAC for life cycle # management of the jump pod/service service_account: diff --git a/sky/utils/command_runner.py b/sky/utils/command_runner.py index 08fde49354d..7969694632c 100644 --- a/sky/utils/command_runner.py +++ b/sky/utils/command_runner.py @@ -42,13 +42,16 @@ def _ssh_control_path(ssh_control_filename: Optional[str]) -> Optional[str]: return path -def ssh_options_list(ssh_private_key: Optional[str], - ssh_control_name: Optional[str], - *, - ssh_proxy_command: Optional[str] = None, - docker_ssh_proxy_command: Optional[str] = None, - timeout: int = 30, - port: int = 22) -> List[str]: +def ssh_options_list( + ssh_private_key: Optional[str], + ssh_control_name: Optional[str], + *, + ssh_proxy_command: Optional[str] = None, + docker_ssh_proxy_command: Optional[str] = None, + timeout: int = 30, + port: int = 22, + disable_control_master: Optional[bool] = False, +) -> List[str]: """Returns a list of sane options for 'ssh'.""" # Forked from Ray SSHOptions: # https://github.com/ray-project/ray/blob/master/python/ray/autoscaler/_private/command_runner.py @@ -79,7 +82,13 @@ def 
ssh_options_list(ssh_private_key: Optional[str], } # SSH Control will have a severe delay when using docker_ssh_proxy_command. # TODO(tian): Investigate why. - if ssh_control_name is not None and docker_ssh_proxy_command is None: + # We also do not use ControlMaster when we use `kubectl port-forward` + # to access Kubernetes pods over SSH+Proxycommand. This is because the + # process running ProxyCommand is kept running as long as the ssh session + # is running and the ControlMaster keeps the session, which results in + # 'ControlPersist' number of seconds delay per ssh commands ran. + if ssh_control_name is not None and docker_ssh_proxy_command is None \ + and not disable_control_master: arg_dict.update({ # Control path: important optimization as we do multiple ssh in one # sky.launch(). @@ -136,6 +145,7 @@ def __init__( ssh_proxy_command: Optional[str] = None, port: int = 22, docker_user: Optional[str] = None, + disable_control_master: Optional[bool] = False, ): """Initialize SSHCommandRunner. @@ -158,13 +168,17 @@ def __init__( port: The port to use for ssh. docker_user: The docker user to use for ssh. If specified, the command will be run inside a docker container which have a ssh - server running at port sky.skylet.constants.DEFAULT_DOCKER_PORT. + server running at port sky.skylet.constants.DEFAULT_DOCKER_PORT + disable_control_master: bool; specifies either or not the ssh + command will utilize ControlMaster. We currently disable + it for k8s instance. 
""" self.ssh_private_key = ssh_private_key self.ssh_control_name = ( None if ssh_control_name is None else hashlib.md5( ssh_control_name.encode()).hexdigest()[:_HASH_MAX_LENGTH]) self._ssh_proxy_command = ssh_proxy_command + self.disable_control_master = disable_control_master if docker_user is not None: assert port is None or port == 22, ( f'port must be None or 22 for docker_user, got {port}.') @@ -190,6 +204,7 @@ def make_runner_list( ssh_private_key: str, ssh_control_name: Optional[str] = None, ssh_proxy_command: Optional[str] = None, + disable_control_master: Optional[bool] = False, port_list: Optional[List[int]] = None, docker_user: Optional[str] = None, ) -> List['SSHCommandRunner']: @@ -198,7 +213,8 @@ def make_runner_list( port_list = [22] * len(ip_list) return [ SSHCommandRunner(ip, ssh_user, ssh_private_key, ssh_control_name, - ssh_proxy_command, port, docker_user) + ssh_proxy_command, port, docker_user, + disable_control_master) for ip, port in zip(ip_list, port_list) ] @@ -228,7 +244,9 @@ def _ssh_base_command(self, *, ssh_mode: SshMode, ssh_proxy_command=self._ssh_proxy_command, docker_ssh_proxy_command=docker_ssh_proxy_command, port=self.port, - ) + [f'{self.ssh_user}@{self.ip}'] + disable_control_master=self.disable_control_master) + [ + f'{self.ssh_user}@{self.ip}' + ] def run( self, @@ -388,7 +406,7 @@ def rsync( ssh_proxy_command=self._ssh_proxy_command, docker_ssh_proxy_command=docker_ssh_proxy_command, port=self.port, - )) + disable_control_master=self.disable_control_master)) rsync_command.append(f'-e "ssh {ssh_options}"') # To support spaces in the path, we need to quote source and target. 
# rsync doesn't support '~' in a quoted local path, but it is ok to diff --git a/sky/utils/command_runner.pyi b/sky/utils/command_runner.pyi index e5feb5fb8db..425f5c60213 100644 --- a/sky/utils/command_runner.pyi +++ b/sky/utils/command_runner.pyi @@ -20,10 +20,13 @@ RSYNC_FILTER_OPTION: str RSYNC_EXCLUDE_OPTION: str -def ssh_options_list(ssh_private_key: Optional[str], - ssh_control_name: Optional[str], - *, - timeout: int = ...) -> List[str]: +def ssh_options_list( + ssh_private_key: Optional[str], + ssh_control_name: Optional[str], + *, + timeout: int = ..., + disable_control_master: Optional[bool] = False, +) -> List[str]: ... @@ -40,14 +43,18 @@ class SSHCommandRunner: ssh_control_name: Optional[str] docker_user: str port: int + disable_control_master: Optional[bool] - def __init__(self, - ip: str, - ssh_user: str, - ssh_private_key: str, - ssh_control_name: Optional[str] = ..., - port: int = ..., - docker_user: Optional[str] = ...) -> None: + def __init__( + self, + ip: str, + ssh_user: str, + ssh_private_key: str, + ssh_control_name: Optional[str] = ..., + port: int = ..., + docker_user: Optional[str] = ..., + disable_control_master: Optional[bool] = ..., + ) -> None: ... @staticmethod @@ -59,6 +66,7 @@ class SSHCommandRunner: ssh_proxy_command: Optional[str] = ..., port_list: Optional[List[int]] = ..., docker_user: Optional[str] = ..., + disable_control_master: Optional[bool] = ..., ) -> List['SSHCommandRunner']: ... 
diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index 78771d4cc5f..7b3f762705a 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -1,7 +1,9 @@ """Kubernetes utilities for SkyPilot.""" +import enum import math import os import re +import subprocess from typing import Any, Dict, List, Optional, Set, Tuple, Union from urllib.parse import urlparse @@ -12,11 +14,13 @@ from sky import exceptions from sky import sky_logging from sky.adaptors import kubernetes +from sky.backends import backend_utils from sky.utils import common_utils from sky.utils import env_options from sky.utils import ux_utils DEFAULT_NAMESPACE = 'default' +LOCAL_PORT_FOR_PORT_FORWARD = 23100 MEMORY_SIZE_UNITS = { 'B': 1, @@ -30,6 +34,20 @@ logger = sky_logging.init_logger(__name__) +class KubernetesNetworkingMode(enum.Enum): + """Enum for the different types of networking modes for accessing + jump pods. + """ + NODEPORT = 'NodePort' + PORT_FORWARD = 'Port_Forward' + + +class KubernetesServiceType(enum.Enum): + """Enum for the different types of services.""" + NODEPORT = 'NodePort' + CLUSTERIP = 'ClusterIP' + + class GPULabelFormatter: """Base class to define a GPU label formatter for a Kubernetes cluster @@ -355,7 +373,9 @@ def get_port(svc_name: str, namespace: str) -> int: return head_service.spec.ports[0].node_port -def get_external_ip(): +def get_external_ip(network_mode: Optional[KubernetesNetworkingMode]): + if network_mode == KubernetesNetworkingMode.PORT_FORWARD: + return '127.0.0.1' # Return the IP address of the first node with an external IP nodes = kubernetes.core_api().list_node().items for node in nodes: @@ -603,30 +623,97 @@ def __str__(self): return self.name -def get_ssh_proxy_command(private_key_path: str, sshjump_name: str, - namespace: str) -> str: +def construct_ssh_jump_command(private_key_path: str, + ssh_jump_port: int, + ssh_jump_ip: str, + proxy_cmd_path: Optional[str] = None) -> str: + ssh_jump_proxy_command = (f'ssh 
-tt -i {private_key_path} ' + '-o StrictHostKeyChecking=no ' + '-o UserKnownHostsFile=/dev/null ' + f'-o IdentitiesOnly=yes -p {ssh_jump_port} ' + f'-W %h:%p sky@{ssh_jump_ip}') + if proxy_cmd_path is not None: + proxy_cmd_path = os.path.expanduser(proxy_cmd_path) + # adding execution permission to the proxy command script + os.chmod(proxy_cmd_path, os.stat(proxy_cmd_path).st_mode | 0o111) + ssh_jump_proxy_command += f' -o ProxyCommand=\'{proxy_cmd_path}\' ' + return ssh_jump_proxy_command + + +def get_ssh_proxy_command(private_key_path: str, ssh_jump_name: str, + network_mode: KubernetesNetworkingMode, + namespace: str, port_fwd_proxy_cmd_path: str, + port_fwd_proxy_cmd_template: str) -> str: """Generates the SSH proxy command to connect through the SSH jump pod. + By default, establishing an SSH connection creates a communication + channel to a remote node by setting up a TCP connection. When a + ProxyCommand is specified, this default behavior is overridden. The command + specified in ProxyCommand is executed, and its standard input and output + become the communication channel for the SSH session. + + Pods within a Kubernetes cluster have internal IP addresses that are + typically not accessible from outside the cluster. Since the default TCP + connection of SSH won't allow access to these pods, we employ a + ProxyCommand to establish the required communication channel. We offer this + in two different networking options: NodePort/port-forward. + + With the NodePort networking mode, a NodePort service is launched. This + service opens an external port on the node which redirects to the desired + port within the pod. When establishing an SSH session in this mode, the + ProxyCommand makes use of this external port to create a communication + channel directly to port 22, which is the default port ssh server listens + on, of the jump pod. 
+ + With Port-forward mode, instead of directly exposing an external port, + 'kubectl port-forward' sets up a tunnel between a local port + (127.0.0.1:23100) and port 22 of the jump pod. Then we establish a TCP + connection to the local end of this tunnel, 127.0.0.1:23100, using 'socat'. + This is setup in the inner ProxyCommand of the nested ProxyCommand, and the + rest is the same as NodePort approach, which the outer ProxyCommand + establishes a communication channel between 127.0.0.1:23100 and port 22 on + the jump pod. Consequently, any stdin provided on the local machine is + forwarded through this tunnel to the application (SSH server) listening in + the pod. Similarly, any output from the application in the pod is tunneled + back and displayed in the terminal on the local machine. + Args: - private_key_path: Path to the private key to use for SSH. This key must - be authorized to access the SSH jump pod. - sshjump_name: Name of the SSH jump service to use + private_key_path: str; Path to the private key to use for SSH. + This key must be authorized to access the SSH jump pod. + ssh_jump_name: str; Name of the SSH jump service to use + network_mode: KubernetesNetworkingMode; networking mode for ssh + session. 
It is either 'NODEPORT' or 'PORT_FORWARD' namespace: Kubernetes namespace to use + port_fwd_proxy_cmd_path: str; path to the script used as Proxycommand + with 'kubectl port-forward' + port_fwd_proxy_cmd_template: str; template used to create + 'kubectl port-forward' Proxycommand """ - # Fetch service port and IP to connect to for the jump svc - ssh_jump_port = get_port(sshjump_name, namespace) - ssh_jump_ip = get_external_ip() - - ssh_jump_proxy_command = (f'ssh -tt -i {private_key_path} ' - '-o StrictHostKeyChecking=no ' - '-o UserKnownHostsFile=/dev/null ' - '-o IdentitiesOnly=yes ' - f'-p {ssh_jump_port} -W %h:%p sky@{ssh_jump_ip}') - + # Fetch IP to connect to for the jump svc + ssh_jump_ip = get_external_ip(network_mode) + if network_mode == KubernetesNetworkingMode.NODEPORT: + ssh_jump_port = get_port(ssh_jump_name, namespace) + ssh_jump_proxy_command = construct_ssh_jump_command( + private_key_path, ssh_jump_port, ssh_jump_ip) + # Setting kubectl port-forward/socat to establish ssh session using + # ClusterIP service to disallow any ports opened + else: + ssh_jump_port = LOCAL_PORT_FOR_PORT_FORWARD + vars_to_fill = { + 'ssh_jump_name': ssh_jump_name, + 'local_port': ssh_jump_port, + } + backend_utils.fill_template(port_fwd_proxy_cmd_template, + vars_to_fill, + output_path=port_fwd_proxy_cmd_path) + ssh_jump_proxy_command = construct_ssh_jump_command( + private_key_path, ssh_jump_port, ssh_jump_ip, + port_fwd_proxy_cmd_path) return ssh_jump_proxy_command -def setup_sshjump_svc(sshjump_name: str, namespace: str): +def setup_sshjump_svc(ssh_jump_name: str, namespace: str, + service_type: KubernetesServiceType): """Sets up Kubernetes service resource to access for SSH jump pod. 
This method acts as a necessary complement to be run along with @@ -635,23 +722,56 @@ def setup_sshjump_svc(sshjump_name: str, namespace: str): Args: sshjump_name: Name to use for the SSH jump service namespace: Namespace to create the SSH jump service in + service_type: Networking configuration on either to use NodePort + or ClusterIP service to ssh in """ # Fill in template - ssh_key_secret and sshjump_image are not required for # the service spec, so we pass in empty strs. - content = fill_sshjump_template('', '', sshjump_name) + content = fill_sshjump_template('', '', ssh_jump_name, service_type.value) # Create service try: kubernetes.core_api().create_namespaced_service(namespace, content['service_spec']) except kubernetes.api_exception() as e: + # SSH Jump Pod service already exists. if e.status == 409: - logger.warning( - f'SSH Jump Service {sshjump_name} already exists in the ' - 'cluster, using it.') + ssh_jump_service = kubernetes.core_api().read_namespaced_service( + name=ssh_jump_name, namespace=namespace) + curr_svc_type = ssh_jump_service.spec.type + if service_type.value == curr_svc_type: + # If the currently existing SSH Jump service's type is identical + # to user's configuration for networking mode + logger.warning( + f'SSH Jump Service {ssh_jump_name} already exists in the ' + 'cluster, using it.') + else: + # If a different type of service type for SSH Jump pod compared + # to user's configuration for networking mode exists, we remove + # existing servie to create a new one following user's config + kubernetes.core_api().delete_namespaced_service( + name=ssh_jump_name, namespace=namespace) + kubernetes.core_api().create_namespaced_service( + namespace, content['service_spec']) + port_forward_mode = KubernetesNetworkingMode.PORT_FORWARD.value + nodeport_mode = KubernetesNetworkingMode.NODEPORT.value + clusterip_svc = KubernetesServiceType.CLUSTERIP.value + nodeport_svc = KubernetesServiceType.NODEPORT.value + curr_network_mode = port_forward_mode 
\ + if curr_svc_type == clusterip_svc else nodeport_mode + new_network_mode = nodeport_mode \ + if curr_svc_type == clusterip_svc else port_forward_mode + new_svc_type = nodeport_svc \ + if curr_svc_type == clusterip_svc else clusterip_svc + logger.info( + f'Switching the networking mode from ' + f'\'{curr_network_mode}\' to \'{new_network_mode}\' ' + f'following networking configuration. Deleting existing ' + f'\'{curr_svc_type}\' service and recreating as ' + f'\'{new_svc_type}\' service.') else: raise else: - logger.info(f'Created SSH Jump Service {sshjump_name}.') + logger.info(f'Created SSH Jump Service {ssh_jump_name}.') def setup_sshjump_pod(sshjump_name: str, sshjump_image: str, @@ -673,7 +793,10 @@ def setup_sshjump_pod(sshjump_name: str, sshjump_image: str, ssh_key_secret: Secret name for the SSH key stored in the cluster namespace: Namespace to create the SSH jump pod in """ - content = fill_sshjump_template(ssh_key_secret, sshjump_image, sshjump_name) + # Fill in template - service is created separately so service_type is not + # required, so we pass in empty str. 
+ content = fill_sshjump_template(ssh_key_secret, sshjump_image, sshjump_name, + '') # ServiceAccount try: kubernetes.core_api().create_namespaced_service_account( @@ -784,7 +907,7 @@ def find(l, predicate): def fill_sshjump_template(ssh_key_secret: str, sshjump_image: str, - sshjump_name: str) -> Dict: + sshjump_name: str, service_type: str) -> Dict: template_path = os.path.join(sky.__root_dir__, 'templates', 'kubernetes-sshjump.yml.j2') if not os.path.exists(template_path): @@ -795,6 +918,25 @@ def fill_sshjump_template(ssh_key_secret: str, sshjump_image: str, j2_template = jinja2.Template(template) cont = j2_template.render(name=sshjump_name, image=sshjump_image, - secret=ssh_key_secret) + secret=ssh_key_secret, + service_type=service_type) content = yaml.safe_load(cont) return content + + +def check_port_forward_mode_dependencies() -> None: + """Checks if 'socat' and 'lsof' is installed""" + for name, option in [('socat', '-V'), ('lsof', '-v')]: + try: + subprocess.run([name, option], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + check=True) + except FileNotFoundError: + with ux_utils.print_exception_no_traceback(): + raise RuntimeError( + f'`{name}` is required to setup Kubernetes cloud with ' + f'`{KubernetesNetworkingMode.PORT_FORWARD.value}` default ' + 'networking mode and it is not installed. 
' + 'For Debian/Ubuntu system, install it with:\n' + f' $ sudo apt install {name}') from None diff --git a/tests/test_config.py b/tests/test_config.py index dffd4843ffd..cb0ab42df78 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -6,9 +6,12 @@ from sky import skypilot_config from sky.utils import common_utils +from sky.utils import kubernetes_utils VPC_NAME = 'vpc-12345678' PROXY_COMMAND = 'ssh -W %h:%p -i ~/.ssh/id_rsa -o StrictHostKeyChecking=no' +NODEPORT_MODE_NAME = kubernetes_utils.KubernetesNetworkingMode.NODEPORT.value +PORT_FORWARD_MODE_NAME = kubernetes_utils.KubernetesNetworkingMode.PORT_FORWARD.value def _reload_config() -> None: @@ -34,6 +37,8 @@ def _create_config_file(config_file_path: pathlib.Path) -> None: vpc_name: {VPC_NAME} use_internal_ips: true ssh_proxy_command: {PROXY_COMMAND} + kubernetes: + networking: {NODEPORT_MODE_NAME} """)) @@ -67,14 +72,19 @@ def test_config_get_set_nested(monkeypatch, tmp_path) -> None: assert skypilot_config.get_nested(('aws', 'use_internal_ips'), None) assert skypilot_config.get_nested(('aws', 'ssh_proxy_command'), None) == PROXY_COMMAND - + assert skypilot_config.get_nested(('kubernetes', 'networking'), + None) == NODEPORT_MODE_NAME # Check set_nested() will copy the config dict and return a new dict new_config = skypilot_config.set_nested(('aws', 'ssh_proxy_command'), 'new_value') assert new_config['aws']['ssh_proxy_command'] == 'new_value' assert skypilot_config.get_nested(('aws', 'ssh_proxy_command'), None) == PROXY_COMMAND - + new_config = skypilot_config.set_nested(('kubernetes', 'networking'), + PORT_FORWARD_MODE_NAME) + assert new_config['aws']['ssh_proxy_command'] == PORT_FORWARD_MODE_NAME + assert skypilot_config.get_nested(('kubernetes', 'networking'), + None) == NODEPORT_MODE_NAME # Check that dumping the config to a file with the new None can be reloaded new_config2 = skypilot_config.set_nested(('aws', 'ssh_proxy_command'), None) new_config_path = tmp_path / 'new_config.yaml' From 
a721f832e0c48a16ab95f511270185410de37f5d Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 31 Aug 2023 22:01:46 +0530 Subject: [PATCH 170/183] refactor --- sky/authentication.py | 2 +- sky/skylet/providers/kubernetes/config.py | 1 - sky/utils/kubernetes_utils.py | 10 +++++----- tests/test_config.py | 2 +- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/sky/authentication.py b/sky/authentication.py index 53fc27fd1c1..ecda611a134 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -382,7 +382,7 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: # Default ssh session is established with kubectl port-forwarding with # ClusterIP service nodeport_mode = kubernetes_utils.KubernetesNetworkingMode.NODEPORT - port_forward_mode = kubernetes_utils.KubernetesNetworkingMode.PORT_FORWARD + port_forward_mode = kubernetes_utils.KubernetesNetworkingMode.PORTFORWARD ssh_setup_mode = skypilot_config.get_nested(('kubernetes', 'networking'), port_forward_mode.value) get_or_generate_keys() diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index 715e71b7e4f..92ca5d0c13c 100644 --- a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -3,7 +3,6 @@ import math from typing import Any, Dict, Union -from sky import clouds from sky.adaptors import kubernetes from sky.utils import kubernetes_utils diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index 7b3f762705a..b1b6eb1dc13 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -39,7 +39,7 @@ class KubernetesNetworkingMode(enum.Enum): jump pods. 
""" NODEPORT = 'NodePort' - PORT_FORWARD = 'Port_Forward' + PORTFORWARD = 'PortForward' class KubernetesServiceType(enum.Enum): @@ -374,7 +374,7 @@ def get_port(svc_name: str, namespace: str) -> int: def get_external_ip(network_mode: Optional[KubernetesNetworkingMode]): - if network_mode == KubernetesNetworkingMode.PORT_FORWARD: + if network_mode == KubernetesNetworkingMode.PORTFORWARD: return '127.0.0.1' # Return the IP address of the first node with an external IP nodes = kubernetes.core_api().list_node().items @@ -682,7 +682,7 @@ def get_ssh_proxy_command(private_key_path: str, ssh_jump_name: str, This key must be authorized to access the SSH jump pod. ssh_jump_name: str; Name of the SSH jump service to use network_mode: KubernetesNetworkingMode; networking mode for ssh - session. It is either 'NODEPORT' or 'PORT_FORWARD' + session. It is either 'NODEPORT' or 'PORTFORWARD' namespace: Kubernetes namespace to use port_fwd_proxy_cmd_path: str; path to the script used as Proxycommand with 'kubectl port-forward' @@ -752,7 +752,7 @@ def setup_sshjump_svc(ssh_jump_name: str, namespace: str, name=ssh_jump_name, namespace=namespace) kubernetes.core_api().create_namespaced_service( namespace, content['service_spec']) - port_forward_mode = KubernetesNetworkingMode.PORT_FORWARD.value + port_forward_mode = KubernetesNetworkingMode.PORTFORWARD.value nodeport_mode = KubernetesNetworkingMode.NODEPORT.value clusterip_svc = KubernetesServiceType.CLUSTERIP.value nodeport_svc = KubernetesServiceType.NODEPORT.value @@ -936,7 +936,7 @@ def check_port_forward_mode_dependencies() -> None: with ux_utils.print_exception_no_traceback(): raise RuntimeError( f'`{name}` is required to setup Kubernetes cloud with ' - f'`{KubernetesNetworkingMode.PORT_FORWARD.value}` default ' + f'`{KubernetesNetworkingMode.PORTFORWARD.value}` default ' 'networking mode and it is not installed. 
' 'For Debian/Ubuntu system, install it with:\n' f' $ sudo apt install {name}') from None diff --git a/tests/test_config.py b/tests/test_config.py index cb0ab42df78..70705dde068 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -11,7 +11,7 @@ VPC_NAME = 'vpc-12345678' PROXY_COMMAND = 'ssh -W %h:%p -i ~/.ssh/id_rsa -o StrictHostKeyChecking=no' NODEPORT_MODE_NAME = kubernetes_utils.KubernetesNetworkingMode.NODEPORT.value -PORT_FORWARD_MODE_NAME = kubernetes_utils.KubernetesNetworkingMode.PORT_FORWARD.value +PORT_FORWARD_MODE_NAME = kubernetes_utils.KubernetesNetworkingMode.PORTFORWARD.value def _reload_config() -> None: From c620f94508e01f08e8941cea352344e22964b063 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 31 Aug 2023 22:09:13 +0530 Subject: [PATCH 171/183] fix --- sky/authentication.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sky/authentication.py b/sky/authentication.py index ecda611a134..e2042acd8b8 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -386,6 +386,7 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: ssh_setup_mode = skypilot_config.get_nested(('kubernetes', 'networking'), port_forward_mode.value) get_or_generate_keys() + # Run kubectl command to add the public key to the cluster. 
public_key_path = os.path.expanduser(PUBLIC_SSH_KEY_PATH) key_label = clouds.Kubernetes.SKY_SSH_KEY_SECRET_NAME From 94bf1a991e492222b27a07b3a6aa35e68ac528e7 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 31 Aug 2023 22:39:27 +0530 Subject: [PATCH 172/183] updates --- sky/authentication.py | 4 +-- .../providers/kubernetes/node_provider.py | 27 ++++++++++++------- sky/utils/kubernetes_utils.py | 10 +++---- tests/kubernetes/build_image.sh | 0 tests/test_config.py | 2 +- 5 files changed, 25 insertions(+), 18 deletions(-) mode change 100644 => 100755 tests/kubernetes/build_image.sh diff --git a/sky/authentication.py b/sky/authentication.py index e2042acd8b8..6ac991e3310 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -412,11 +412,11 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: raise ssh_jump_name = clouds.Kubernetes.SKY_SSH_JUMP_NAME - if ssh_setup_mode.lower() == nodeport_mode.value.lower(): + if ssh_setup_mode.lower() == nodeport_mode.value: network_mode = nodeport_mode service_type = kubernetes_utils.KubernetesServiceType.NODEPORT - elif ssh_setup_mode.lower() == port_forward_mode.value.lower(): + elif ssh_setup_mode.lower() == port_forward_mode.value: kubernetes_utils.check_port_forward_mode_dependencies() network_mode = port_forward_mode # Using `kubectl port-forward` creates a direct tunnel to jump pod and diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 8e44379d19e..90581fe7379 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -214,12 +214,9 @@ def create_node(self, node_config, tags, count): 'Cluster may be out of resources or ' 'may be too slow to autoscale.') all_ready = True - pods_and_containers_running = False - pods = [] for node in new_nodes: pod = kubernetes.core_api().read_namespaced_pod( node.metadata.name, self.namespace) - pods.append(pod) if 
pod.status.phase == 'Pending': # Iterate over each pod to check their status if pod.status.container_statuses is not None: @@ -239,16 +236,26 @@ def create_node(self, node_config, tags, count): # If container_statuses is None, then the pod hasn't # been scheduled yet. all_ready = False + if all_ready: + break + time.sleep(1) - # check if all the pods and containers within the pods are running - if all([ pod.status.phase == "Running" for pod in pods]) \ - and all([container.state.running for pod in pods for container in pod.status.container_statuses]): - pods_and_containers_running = True - - if all_ready and pods_and_containers_running: + # Wait for pod containers to be ready - they may be pulling images or + # may be in the process of container creation. + while True: + pods = [] + for node in new_nodes: + pod = kubernetes.core_api().read_namespaced_pod( + node.metadata.name, self.namespace) + pods.append(pod) + if all([pod.status.phase == "Running" for pod in pods]) \ + and all( + [container.state.running for pod in pods for container in + pod.status.container_statuses]): break time.sleep(1) + # Once all containers are ready, we can exec into them and set env vars. # Kubernetes automatically populates containers with critical # environment variables, such as those for discovering services running # in the cluster and CUDA/nvidia environment variables. We need to @@ -256,7 +263,7 @@ def create_node(self, node_config, tags, count): # for GPU support and service discovery. # See https://github.com/skypilot-org/skypilot/issues/2287 for # more details. - # Capturing env. var. from the pod's runtime and writes them to + # Capturing env vars from the pod's runtime and writes them to # /etc/profile.d/ making them available for all users in future # shell sessions. 
set_k8s_env_var_cmd = [ diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index b1b6eb1dc13..80e9568e173 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -38,8 +38,8 @@ class KubernetesNetworkingMode(enum.Enum): """Enum for the different types of networking modes for accessing jump pods. """ - NODEPORT = 'NodePort' - PORTFORWARD = 'PortForward' + NODEPORT = 'nodeport' + PORTFORWARD = 'portforward' class KubernetesServiceType(enum.Enum): @@ -853,9 +853,9 @@ def clean_zombie_sshjump_pod(namespace: str, node_id: str): """Analyzes SSH jump pod and removes if it is in a bad state Prevents the existence of a dangling SSH jump pod. This could happen - in case the pod main container did not start properly (or failed) and SSH - jump pod lifecycle management (LCM) will not function properly to take care - of removing the pod and service when needed. + in case the pod main container did not start properly (or failed). In that + case, jump pod lifecycle management (LCM) will not functioning properly to + remove the pod and service automatically, and must be done manually. 
Args: namespace: Namespace to remove the SSH jump pod and service from diff --git a/tests/kubernetes/build_image.sh b/tests/kubernetes/build_image.sh old mode 100644 new mode 100755 diff --git a/tests/test_config.py b/tests/test_config.py index 70705dde068..f2378198645 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -82,7 +82,7 @@ def test_config_get_set_nested(monkeypatch, tmp_path) -> None: None) == PROXY_COMMAND new_config = skypilot_config.set_nested(('kubernetes', 'networking'), PORT_FORWARD_MODE_NAME) - assert new_config['aws']['ssh_proxy_command'] == PORT_FORWARD_MODE_NAME + assert new_config['kubernetes']['ssh_proxy_command'] == PORT_FORWARD_MODE_NAME assert skypilot_config.get_nested(('kubernetes', 'networking'), None) == NODEPORT_MODE_NAME # Check that dumping the config to a file with the new None can be reloaded From 48d53a5ffcddc6ee964f99bc3ba3bb9fa1b422f4 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 31 Aug 2023 22:50:08 +0530 Subject: [PATCH 173/183] lint --- tests/test_config.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_config.py b/tests/test_config.py index f2378198645..91aa1197346 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -82,7 +82,8 @@ def test_config_get_set_nested(monkeypatch, tmp_path) -> None: None) == PROXY_COMMAND new_config = skypilot_config.set_nested(('kubernetes', 'networking'), PORT_FORWARD_MODE_NAME) - assert new_config['kubernetes']['ssh_proxy_command'] == PORT_FORWARD_MODE_NAME + assert new_config['kubernetes'][ + 'ssh_proxy_command'] == PORT_FORWARD_MODE_NAME assert skypilot_config.get_nested(('kubernetes', 'networking'), None) == NODEPORT_MODE_NAME # Check that dumping the config to a file with the new None can be reloaded From 08fd88df3e28e556d0ae8f2eb2366adfe8859a3a Mon Sep 17 00:00:00 2001 From: Doyoung Kim <34902420+landscapepainter@users.noreply.github.com> Date: Thu, 31 Aug 2023 16:37:32 -0700 Subject: [PATCH 174/183] Update 
sky/skylet/providers/kubernetes/node_provider.py --- sky/skylet/providers/kubernetes/node_provider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 90581fe7379..3b69f427e29 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -268,7 +268,7 @@ def create_node(self, node_config, tags, count): # shell sessions. set_k8s_env_var_cmd = [ '/bin/sh', '-c', - ('printenv | awk -F "=" \'{print "export " $1 "=\\047" $2 "\\047"}\' > ~/k8s_env_var.sh;' + ('printenv | awk -F "=" \'{print "export " $1 "=\\047" $2 "\\047"}\' > ~/k8s_env_var.sh && ' 'mv ~/k8s_env_var.sh /etc/profile.d/k8s_env_var.sh || ' 'sudo mv ~/k8s_env_var.sh /etc/profile.d/k8s_env_var.sh') ] From 693af6d4572397d9ef00b269fc9b9ea2626770a2 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Tue, 5 Sep 2023 12:20:50 -0700 Subject: [PATCH 175/183] fix test --- tests/test_config.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index 91aa1197346..72d16ac362a 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -82,8 +82,6 @@ def test_config_get_set_nested(monkeypatch, tmp_path) -> None: None) == PROXY_COMMAND new_config = skypilot_config.set_nested(('kubernetes', 'networking'), PORT_FORWARD_MODE_NAME) - assert new_config['kubernetes'][ - 'ssh_proxy_command'] == PORT_FORWARD_MODE_NAME assert skypilot_config.get_nested(('kubernetes', 'networking'), None) == NODEPORT_MODE_NAME # Check that dumping the config to a file with the new None can be reloaded From d21449508b914139a15a6e6084124f7dff2e50bd Mon Sep 17 00:00:00 2001 From: Doyoung Kim <34902420+landscapepainter@users.noreply.github.com> Date: Thu, 14 Sep 2023 19:06:06 -0700 Subject: [PATCH 176/183] [k8s] Showing reasons for provisioning failure in K8s (#2422) * surface provision failure message * nit * nit * format * 
nit * CPU message fix * update Insufficient memory handling * nit * nit * Update sky/skylet/providers/kubernetes/node_provider.py Co-authored-by: Romil Bhardwaj * Update sky/skylet/providers/kubernetes/node_provider.py Co-authored-by: Romil Bhardwaj * Update sky/skylet/providers/kubernetes/node_provider.py Co-authored-by: Romil Bhardwaj * Update sky/skylet/providers/kubernetes/node_provider.py Co-authored-by: Romil Bhardwaj * format * update gpu failure message and condition * fix GPU handling cases * fix * comment * nit * add try except block with general error handling --------- Co-authored-by: Romil Bhardwaj --- .../providers/kubernetes/node_provider.py | 72 +++++++++++++++++-- 1 file changed, 66 insertions(+), 6 deletions(-) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 3b69f427e29..e20822957ed 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -12,7 +12,9 @@ from sky.adaptors import kubernetes from sky.skylet.providers.kubernetes import config +from sky.utils import common_utils from sky.utils import kubernetes_utils +from sky.utils import ux_utils logger = logging.getLogger(__name__) @@ -161,6 +163,59 @@ def _set_node_tags(self, node_id, tags): pod.metadata.labels.update(tags) kubernetes.core_api().patch_namespaced_pod(node_id, self.namespace, pod) + def _raise_pod_scheduling_errors(self, new_nodes): + for new_node in new_nodes: + pod_status = new_node.status.phase + pod_name = new_node._metadata._name + events = kubernetes.core_api().list_namespaced_event( + self.namespace, + field_selector=(f'involvedObject.name={pod_name},' + 'involvedObject.kind=Pod')) + # Events created in the past hours are kept by + # Kubernetes python client and we want to surface + # the latest event message + events_desc_by_time = \ + sorted(events.items, + key=lambda e: e.metadata.creation_timestamp, + reverse=True) + for event in 
events_desc_by_time: + if event.reason == 'FailedScheduling': + event_message = event.message + break + timeout_err_msg = ('Timed out while waiting for nodes to start. ' + 'Cluster may be out of resources or ' + 'may be too slow to autoscale.') + lack_resource_msg = ( + 'Insufficient {resource}. Other SkyPilot tasks or pods on ' + 'the cluster may be using resources. Check resource usage ' + 'by running `kubectl describe nodes`.') + if event_message is not None: + if pod_status == 'Pending': + if 'Insufficient cpu' in event_message: + raise config.KubernetesError( + lack_resource_msg.format(resource='CPUs')) + if 'Insufficient memory' in event_message: + raise config.KubernetesError( + lack_resource_msg.format(resource='memory')) + gpu_lf_keys = [lf.get_label_key() \ + for lf in kubernetes_utils.LABEL_FORMATTER_REGISTRY] + # Confirms if the nodeSelector in the pod spec is + # set for GPU scheduling. + if new_node.spec.node_selector: + for label_key in new_node.spec.node_selector.keys(): + if label_key in gpu_lf_keys: + if 'Insufficient nvidia.com/gpu' in event_message or \ + 'didn\'t match Pod\'s node affinity/selector' in event_message: + raise config.KubernetesError( + f'{lack_resource_msg.format(resource="GPUs")} ' + f'Please confirm if {new_node.spec.node_selector[label_key]}' + ' is available in the cluster.') + raise config.KubernetesError( + f'{timeout_err_msg} ' + f'Error details: \'{event_message}\' ' + f'Error pod status: {pod_status}') + raise config.KubernetesError(f'{timeout_err_msg}') + def create_node(self, node_config, tags, count): conf = copy.deepcopy(node_config) pod_spec = conf.get('pod', conf) @@ -193,7 +248,6 @@ def create_node(self, node_config, tags, count): '(count={}).'.format(count)) for new_node in new_nodes: - metadata = service_spec.get('metadata', {}) metadata['name'] = new_node.metadata.name service_spec['metadata'] = metadata @@ -205,14 +259,20 @@ def create_node(self, node_config, tags, count): # Wait for all pods to be ready, and 
if it exceeds the timeout, raise an # exception. If pod's container is ContainerCreating, then we can assume # that resources have been allocated and we can exit. - start = time.time() while True: if time.time() - start > self.timeout: - raise config.KubernetesError( - 'Timed out while waiting for nodes to start. ' - 'Cluster may be out of resources or ' - 'may be too slow to autoscale.') + try: + self._raise_pod_scheduling_errors(new_nodes) + except config.KubernetesError: + raise + except Exception as e: + with ux_utils.print_exception_no_traceback(): + raise RuntimeError( + f'An error occurred during pod creation. ' + f'Status: {common_utils.format_exception(e, use_bracket=True)}' + ) from None + all_ready = True for node in new_nodes: pod = kubernetes.core_api().read_namespaced_pod( From 4e8b67806049e99bef2909f924d7d3849e6fc7bb Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 14 Sep 2023 19:45:56 -0700 Subject: [PATCH 177/183] cleanup --- .../providers/kubernetes/node_provider.py | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index e20822957ed..4b6422e7faf 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -14,7 +14,6 @@ from sky.skylet.providers.kubernetes import config from sky.utils import common_utils from sky.utils import kubernetes_utils -from sky.utils import ux_utils logger = logging.getLogger(__name__) @@ -186,34 +185,36 @@ def _raise_pod_scheduling_errors(self, new_nodes): 'Cluster may be out of resources or ' 'may be too slow to autoscale.') lack_resource_msg = ( - 'Insufficient {resource}. Other SkyPilot tasks or pods on ' - 'the cluster may be using resources. Check resource usage ' - 'by running `kubectl describe nodes`.') + 'Insufficient {resource} capacity on the cluster. ' + 'Other SkyPilot tasks or pods may be using resources. 
' + 'Check resource usage by running `kubectl describe nodes`.') if event_message is not None: if pod_status == 'Pending': if 'Insufficient cpu' in event_message: raise config.KubernetesError( - lack_resource_msg.format(resource='CPUs')) + lack_resource_msg.format(resource='CPU')) if 'Insufficient memory' in event_message: raise config.KubernetesError( lack_resource_msg.format(resource='memory')) - gpu_lf_keys = [lf.get_label_key() \ + gpu_lf_keys = [lf.get_label_key() for lf in kubernetes_utils.LABEL_FORMATTER_REGISTRY] - # Confirms if the nodeSelector in the pod spec is - # set for GPU scheduling. if new_node.spec.node_selector: for label_key in new_node.spec.node_selector.keys(): if label_key in gpu_lf_keys: + # TODO(romilb): We may have additional node + # affinity selectors in the future - in that + # case we will need to update this logic. if 'Insufficient nvidia.com/gpu' in event_message or \ 'didn\'t match Pod\'s node affinity/selector' in event_message: raise config.KubernetesError( - f'{lack_resource_msg.format(resource="GPUs")} ' - f'Please confirm if {new_node.spec.node_selector[label_key]}' + f'{lack_resource_msg.format(resource="GPU")} ' + f'Verify if {new_node.spec.node_selector[label_key]}' ' is available in the cluster.') raise config.KubernetesError( f'{timeout_err_msg} ' - f'Error details: \'{event_message}\' ' - f'Error pod status: {pod_status}') + f'Pod status: {pod_status}' + f'Details: \'{event_message}\' ' + ) raise config.KubernetesError(f'{timeout_err_msg}') def create_node(self, node_config, tags, count): @@ -267,11 +268,11 @@ def create_node(self, node_config, tags, count): except config.KubernetesError: raise except Exception as e: - with ux_utils.print_exception_no_traceback(): - raise RuntimeError( - f'An error occurred during pod creation. 
' - f'Status: {common_utils.format_exception(e, use_bracket=True)}' - ) from None + raise config.KubernetesError( + 'An error occurred while trying to fetch the reason ' + 'for pod scheduling failure. ' + f'Error: {common_utils.format_exception(e)}' + ) from None all_ready = True for node in new_nodes: From d8302f0dce4a8f9ee9afadba9c9c7368441ba781 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Thu, 14 Sep 2023 19:48:21 -0700 Subject: [PATCH 178/183] lint --- .../providers/kubernetes/node_provider.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 4b6422e7faf..632b02ab633 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -196,8 +196,10 @@ def _raise_pod_scheduling_errors(self, new_nodes): if 'Insufficient memory' in event_message: raise config.KubernetesError( lack_resource_msg.format(resource='memory')) - gpu_lf_keys = [lf.get_label_key() - for lf in kubernetes_utils.LABEL_FORMATTER_REGISTRY] + gpu_lf_keys = [ + lf.get_label_key() + for lf in kubernetes_utils.LABEL_FORMATTER_REGISTRY + ] if new_node.spec.node_selector: for label_key in new_node.spec.node_selector.keys(): if label_key in gpu_lf_keys: @@ -210,11 +212,9 @@ def _raise_pod_scheduling_errors(self, new_nodes): f'{lack_resource_msg.format(resource="GPU")} ' f'Verify if {new_node.spec.node_selector[label_key]}' ' is available in the cluster.') - raise config.KubernetesError( - f'{timeout_err_msg} ' - f'Pod status: {pod_status}' - f'Details: \'{event_message}\' ' - ) + raise config.KubernetesError(f'{timeout_err_msg} ' + f'Pod status: {pod_status}' + f'Details: \'{event_message}\' ') raise config.KubernetesError(f'{timeout_err_msg}') def create_node(self, node_config, tags, count): @@ -271,8 +271,7 @@ def create_node(self, node_config, tags, count): raise config.KubernetesError( 'An error 
occurred while trying to fetch the reason ' 'for pod scheduling failure. ' - f'Error: {common_utils.format_exception(e)}' - ) from None + f'Error: {common_utils.format_exception(e)}') from None all_ready = True for node in new_nodes: From fd2976ab348a6b2fdcb4194ffa61eefb9bb56cc7 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 15 Sep 2023 11:01:23 -0700 Subject: [PATCH 179/183] fix for ssh jump image_id --- sky/clouds/kubernetes.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index afe9ad116b0..d308d7c4c3d 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -212,6 +212,9 @@ def make_deploy_resources_variables( assert image_id.startswith('skypilot:') image_id = service_catalog.get_image_id_from_tag(image_id, clouds='kubernetes') + # TODO(romilb): Create a lightweight image for SSH jump host + sshjump_image = service_catalog.get_image_id_from_tag(self.IMAGE_CPU, + clouds='kubernetes') k8s_acc_label_key = None k8s_acc_label_value = None @@ -233,8 +236,7 @@ def make_deploy_resources_variables( 'k8s_acc_label_key': k8s_acc_label_key, 'k8s_acc_label_value': k8s_acc_label_value, 'k8s_sshjump_name': self.SKY_SSH_JUMP_NAME, - # TODO(romilb): Create a lightweight image for SSH jump host - 'k8s_sshjump_image': self.IMAGE_CPU, + 'k8s_sshjump_image': sshjump_image, # TODO(romilb): Allow user to specify custom images 'image_id': image_id, } From 9827bbbcec05a55a7da331c4312c1121a43c6cb5 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 15 Sep 2023 14:30:03 -0700 Subject: [PATCH 180/183] comments --- sky/authentication.py | 25 ++++++++++--------- .../providers/kubernetes/node_provider.py | 8 +++--- sky/templates/kubernetes-ray.yml.j2 | 4 ++- sky/utils/command_runner.py | 4 +-- sky/utils/command_runner.pyi | 5 +++- sky/utils/kubernetes/sshjump_lcm.py | 4 +-- sky/utils/kubernetes_utils.py | 22 +++++++++++++--- 7 files changed, 47 insertions(+), 25 deletions(-) diff --git 
a/sky/authentication.py b/sky/authentication.py index 6ac991e3310..c8523fdb0d4 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -380,11 +380,18 @@ def setup_scp_authentication(config: Dict[str, Any]) -> Dict[str, Any]: def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: # Default ssh session is established with kubectl port-forwarding with - # ClusterIP service + # ClusterIP service. nodeport_mode = kubernetes_utils.KubernetesNetworkingMode.NODEPORT port_forward_mode = kubernetes_utils.KubernetesNetworkingMode.PORTFORWARD - ssh_setup_mode = skypilot_config.get_nested(('kubernetes', 'networking'), + network_mode_str = skypilot_config.get_nested(('kubernetes', 'networking'), port_forward_mode.value) + try: + network_mode = kubernetes_utils.KubernetesNetworkingMode.from_str(network_mode_str) + except ValueError as e: + # Add message saying "Please check: ~/.sky/config.yaml" to the error + # message. + e.message += f'\nPlease check {skypilot_config.CONFIG_PATH}.' + raise get_or_generate_keys() # Run kubectl command to add the public key to the cluster. @@ -412,24 +419,18 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: raise ssh_jump_name = clouds.Kubernetes.SKY_SSH_JUMP_NAME - if ssh_setup_mode.lower() == nodeport_mode.value: - network_mode = nodeport_mode + if network_mode == nodeport_mode: service_type = kubernetes_utils.KubernetesServiceType.NODEPORT - - elif ssh_setup_mode.lower() == port_forward_mode.value: + elif network_mode == port_forward_mode: kubernetes_utils.check_port_forward_mode_dependencies() - network_mode = port_forward_mode # Using `kubectl port-forward` creates a direct tunnel to jump pod and # does not require opening any ports on Kubernetes nodes. As a result, # the service can be a simple ClusterIP service which we access with # `kubectl port-forward`. 
service_type = kubernetes_utils.KubernetesServiceType.CLUSTERIP else: - raise ValueError(f'Unsupported kubernetes networking mode: ' - f'{ssh_setup_mode}. The mode has to be either ' - f'\'{port_forward_mode.value}\' or ' - f'\'{nodeport_mode.value}\'. ' - 'Please check: ~/.sky/config.yaml') + # This should never happen because we check for this in from_str above. + raise ValueError(f'Unsupported networking mode: {network_mode_str}') # Setup service for SSH jump pod. We create the SSH jump service here # because we need to know the service IP address and port to set the # ssh_proxy_command in the autoscaler config. diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index 632b02ab633..b4496855006 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -319,12 +319,12 @@ def create_node(self, node_config, tags, count): # Kubernetes automatically populates containers with critical # environment variables, such as those for discovering services running # in the cluster and CUDA/nvidia environment variables. We need to - # update task environment variables with these env vars. This is needed - # for GPU support and service discovery. + # make sure these env vars are available in every task and ssh session. + # This is needed for GPU support and service discovery. # See https://github.com/skypilot-org/skypilot/issues/2287 for # more details. - # Capturing env vars from the pod's runtime and writes them to - # /etc/profile.d/ making them available for all users in future + # To do so, we capture env vars from the pod's runtime and write them to + # /etc/profile.d/, making them available for all users in future # shell sessions. 
set_k8s_env_var_cmd = [ '/bin/sh', '-c', diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 7e46915b434..79a5e4ccbe1 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -16,7 +16,9 @@ provider: type: external module: sky.skylet.providers.kubernetes.KubernetesNodeProvider - # Use False if running from outside of k8s cluster + # We use internal IPs since we set up a port-forward between the kubernetes + # cluster and the local machine, or directly use NodePort to reach the + # head node. use_internal_ips: true timeout: {{timeout}} diff --git a/sky/utils/command_runner.py b/sky/utils/command_runner.py index 7969694632c..a762172a0f9 100644 --- a/sky/utils/command_runner.py +++ b/sky/utils/command_runner.py @@ -87,8 +87,8 @@ def ssh_options_list( # process running ProxyCommand is kept running as long as the ssh session # is running and the ControlMaster keeps the session, which results in # 'ControlPersist' number of seconds delay per ssh commands ran. - if ssh_control_name is not None and docker_ssh_proxy_command is None \ - and not disable_control_master: + if (ssh_control_name is not None and docker_ssh_proxy_command is None + and not disable_control_master): arg_dict.update({ # Control path: important optimization as we do multiple ssh in one # sky.launch(). diff --git a/sky/utils/command_runner.pyi b/sky/utils/command_runner.pyi index 425f5c60213..893acb1e57b 100644 --- a/sky/utils/command_runner.pyi +++ b/sky/utils/command_runner.pyi @@ -24,8 +24,11 @@ def ssh_options_list( ssh_private_key: Optional[str], ssh_control_name: Optional[str], *, + ssh_proxy_command: Optional[str] = ..., + docker_ssh_proxy_command: Optional[str] = ..., timeout: int = ..., - disable_control_master: Optional[bool] = False, + port: int = ..., + disable_control_master: Optional[bool] = ..., ) -> List[str]: ... 
diff --git a/sky/utils/kubernetes/sshjump_lcm.py b/sky/utils/kubernetes/sshjump_lcm.py index e76672bf2f8..491d507a62d 100644 --- a/sky/utils/kubernetes/sshjump_lcm.py +++ b/sky/utils/kubernetes/sshjump_lcm.py @@ -60,8 +60,8 @@ def poll(): raise if len(ret.items) == 0: - sys.stdout.write(f'Did not pods with label "{label_selector}" in ' - f'namespace {current_namespace}\n') + sys.stdout.write(f'Did not find pods with label "{label_selector}" ' + f'in namespace {current_namespace}\n') nocluster_delta = nocluster_delta + retry_interval_delta sys.stdout.write( f'Time since no pods found: {nocluster_delta}, alert ' diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index 80e9568e173..b753ccb37dd 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -41,6 +41,20 @@ class KubernetesNetworkingMode(enum.Enum): NODEPORT = 'nodeport' PORTFORWARD = 'portforward' + @classmethod + def from_str(cls, mode: str) -> 'KubernetesNetworkingMode': + """Returns the enum value for the given string.""" + if mode.lower() == cls.NODEPORT.value: + return cls.NODEPORT + elif mode.lower() == cls.PORTFORWARD.value: + return cls.PORTFORWARD + else: + raise ValueError(f'Unsupported kubernetes networking mode: ' + f'{mode}. The mode has to be either ' + f'\'{cls.PORTFORWARD.value}\' or ' + f'\'{cls.NODEPORT.value}\'. 
') + + class KubernetesServiceType(enum.Enum): """Enum for the different types of services.""" @@ -741,7 +755,7 @@ def setup_sshjump_svc(ssh_jump_name: str, namespace: str, if service_type.value == curr_svc_type: # If the currently existing SSH Jump service's type is identical # to user's configuration for networking mode - logger.warning( + logger.debug( f'SSH Jump Service {ssh_jump_name} already exists in the ' 'cluster, using it.') else: @@ -938,5 +952,7 @@ def check_port_forward_mode_dependencies() -> None: f'`{name}` is required to setup Kubernetes cloud with ' f'`{KubernetesNetworkingMode.PORTFORWARD.value}` default ' 'networking mode and it is not installed. ' - 'For Debian/Ubuntu system, install it with:\n' - f' $ sudo apt install {name}') from None + 'On Debian/Ubuntu, install it with:\n' + f' $ sudo apt install {name}\n' + f'On MacOS, install it with: \n' + f' $ brew install {name}') from None From f74c9dfa55d31438ae657b3a2ecc599734e0e9f9 Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 15 Sep 2023 14:47:11 -0700 Subject: [PATCH 181/183] ssh jump refactor --- sky/authentication.py | 2 +- sky/clouds/kubernetes.py | 8 +- sky/skylet/providers/kubernetes/config.py | 8 +- .../providers/kubernetes/node_provider.py | 2 +- sky/templates/kubernetes-ray.yml.j2 | 4 +- ...jump.yml.j2 => kubernetes-ssh-jump.yml.j2} | 180 +++++++------- ...p_lcm.py => ssh_jump_lifecycle_manager.py} | 222 +++++++++--------- sky/utils/kubernetes_utils.py | 66 +++--- 8 files changed, 246 insertions(+), 246 deletions(-) rename sky/templates/{kubernetes-sshjump.yml.j2 => kubernetes-ssh-jump.yml.j2} (88%) rename sky/utils/kubernetes/{sshjump_lcm.py => ssh_jump_lifecycle_manager.py} (90%) diff --git a/sky/authentication.py b/sky/authentication.py index c8523fdb0d4..88930cbf688 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -435,7 +435,7 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: # because we need to know the service IP address 
and port to set the # ssh_proxy_command in the autoscaler config. namespace = kubernetes_utils.get_current_kube_config_context_namespace() - kubernetes_utils.setup_sshjump_svc(ssh_jump_name, namespace, service_type) + kubernetes_utils.setup_ssh_jump_svc(ssh_jump_name, namespace, service_type) ssh_proxy_cmd = kubernetes_utils.get_ssh_proxy_command( PRIVATE_SSH_KEY_PATH, ssh_jump_name, network_mode, namespace, diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index d308d7c4c3d..3651d309282 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -28,7 +28,7 @@ class Kubernetes(clouds.Cloud): """Kubernetes.""" SKY_SSH_KEY_SECRET_NAME = f'sky-ssh-{common_utils.get_user_hash()}' - SKY_SSH_JUMP_NAME = f'sky-sshjump-{common_utils.get_user_hash()}' + SKY_SSH_JUMP_NAME = f'sky-ssh-jump-{common_utils.get_user_hash()}' PORT_FORWARD_PROXY_CMD_TEMPLATE = \ 'kubernetes-port-forward-proxy-command.sh.j2' PORT_FORWARD_PROXY_CMD_PATH = '~/.sky/port-forward-proxy-cmd.sh' @@ -213,7 +213,7 @@ def make_deploy_resources_variables( image_id = service_catalog.get_image_id_from_tag(image_id, clouds='kubernetes') # TODO(romilb): Create a lightweight image for SSH jump host - sshjump_image = service_catalog.get_image_id_from_tag(self.IMAGE_CPU, + ssh_jump_image = service_catalog.get_image_id_from_tag(self.IMAGE_CPU, clouds='kubernetes') k8s_acc_label_key = None @@ -235,8 +235,8 @@ def make_deploy_resources_variables( 'k8s_ssh_key_secret_name': self.SKY_SSH_KEY_SECRET_NAME, 'k8s_acc_label_key': k8s_acc_label_key, 'k8s_acc_label_value': k8s_acc_label_value, - 'k8s_sshjump_name': self.SKY_SSH_JUMP_NAME, - 'k8s_sshjump_image': sshjump_image, + 'k8s_ssh_jump_name': self.SKY_SSH_JUMP_NAME, + 'k8s_ssh_jump_image': ssh_jump_image, # TODO(romilb): Allow user to specify custom images 'image_id': image_id, } diff --git a/sky/skylet/providers/kubernetes/config.py b/sky/skylet/providers/kubernetes/config.py index 92ca5d0c13c..2ab466f03ed 100644 --- 
a/sky/skylet/providers/kubernetes/config.py +++ b/sky/skylet/providers/kubernetes/config.py @@ -267,8 +267,8 @@ def _configure_ssh_jump(namespace, config): """ pod_cfg = config['available_node_types']['ray_head_default']['node_config'] - sshjump_name = pod_cfg['metadata']['labels']['skypilot-sshjump'] - sshjump_image = config['provider']['sshjump_image'] + ssh_jump_name = pod_cfg['metadata']['labels']['skypilot-ssh-jump'] + ssh_jump_image = config['provider']['ssh_jump_image'] volumes = pod_cfg['spec']['volumes'] # find 'secret-volume' and get the secret name @@ -288,8 +288,8 @@ def _configure_ssh_jump(namespace, config): # and available before we create the SSH jump pod. If for any reason the # service is missing, we should raise an error. - kubernetes_utils.setup_sshjump_pod(sshjump_name, sshjump_image, - ssh_key_secret_name, namespace) + kubernetes_utils.setup_ssh_jump_pod(ssh_jump_name, ssh_jump_image, + ssh_key_secret_name, namespace) return config diff --git a/sky/skylet/providers/kubernetes/node_provider.py b/sky/skylet/providers/kubernetes/node_provider.py index b4496855006..8963225cc3f 100644 --- a/sky/skylet/providers/kubernetes/node_provider.py +++ b/sky/skylet/providers/kubernetes/node_provider.py @@ -347,7 +347,7 @@ def create_node(self, node_config, tags, count): def terminate_node(self, node_id): logger.info(config.log_prefix + 'calling delete_namespaced_pod') try: - kubernetes_utils.clean_zombie_sshjump_pod(self.namespace, node_id) + kubernetes_utils.clean_zombie_ssh_jump_pod(self.namespace, node_id) except Exception as e: logger.warning(config.log_prefix + f'Error occurred when analyzing SSH Jump pod: {e}') diff --git a/sky/templates/kubernetes-ray.yml.j2 b/sky/templates/kubernetes-ray.yml.j2 index 79a5e4ccbe1..da8e4253290 100644 --- a/sky/templates/kubernetes-ray.yml.j2 +++ b/sky/templates/kubernetes-ray.yml.j2 @@ -23,7 +23,7 @@ provider: timeout: {{timeout}} - sshjump_image: {{k8s_sshjump_image}} + ssh_jump_image: {{k8s_ssh_jump_image}} # 
ServiceAccount created by the autoscaler for the head node pod that it # runs in. If this field isn't provided, the head pod config below must @@ -130,7 +130,7 @@ available_node_types: component: {{cluster_name_on_cloud}}-ray-head skypilot-cluster: {{cluster_name_on_cloud}} # Identifies the SSH jump pod used by this pod. Used in life cycle management of the ssh jump pod. - skypilot-sshjump: {{k8s_sshjump_name}} + skypilot-ssh-jump: {{k8s_ssh_jump_name}} spec: # Change this if you altered the autoscaler_service_account above # or want to provide your own. diff --git a/sky/templates/kubernetes-sshjump.yml.j2 b/sky/templates/kubernetes-ssh-jump.yml.j2 similarity index 88% rename from sky/templates/kubernetes-sshjump.yml.j2 rename to sky/templates/kubernetes-ssh-jump.yml.j2 index 7c7c3fbc877..a4c9929fe1e 100644 --- a/sky/templates/kubernetes-sshjump.yml.j2 +++ b/sky/templates/kubernetes-ssh-jump.yml.j2 @@ -1,90 +1,90 @@ -pod_spec: - apiVersion: v1 - kind: Pod - metadata: - name: {{ name }} - labels: - component: {{ name }} - parent: skypilot - spec: - serviceAccountName: sky-sshjump-sa - volumes: - - name: secret-volume - secret: - secretName: {{ secret }} - containers: - - name: {{ name }} - imagePullPolicy: Always - image: {{ image }} - command: ["python3", "-u", "/skypilot/sky/utils/kubernetes/sshjump_lcm.py"] - ports: - - containerPort: 22 - volumeMounts: - - name: secret-volume - readOnly: true - mountPath: /etc/secret-volume - lifecycle: - postStart: - exec: - command: ["/bin/bash", "-c", "mkdir -p ~/.ssh && cp /etc/secret-volume/ssh-publickey ~/.ssh/authorized_keys && sudo service ssh restart"] - env: - - name: MY_POD_NAME - valueFrom: - fieldRef: - fieldPath: metadata.name - - name: MY_POD_NAMESPACE - valueFrom: - fieldRef: - fieldPath: metadata.namespace - - name: ALERT_THRESHOLD - # seconds - value: "600" - - name: RETRY_INTERVAL - # seconds - value: "60" - terminationGracePeriodSeconds: 0 -service_spec: - apiVersion: v1 - kind: Service - metadata: - name: {{ 
name }} - labels: - parent: skypilot - spec: - type: {{ service_type }} - selector: - component: {{ name }} - ports: - - protocol: TCP - port: 22 - targetPort: 22 -# The following ServiceAccount/Role/RoleBinding sets up an RBAC for life cycle -# management of the jump pod/service -service_account: - apiVersion: v1 - kind: ServiceAccount - metadata: - name: sky-sshjump-sa - parent: skypilot -role: - kind: Role - apiVersion: rbac.authorization.k8s.io/v1 - metadata: - name: sky-sshjump-role - rules: - - apiGroups: [""] - resources: ["pods", "pods/status", "pods/exec", "services"] - verbs: ["get", "list", "create", "delete"] -role_binding: - apiVersion: rbac.authorization.k8s.io/v1 - kind: RoleBinding - metadata: - name: sky-sshjump-rb - parent: skypilot - subjects: - - kind: ServiceAccount - name: sky-sshjump-sa - roleRef: - kind: Role - name: sky-sshjump-role - apiGroup: rbac.authorization.k8s.io +pod_spec: + apiVersion: v1 + kind: Pod + metadata: + name: {{ name }} + labels: + component: {{ name }} + parent: skypilot + spec: + serviceAccountName: sky-ssh-jump-sa + volumes: + - name: secret-volume + secret: + secretName: {{ secret }} + containers: + - name: {{ name }} + imagePullPolicy: Always + image: {{ image }} + command: ["python3", "-u", "/skypilot/sky/utils/kubernetes/ssh_jump_lifecycle_manager.py"] + ports: + - containerPort: 22 + volumeMounts: + - name: secret-volume + readOnly: true + mountPath: /etc/secret-volume + lifecycle: + postStart: + exec: + command: ["/bin/bash", "-c", "mkdir -p ~/.ssh && cp /etc/secret-volume/ssh-publickey ~/.ssh/authorized_keys && sudo service ssh restart"] + env: + - name: MY_POD_NAME + valueFrom: + fieldRef: + fieldPath: metadata.name + - name: MY_POD_NAMESPACE + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: ALERT_THRESHOLD + # seconds + value: "600" + - name: RETRY_INTERVAL + # seconds + value: "60" + terminationGracePeriodSeconds: 0 +service_spec: + apiVersion: v1 + kind: Service + metadata: + name: {{ name 
}} + labels: + parent: skypilot + spec: + type: {{ service_type }} + selector: + component: {{ name }} + ports: + - protocol: TCP + port: 22 + targetPort: 22 +# The following ServiceAccount/Role/RoleBinding sets up an RBAC for life cycle +# management of the jump pod/service +service_account: + apiVersion: v1 + kind: ServiceAccount + metadata: + name: sky-ssh-jump-sa + parent: skypilot +role: + kind: Role + apiVersion: rbac.authorization.k8s.io/v1 + metadata: + name: sky-ssh-jump-role + rules: + - apiGroups: [""] + resources: ["pods", "pods/status", "pods/exec", "services"] + verbs: ["get", "list", "create", "delete"] +role_binding: + apiVersion: rbac.authorization.k8s.io/v1 + kind: RoleBinding + metadata: + name: sky-ssh-jump-rb + parent: skypilot + subjects: + - kind: ServiceAccount + name: sky-ssh-jump-sa + roleRef: + kind: Role + name: sky-ssh-jump-role + apiGroup: rbac.authorization.k8s.io diff --git a/sky/utils/kubernetes/sshjump_lcm.py b/sky/utils/kubernetes/ssh_jump_lifecycle_manager.py similarity index 90% rename from sky/utils/kubernetes/sshjump_lcm.py rename to sky/utils/kubernetes/ssh_jump_lifecycle_manager.py index 491d507a62d..05f6a8d7a42 100644 --- a/sky/utils/kubernetes/sshjump_lcm.py +++ b/sky/utils/kubernetes/ssh_jump_lifecycle_manager.py @@ -1,111 +1,111 @@ -"""Manages lifecycle of ssh jump pod. - -This script runs inside ssh jump pod as the main process (PID 1). - -It terminates itself (by removing sshjump service and pod via a call to -kubeapi), if it does not see ray pods in the duration of 10 minutes. If the -user re-launches a task before the duration is over, then ssh jump pod is being -reused and will terminate itself when it sees that no ray cluster exist in that -duration. 
-""" -import datetime -import os -import sys -import time - -from kubernetes import client -from kubernetes import config - -# Load kube config -config.load_incluster_config() - -v1 = client.CoreV1Api() - -current_name = os.getenv('MY_POD_NAME') -current_namespace = os.getenv('MY_POD_NAMESPACE') - -# The amount of time in seconds where no Ray pods exist in which after that time -# sshjump pod terminates itself -alert_threshold = int(os.getenv('ALERT_THRESHOLD', '600')) -# The amount of time in seconds to wait between Ray pods existence checks -retry_interval = int(os.getenv('RETRY_INTERVAL', '60')) - -# Ray pods are labeled with this value i.e., sshjump name which is unique per -# user (based on user hash) -label_selector = f'skypilot-sshjump={current_name}' - - -def poll(): - sys.stdout.write('Starting polling.\n') - - alert_delta = datetime.timedelta(seconds=alert_threshold) - - # Set delay for each retry - retry_interval_delta = datetime.timedelta(seconds=retry_interval) - - # Accumulated time of where no SkyPilot cluster exists. Used to compare - # against alert_threshold - nocluster_delta = datetime.timedelta() - - while True: - sys.stdout.write(f'Sleeping {retry_interval} seconds..\n') - time.sleep(retry_interval) - - # List the pods in the current namespace - try: - ret = v1.list_namespaced_pod(current_namespace, - label_selector=label_selector) - except Exception as e: - sys.stdout.write(f'Error: listing pods failed with error: {e}\n') - raise - - if len(ret.items) == 0: - sys.stdout.write(f'Did not find pods with label "{label_selector}" ' - f'in namespace {current_namespace}\n') - nocluster_delta = nocluster_delta + retry_interval_delta - sys.stdout.write( - f'Time since no pods found: {nocluster_delta}, alert ' - f'threshold: {alert_delta}\n') - else: - sys.stdout.write( - f'Found pods with label "{label_selector}" in namespace ' - f'{current_namespace}\n') - # reset .. 
- nocluster_delta = datetime.timedelta() - sys.stdout.write(f'noray_delta is reset: {nocluster_delta}\n') - - if nocluster_delta >= alert_delta: - sys.stdout.write( - f'nocluster_delta: {nocluster_delta} crossed alert threshold: ' - f'{alert_delta}. Time to terminate myself and my service.\n') - try: - # sshjump resources created under same name - v1.delete_namespaced_service(current_name, current_namespace) - v1.delete_namespaced_pod(current_name, current_namespace) - except Exception as e: - sys.stdout.write('[ERROR] Deletion failed. Exiting ' - f'poll() with error: {e}\n') - raise - - break - - sys.stdout.write('Done polling.\n') - - -def main(): - sys.stdout.write('SkyPilot SSH Jump Pod Lifecycle Manager\n') - sys.stdout.write(f'current_name: {current_name}\n') - sys.stdout.write(f'current_namespace: {current_namespace}\n') - sys.stdout.write(f'alert_threshold time: {alert_threshold}\n') - sys.stdout.write(f'retry_interval time: {retry_interval}\n') - sys.stdout.write(f'label_selector: {label_selector}\n') - - if not current_name or not current_namespace: - # Raise Exception with message to terminate pod - raise Exception('Missing environment variables MY_POD_NAME or ' - 'MY_POD_NAMESPACE') - poll() - - -if __name__ == '__main__': - main() +"""Manages lifecycle of ssh jump pod. + +This script runs inside ssh jump pod as the main process (PID 1). + +It terminates itself (by removing ssh jump service and pod via a call to +kubeapi), if it does not see ray pods in the duration of 10 minutes. If the +user re-launches a task before the duration is over, then ssh jump pod is being +reused and will terminate itself when it sees that no ray cluster exist in that +duration. 
+""" +import datetime +import os +import sys +import time + +from kubernetes import client +from kubernetes import config + +# Load kube config +config.load_incluster_config() + +v1 = client.CoreV1Api() + +current_name = os.getenv('MY_POD_NAME') +current_namespace = os.getenv('MY_POD_NAMESPACE') + +# The amount of time in seconds where no Ray pods exist in which after that time +# ssh jump pod terminates itself +alert_threshold = int(os.getenv('ALERT_THRESHOLD', '600')) +# The amount of time in seconds to wait between Ray pods existence checks +retry_interval = int(os.getenv('RETRY_INTERVAL', '60')) + +# Ray pods are labeled with this value i.e., ssh jump name which is unique per +# user (based on user hash) +label_selector = f'skypilot-ssh-jump={current_name}' + + +def poll(): + sys.stdout.write('Starting polling.\n') + + alert_delta = datetime.timedelta(seconds=alert_threshold) + + # Set delay for each retry + retry_interval_delta = datetime.timedelta(seconds=retry_interval) + + # Accumulated time of where no SkyPilot cluster exists. Used to compare + # against alert_threshold + nocluster_delta = datetime.timedelta() + + while True: + sys.stdout.write(f'Sleeping {retry_interval} seconds..\n') + time.sleep(retry_interval) + + # List the pods in the current namespace + try: + ret = v1.list_namespaced_pod(current_namespace, + label_selector=label_selector) + except Exception as e: + sys.stdout.write(f'Error: listing pods failed with error: {e}\n') + raise + + if len(ret.items) == 0: + sys.stdout.write(f'Did not find pods with label "{label_selector}" ' + f'in namespace {current_namespace}\n') + nocluster_delta = nocluster_delta + retry_interval_delta + sys.stdout.write( + f'Time since no pods found: {nocluster_delta}, alert ' + f'threshold: {alert_delta}\n') + else: + sys.stdout.write( + f'Found pods with label "{label_selector}" in namespace ' + f'{current_namespace}\n') + # reset .. 
+ nocluster_delta = datetime.timedelta() + sys.stdout.write(f'noray_delta is reset: {nocluster_delta}\n') + + if nocluster_delta >= alert_delta: + sys.stdout.write( + f'nocluster_delta: {nocluster_delta} crossed alert threshold: ' + f'{alert_delta}. Time to terminate myself and my service.\n') + try: + # ssh jump resources created under same name + v1.delete_namespaced_service(current_name, current_namespace) + v1.delete_namespaced_pod(current_name, current_namespace) + except Exception as e: + sys.stdout.write('[ERROR] Deletion failed. Exiting ' + f'poll() with error: {e}\n') + raise + + break + + sys.stdout.write('Done polling.\n') + + +def main(): + sys.stdout.write('SkyPilot SSH Jump Pod Lifecycle Manager\n') + sys.stdout.write(f'current_name: {current_name}\n') + sys.stdout.write(f'current_namespace: {current_namespace}\n') + sys.stdout.write(f'alert_threshold time: {alert_threshold}\n') + sys.stdout.write(f'retry_interval time: {retry_interval}\n') + sys.stdout.write(f'label_selector: {label_selector}\n') + + if not current_name or not current_namespace: + # Raise Exception with message to terminate pod + raise Exception('Missing environment variables MY_POD_NAME or ' + 'MY_POD_NAMESPACE') + poll() + + +if __name__ == '__main__': + main() diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index b753ccb37dd..255012cb474 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -726,22 +726,22 @@ def get_ssh_proxy_command(private_key_path: str, ssh_jump_name: str, return ssh_jump_proxy_command -def setup_sshjump_svc(ssh_jump_name: str, namespace: str, - service_type: KubernetesServiceType): +def setup_ssh_jump_svc(ssh_jump_name: str, namespace: str, + service_type: KubernetesServiceType): """Sets up Kubernetes service resource to access for SSH jump pod. This method acts as a necessary complement to be run along with - setup_sshjump_pod(...) method. This service ensures the pod is accessible. 
+ setup_ssh_jump_pod(...) method. This service ensures the pod is accessible. Args: - sshjump_name: Name to use for the SSH jump service + ssh_jump_name: Name to use for the SSH jump service namespace: Namespace to create the SSH jump service in service_type: Networking configuration on either to use NodePort or ClusterIP service to ssh in """ - # Fill in template - ssh_key_secret and sshjump_image are not required for + # Fill in template - ssh_key_secret and ssh_jump_image are not required for # the service spec, so we pass in empty strs. - content = fill_sshjump_template('', '', ssh_jump_name, service_type.value) + content = fill_ssh_jump_template('', '', ssh_jump_name, service_type.value) # Create service try: kubernetes.core_api().create_namespaced_service(namespace, @@ -788,8 +788,8 @@ def setup_sshjump_svc(ssh_jump_name: str, namespace: str, logger.info(f'Created SSH Jump Service {ssh_jump_name}.') -def setup_sshjump_pod(sshjump_name: str, sshjump_image: str, - ssh_key_secret: str, namespace: str): +def setup_ssh_jump_pod(ssh_jump_name: str, ssh_jump_image: str, + ssh_key_secret: str, namespace: str): """Sets up Kubernetes RBAC and pod for SSH jump host. Our Kubernetes implementation uses a SSH jump pod to reach SkyPilot clusters @@ -798,18 +798,18 @@ def setup_sshjump_pod(sshjump_name: str, sshjump_image: str, permission to watch for other SkyPilot pods and terminate itself if there are no SkyPilot pods running. - setup_sshjump_service must also be run to ensure that the SSH jump pod is + setup_ssh_jump_service must also be run to ensure that the SSH jump pod is reachable. 
Args: - sshjump_image: Container image to use for the SSH jump pod - sshjump_name: Name to use for the SSH jump pod + ssh_jump_image: Container image to use for the SSH jump pod + ssh_jump_name: Name to use for the SSH jump pod ssh_key_secret: Secret name for the SSH key stored in the cluster namespace: Namespace to create the SSH jump pod in """ # Fill in template - service is created separately so service_type is not # required, so we pass in empty str. - content = fill_sshjump_template(ssh_key_secret, sshjump_image, sshjump_name, + content = fill_ssh_jump_template(ssh_key_secret, ssh_jump_image, ssh_jump_name, '') # ServiceAccount try: @@ -855,20 +855,20 @@ def setup_sshjump_pod(sshjump_name: str, sshjump_image: str, except kubernetes.api_exception() as e: if e.status == 409: logger.info( - f'SSH Jump Host {sshjump_name} already exists in the cluster, ' + f'SSH Jump Host {ssh_jump_name} already exists in the cluster, ' 'using it.') else: raise else: - logger.info(f'Created SSH Jump Host {sshjump_name}.') + logger.info(f'Created SSH Jump Host {ssh_jump_name}.') -def clean_zombie_sshjump_pod(namespace: str, node_id: str): +def clean_zombie_ssh_jump_pod(namespace: str, node_id: str): """Analyzes SSH jump pod and removes if it is in a bad state Prevents the existence of a dangling SSH jump pod. This could happen in case the pod main container did not start properly (or failed). In that - case, jump pod lifecycle management (LCM) will not functioning properly to + case, jump pod lifecycle manager will not function properly to remove the pod and service automatically, and must be done manually. 
Args: @@ -890,48 +890,48 @@ def find(l, predicate): ' but the pod was not found (404).') raise else: - sshjump_name = pod.metadata.labels.get('skypilot-sshjump') + ssh_jump_name = pod.metadata.labels.get('skypilot-ssh-jump') try: - sshjump_pod = kubernetes.core_api().read_namespaced_pod( - sshjump_name, namespace) - cont_ready_cond = find(sshjump_pod.status.conditions, + ssh_jump_pod = kubernetes.core_api().read_namespaced_pod( + ssh_jump_name, namespace) + cont_ready_cond = find(ssh_jump_pod.status.conditions, lambda c: c.type == 'ContainersReady') if cont_ready_cond and \ cont_ready_cond.status == 'False': # The main container is not ready. To be on the safe side - # and prevent a dangling sshjump pod, lets remove it and + # and prevent a dangling ssh jump pod, lets remove it and # the service. Otherwise main container is ready and its lifecycle # management script takes care of the cleaning. - kubernetes.core_api().delete_namespaced_pod(sshjump_name, namespace) + kubernetes.core_api().delete_namespaced_pod(ssh_jump_name, namespace) kubernetes.core_api().delete_namespaced_service( - sshjump_name, namespace) + ssh_jump_name, namespace) # only warn and proceed as usual except kubernetes.api_exception() as e: - logger.warning(f'Tried to check sshjump pod {sshjump_name},' + logger.warning(f'Tried to check ssh jump pod {ssh_jump_name},' f' but got error {e}\n. Consider running `kubectl ' - f'delete pod {sshjump_name} -n {namespace}` to manually ' + f'delete pod {ssh_jump_name} -n {namespace}` to manually ' 'remove the pod if it has crashed.') - # We encountered an issue while checking sshjump pod. To be on + # We encountered an issue while checking ssh jump pod. 
To be on # the safe side, lets remove its service so the port is freed try: kubernetes.core_api().delete_namespaced_service( - sshjump_name, namespace) + ssh_jump_name, namespace) except kubernetes.api_exception(): pass -def fill_sshjump_template(ssh_key_secret: str, sshjump_image: str, - sshjump_name: str, service_type: str) -> Dict: +def fill_ssh_jump_template(ssh_key_secret: str, ssh_jump_image: str, + ssh_jump_name: str, service_type: str) -> Dict: template_path = os.path.join(sky.__root_dir__, 'templates', - 'kubernetes-sshjump.yml.j2') + 'kubernetes-ssh-jump.yml.j2') if not os.path.exists(template_path): raise FileNotFoundError( - 'Template "kubernetes-sshjump.j2" does not exist.') + 'Template "kubernetes-ssh-jump.j2" does not exist.') with open(template_path) as fin: template = fin.read() j2_template = jinja2.Template(template) - cont = j2_template.render(name=sshjump_name, - image=sshjump_image, + cont = j2_template.render(name=ssh_jump_name, + image=ssh_jump_image, secret=ssh_key_secret, service_type=service_type) content = yaml.safe_load(cont) From 657cd6fd7793a06253bfb2fa76f281e178b734cf Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 15 Sep 2023 14:54:38 -0700 Subject: [PATCH 182/183] lint --- sky/authentication.py | 10 ++++++---- sky/clouds/kubernetes.py | 4 ++-- sky/utils/command_runner.py | 4 ++-- sky/utils/kubernetes_utils.py | 19 ++++++++++--------- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/sky/authentication.py b/sky/authentication.py index 88930cbf688..022dba9264c 100644 --- a/sky/authentication.py +++ b/sky/authentication.py @@ -384,14 +384,16 @@ def setup_kubernetes_authentication(config: Dict[str, Any]) -> Dict[str, Any]: nodeport_mode = kubernetes_utils.KubernetesNetworkingMode.NODEPORT port_forward_mode = kubernetes_utils.KubernetesNetworkingMode.PORTFORWARD network_mode_str = skypilot_config.get_nested(('kubernetes', 'networking'), - port_forward_mode.value) + port_forward_mode.value) try: - network_mode = 
kubernetes_utils.KubernetesNetworkingMode.from_str(network_mode_str) + network_mode = kubernetes_utils.KubernetesNetworkingMode.from_str( + network_mode_str) except ValueError as e: # Add message saying "Please check: ~/.sky/config.yaml" to the error # message. - e.message += f'\nPlease check {skypilot_config.CONFIG_PATH}.' - raise + with ux_utils.print_exception_no_traceback(): + raise ValueError(str(e) + ' Please check: ~/.sky/config.yaml.') \ + from None get_or_generate_keys() # Run kubectl command to add the public key to the cluster. diff --git a/sky/clouds/kubernetes.py b/sky/clouds/kubernetes.py index 3651d309282..50b358755ed 100644 --- a/sky/clouds/kubernetes.py +++ b/sky/clouds/kubernetes.py @@ -213,8 +213,8 @@ def make_deploy_resources_variables( image_id = service_catalog.get_image_id_from_tag(image_id, clouds='kubernetes') # TODO(romilb): Create a lightweight image for SSH jump host - ssh_jump_image = service_catalog.get_image_id_from_tag(self.IMAGE_CPU, - clouds='kubernetes') + ssh_jump_image = service_catalog.get_image_id_from_tag( + self.IMAGE_CPU, clouds='kubernetes') k8s_acc_label_key = None k8s_acc_label_value = None diff --git a/sky/utils/command_runner.py b/sky/utils/command_runner.py index a762172a0f9..c3dd73eb345 100644 --- a/sky/utils/command_runner.py +++ b/sky/utils/command_runner.py @@ -87,8 +87,8 @@ def ssh_options_list( # process running ProxyCommand is kept running as long as the ssh session # is running and the ControlMaster keeps the session, which results in # 'ControlPersist' number of seconds delay per ssh commands ran. - if (ssh_control_name is not None and docker_ssh_proxy_command is None - and not disable_control_master): + if (ssh_control_name is not None and docker_ssh_proxy_command is None and + not disable_control_master): arg_dict.update({ # Control path: important optimization as we do multiple ssh in one # sky.launch(). 
diff --git a/sky/utils/kubernetes_utils.py b/sky/utils/kubernetes_utils.py index 255012cb474..cadd395bcd0 100644 --- a/sky/utils/kubernetes_utils.py +++ b/sky/utils/kubernetes_utils.py @@ -50,12 +50,11 @@ def from_str(cls, mode: str) -> 'KubernetesNetworkingMode': return cls.PORTFORWARD else: raise ValueError(f'Unsupported kubernetes networking mode: ' - f'{mode}. The mode has to be either ' + f'{mode}. The mode must be either ' f'\'{cls.PORTFORWARD.value}\' or ' f'\'{cls.NODEPORT.value}\'. ') - class KubernetesServiceType(enum.Enum): """Enum for the different types of services.""" NODEPORT = 'NodePort' @@ -809,8 +808,8 @@ def setup_ssh_jump_pod(ssh_jump_name: str, ssh_jump_image: str, """ # Fill in template - service is created separately so service_type is not # required, so we pass in empty str. - content = fill_ssh_jump_template(ssh_key_secret, ssh_jump_image, ssh_jump_name, - '') + content = fill_ssh_jump_template(ssh_key_secret, ssh_jump_image, + ssh_jump_name, '') # ServiceAccount try: kubernetes.core_api().create_namespaced_service_account( @@ -902,15 +901,17 @@ def find(l, predicate): # and prevent a dangling ssh jump pod, lets remove it and # the service. Otherwise main container is ready and its lifecycle # management script takes care of the cleaning. - kubernetes.core_api().delete_namespaced_pod(ssh_jump_name, namespace) + kubernetes.core_api().delete_namespaced_pod(ssh_jump_name, + namespace) kubernetes.core_api().delete_namespaced_service( ssh_jump_name, namespace) # only warn and proceed as usual except kubernetes.api_exception() as e: - logger.warning(f'Tried to check ssh jump pod {ssh_jump_name},' - f' but got error {e}\n. Consider running `kubectl ' - f'delete pod {ssh_jump_name} -n {namespace}` to manually ' - 'remove the pod if it has crashed.') + logger.warning( + f'Tried to check ssh jump pod {ssh_jump_name},' + f' but got error {e}\n. 
Consider running `kubectl ' + f'delete pod {ssh_jump_name} -n {namespace}` to manually ' + 'remove the pod if it has crashed.') # We encountered an issue while checking ssh jump pod. To be on # the safe side, lets remove its service so the port is freed try: From 9c4e3387878cb38b6ffaee4c75a0980dc316546f Mon Sep 17 00:00:00 2001 From: Romil Bhardwaj Date: Fri, 15 Sep 2023 15:40:42 -0700 Subject: [PATCH 183/183] image build fixes --- tests/kubernetes/build_image.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/kubernetes/build_image.sh b/tests/kubernetes/build_image.sh index 84c3715e1e4..675a15b12bc 100755 --- a/tests/kubernetes/build_image.sh +++ b/tests/kubernetes/build_image.sh @@ -32,7 +32,7 @@ while getopts ":pg" opt; do done # Add -gpu to the tag if the GPU image is being built -if [[ $gpu ]]; then +if [[ $gpu == "true" ]]; then TAG=$TAG-gpu:latest else TAG=$TAG:latest