From f7cd5ad7e63c5077519212a96d80c58795935cba Mon Sep 17 00:00:00 2001 From: Zhanghao Wu Date: Thu, 4 Jul 2024 00:04:22 -0700 Subject: [PATCH] [Cudo] Allow opening ports for cudo (#3717) * Allow opening ports for cudo * fix logging * format * Avoid host controller for cudo * install cudoctl on controller * fix cudoctl installation * update cudo controller message --- sky/clouds/cudo.py | 4 ++++ sky/provision/cudo/__init__.py | 3 ++- sky/provision/cudo/instance.py | 15 ++++++++++++--- sky/utils/controller_utils.py | 13 +++++++------ 4 files changed, 25 insertions(+), 10 deletions(-) diff --git a/sky/clouds/cudo.py b/sky/clouds/cudo.py index 1a32bb0bd2c..3ad66306517 100644 --- a/sky/clouds/cudo.py +++ b/sky/clouds/cudo.py @@ -66,6 +66,10 @@ class Cudo(clouds.Cloud): clouds.CloudImplementationFeatures.DOCKER_IMAGE: ('Docker image is currently not supported on Cudo. You can try ' 'running docker command inside the `run` section in task.yaml.'), + clouds.CloudImplementationFeatures.HOST_CONTROLLERS: ( + 'Cudo Compute cannot host a controller as it does not support ' + 'autostopping, which will leave the controller to run indefinitely.' 
+ ), } _MAX_CLUSTER_NAME_LEN_LIMIT = 60 diff --git a/sky/provision/cudo/__init__.py b/sky/provision/cudo/__init__.py index bbdc96413a8..c4587bfdfa7 100644 --- a/sky/provision/cudo/__init__.py +++ b/sky/provision/cudo/__init__.py @@ -3,6 +3,7 @@ from sky.provision.cudo.config import bootstrap_instances from sky.provision.cudo.instance import cleanup_ports from sky.provision.cudo.instance import get_cluster_info +from sky.provision.cudo.instance import open_ports from sky.provision.cudo.instance import query_instances from sky.provision.cudo.instance import run_instances from sky.provision.cudo.instance import stop_instances @@ -11,4 +12,4 @@ __all__ = ('bootstrap_instances', 'run_instances', 'stop_instances', 'terminate_instances', 'wait_instances', 'get_cluster_info', - 'cleanup_ports', 'query_instances') + 'cleanup_ports', 'query_instances', 'open_ports') diff --git a/sky/provision/cudo/instance.py b/sky/provision/cudo/instance.py index 71ada577e53..5f7473a4d93 100644 --- a/sky/provision/cudo/instance.py +++ b/sky/provision/cudo/instance.py @@ -157,11 +157,10 @@ def terminate_instances( del provider_config instances = _filter_instances(cluster_name_on_cloud, None) for inst_id, inst in instances.items(): - logger.info(f'Terminating instance {inst_id}.' - f'{inst}') if worker_only and inst['name'].endswith('-head'): continue - logger.info(f'Removing {inst_id}: {inst}') + logger.debug(f'Terminating Cudo instance {inst_id}.' + f'{inst}') cudo_wrapper.remove(inst_id) @@ -220,6 +219,16 @@ def query_instances( return statuses +def open_ports( + cluster_name_on_cloud: str, + ports: List[str], + provider_config: Optional[Dict[str, Any]] = None, +) -> None: + del cluster_name_on_cloud, ports, provider_config + # Cudo has all ports open by default. Nothing to do here. 
+ return + + def cleanup_ports( cluster_name_on_cloud: str, ports: List[str], diff --git a/sky/utils/controller_utils.py b/sky/utils/controller_utils.py index ba65d4b664a..5a44e318985 100644 --- a/sky/utils/controller_utils.py +++ b/sky/utils/controller_utils.py @@ -247,6 +247,13 @@ def _get_cloud_dependencies_installation_commands( '/bin/linux/amd64/kubectl" && ' 'sudo install -o root -g root -m 0755 ' 'kubectl /usr/local/bin/kubectl))') + elif isinstance(cloud, clouds.Cudo): + commands.append( + f'echo -en "\\r{prefix_str}Cudo{empty_str}" && ' + 'pip list | grep cudo-compute > /dev/null 2>&1 || ' + 'pip install "cudo-compute>=0.1.10" > /dev/null 2>&1 && ' + 'wget https://download.cudo.org/compute/cudoctl-0.3.2-amd64.deb -O ~/cudoctl.deb > /dev/null 2>&1 && ' # pylint: disable=line-too-long + 'sudo dpkg -i ~/cudoctl.deb > /dev/null 2>&1') if controller == Controllers.JOBS_CONTROLLER: if isinstance(cloud, clouds.IBM): commands.append( @@ -263,12 +270,6 @@ def _get_cloud_dependencies_installation_commands( f'echo -en "\\r{prefix_str}RunPod{empty_str}" && ' 'pip list | grep runpod > /dev/null 2>&1 || ' 'pip install "runpod>=1.5.1" > /dev/null 2>&1') - elif isinstance(cloud, clouds.Cudo): - # cudo doesn't support open port - commands.append( - f'echo -en "\\r{prefix_str}Cudo{empty_str}" && ' - 'pip list | grep cudo-compute > /dev/null 2>&1 || ' - 'pip install "cudo-compute>=0.1.8" > /dev/null 2>&1') if (cloudflare.NAME in storage_lib.get_cached_enabled_storage_clouds_or_refresh()): commands.append(f'echo -en "\\r{prefix_str}Cloudflare{empty_str}" && ' +