Skip to content

Commit

Permalink
[Core] Make the IP fetch more robust (#3025)
Browse files (browse the repository at this point in the history)
* [Core] Make the IP fetch more robust

* add comment

* fix UX
  • Loading branch information
Michaelvll authored Jan 30, 2024
1 parent 9078de2 commit ef21192
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 4 deletions.
16 changes: 13 additions & 3 deletions sky/backends/backend_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1394,9 +1394,19 @@ def get_node_ips(
assert cloud is not None, provider_name

if cloud.PROVISIONER_VERSION >= clouds.ProvisionerVersion.SKYPILOT:
metadata = provision_lib.get_cluster_info(
provider_name, ray_config['provider'].get('region'),
ray_config['cluster_name'], ray_config['provider'])
try:
metadata = provision_lib.get_cluster_info(
provider_name, ray_config['provider'].get('region'),
ray_config['cluster_name'], ray_config['provider'])
except Exception as e: # pylint: disable=broad-except
# This could happen when the VM is not fully launched, and a user
# is trying to terminate it with `sky down`.
logger.debug(
'Failed to get cluster info for '
f'{ray_config["cluster_name"]} from the new provisioner '
f'with {common_utils.format_exception(e)}.')
raise exceptions.FetchIPError(
exceptions.FetchIPError.Reason.HEAD) from e
if len(metadata.instances) < expected_num_nodes:
# Simulate the exception when Ray head node is not up.
raise exceptions.FetchIPError(exceptions.FetchIPError.Reason.HEAD)
Expand Down
2 changes: 1 addition & 1 deletion sky/provision/runpod/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ def terminate_instances(
del provider_config # unused
instances = _filter_instances(cluster_name_on_cloud, None)
for inst_id, inst in instances.items():
logger.info(f'Terminating instance {inst_id}: {inst}')
logger.debug(f'Terminating instance {inst_id}: {inst}')
if worker_only and inst['name'].endswith('-head'):
continue
try:
Expand Down

0 comments on commit ef21192

Please sign in to comment.