Skip to content

Commit

Permalink
[Azure] SkyPilot provisioner for Azure (#3704)
Browse files Browse the repository at this point in the history
* Use SkyPilot for status query

* format

* Avoid reconfig

* Add todo

* Add termination and stopping

* add stop and termination into __init__

* get rid of azure special handling in backend

* format

* Fix filtering for autodown clusters

* Move NSG waiting

* wip

* wip

* working?

* Fix and format

* remove node providers

* Add manifest and fix formatting

* Fix waiting for deletion

* remove azure provider format

* Skip termination when the resource group does not exist

* Add retry for fetching subscription ID

* Fix provisioning state

* Fix restarting instances by adding wait for pendings

* fixes

* fix

* Add azure handler

* adopt changes from node provider

* format

* fix merge conflict

* format

* Add detailed reason

* fix import

* Fix backward compat

* fix head node fetching

* format

* fix existing instances

* backward compat test for multi-node

* backward compat for cached cluster info

* fix back compat for provisioner update

* minor

* fix restarting

* revert accidental changes

* fix logging controller utils

* add path

* activate python env for sky jobs logs

* fix quote

* format

* Longer timeout for docker initialization

* fix

* make cloud init more readable

* fix

* fix docker

* fix tests

* add region argument for eu-south-1 region

* Add --region argument for storage aws s3

* Fix tests

* longer

* wip

* wip

* address comments

* revert storage

* revert changes
  • Loading branch information
Michaelvll committed Aug 23, 2024
1 parent 68fafe7 commit 454ed47
Show file tree
Hide file tree
Showing 37 changed files with 1,142 additions and 1,272 deletions.
5 changes: 1 addition & 4 deletions .github/workflows/format.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,18 +35,15 @@ jobs:
- name: Running yapf
run: |
yapf --diff --recursive ./ --exclude 'sky/skylet/ray_patches/**' \
--exclude 'sky/skylet/providers/azure/**' \
--exclude 'sky/skylet/providers/ibm/**'
- name: Running black
run: |
black --diff --check sky/skylet/providers/azure/ \
sky/skylet/providers/ibm/
black --diff --check sky/skylet/providers/ibm/
- name: Running isort for black formatted files
run: |
isort --diff --check --profile black -l 88 -m 3 \
sky/skylet/providers/ibm/
- name: Running isort for yapf formatted files
run: |
isort --diff --check ./ --sg 'sky/skylet/ray_patches/**' \
--sg 'sky/skylet/providers/azure/**' \
--sg 'sky/skylet/providers/ibm/**'
3 changes: 0 additions & 3 deletions format.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,18 +48,15 @@ YAPF_FLAGS=(

YAPF_EXCLUDES=(
'--exclude' 'build/**'
'--exclude' 'sky/skylet/providers/azure/**'
'--exclude' 'sky/skylet/providers/ibm/**'
)

ISORT_YAPF_EXCLUDES=(
'--sg' 'build/**'
'--sg' 'sky/skylet/providers/azure/**'
'--sg' 'sky/skylet/providers/ibm/**'
)

BLACK_INCLUDES=(
'sky/skylet/providers/azure'
'sky/skylet/providers/ibm'
)

Expand Down
7 changes: 7 additions & 0 deletions sky/adaptors/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,3 +82,10 @@ def get_client(name: str, subscription_id: str):
def create_security_rule(**kwargs):
    """Construct an Azure network ``SecurityRule`` from keyword arguments.

    All keyword arguments are forwarded unchanged to the ``SecurityRule``
    constructor. The Azure SDK is imported inside the function body, so the
    import only happens when this function is called.
    """
    from azure.mgmt.network import models as network_models
    return network_models.SecurityRule(**kwargs)


@common.load_lazy_modules(modules=_LAZY_MODULES)
def deployment_mode():
    """Return ``DeploymentMode`` from the Azure resource management models.

    The Azure SDK is imported inside the function body, so the import only
    happens when this function is called.
    """
    from azure.mgmt.resource.resources import models as resource_models
    return resource_models.DeploymentMode
31 changes: 0 additions & 31 deletions sky/authentication.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
is an exception, due to the limitation of the cloud provider. See the
comments in setup_lambda_authentication)
"""
import base64
import copy
import functools
import os
Expand Down Expand Up @@ -270,36 +269,6 @@ def setup_gcp_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
return configure_ssh_info(config)


# In Azure, cloud-init script must be encoded in base64. See
# https://learn.microsoft.com/en-us/azure/virtual-machines/custom-data
# for more information. Here we decode it and replace the ssh user
# and public key content, then encode it back.
def setup_azure_authentication(config: Dict[str, Any]) -> Dict[str, Any]:
    """Fill the SSH user and public key placeholders of an Azure config.

    The ``skypilot:ssh_user`` and ``skypilot:ssh_public_key_content``
    placeholders are substituted in two places:

    1. Inside each node type's base64-encoded ``cloudInitSetupCommands``
       (decoded, substituted, then re-encoded in place).
    2. Everywhere else in the config, by substituting on its YAML dump.

    Args:
        config: Cluster config containing ``auth.ssh_user`` and
            ``available_node_types``; the cloud-init entries are modified
            in place.

    Returns:
        A new config dict parsed back from the substituted YAML string.
    """
    _, public_key_path = get_or_generate_keys()
    with open(public_key_path, 'r', encoding='utf-8') as key_file:
        public_key = key_file.read().strip()

    def _fill_placeholders(text: str) -> str:
        # The SSH user is looked up at call time, mirroring the order in
        # which the config keys are accessed during substitution.
        text = text.replace('skypilot:ssh_user', config['auth']['ssh_user'])
        return text.replace('skypilot:ssh_public_key_content', public_key)

    for node_type_config in config['available_node_types'].values():
        arm_parameters = node_type_config['node_config']['azure_arm_parameters']
        encoded_script = arm_parameters['cloudInitSetupCommands']
        script = base64.b64decode(encoded_script).decode('utf-8')
        script = _fill_placeholders(script)
        arm_parameters['cloudInitSetupCommands'] = base64.b64encode(
            script.encode('utf-8')).decode('utf-8')

    # Second pass: substitute placeholders that live outside the encoded
    # cloud-init payload by round-tripping the whole config through YAML.
    substituted_yaml = _fill_placeholders(common_utils.dump_yaml_str(config))
    return yaml.safe_load(substituted_yaml)


def setup_lambda_authentication(config: Dict[str, Any]) -> Dict[str, Any]:

get_or_generate_keys()
Expand Down
16 changes: 11 additions & 5 deletions sky/backends/backend_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,8 @@
('available_node_types', 'ray.head.default', 'node_config',
'IamInstanceProfile'),
('available_node_types', 'ray.head.default', 'node_config', 'UserData'),
('available_node_types', 'ray.worker.default', 'node_config', 'UserData'),
('available_node_types', 'ray.head.default', 'node_config',
'azure_arm_parameters', 'cloudInitSetupCommands'),
]


Expand Down Expand Up @@ -1019,13 +1020,18 @@ def _add_auth_to_cluster_config(cloud: clouds.Cloud, cluster_config_file: str):
"""
config = common_utils.read_yaml(cluster_config_file)
# Check the availability of the cloud type.
if isinstance(cloud, (clouds.AWS, clouds.OCI, clouds.SCP, clouds.Vsphere,
clouds.Cudo, clouds.Paperspace)):
if isinstance(cloud, (
clouds.AWS,
clouds.OCI,
clouds.SCP,
clouds.Vsphere,
clouds.Cudo,
clouds.Paperspace,
clouds.Azure,
)):
config = auth.configure_ssh_info(config)
elif isinstance(cloud, clouds.GCP):
config = auth.setup_gcp_authentication(config)
elif isinstance(cloud, clouds.Azure):
config = auth.setup_azure_authentication(config)
elif isinstance(cloud, clouds.Lambda):
config = auth.setup_lambda_authentication(config)
elif isinstance(cloud, clouds.Kubernetes):
Expand Down
Loading

0 comments on commit 454ed47

Please sign in to comment.