From 08ce68908c16b47432f96fe867ae53c7f3557f48 Mon Sep 17 00:00:00 2001 From: cblmemo Date: Sun, 3 Sep 2023 11:36:26 -0700 Subject: [PATCH] Azure finished --- sky/adaptors/azure.py | 37 ++++++++++++- sky/backends/cloud_vm_ray_backend.py | 9 +-- sky/provision/azure/__init__.py | 4 ++ sky/provision/azure/instance.py | 83 ++++++++++++++++++++++++++++ sky/skylet/providers/azure/config.py | 26 --------- 5 files changed, 123 insertions(+), 36 deletions(-) create mode 100644 sky/provision/azure/__init__.py create mode 100644 sky/provision/azure/instance.py diff --git a/sky/adaptors/azure.py b/sky/adaptors/azure.py index b455a24792e..cd695b6ad01 100644 --- a/sky/adaptors/azure.py +++ b/sky/adaptors/azure.py @@ -1,14 +1,15 @@ """Azure cli adaptor""" -# pylint: disable=import-outside-toplevel -from functools import wraps +import functools +import threading azure = None +_session_creation_lock = threading.RLock() def import_package(func): - @wraps(func) + @functools.wraps(func) def wrapper(*args, **kwargs): global azure if azure is None: @@ -35,3 +36,33 @@ def get_current_account_user() -> str: """Get the default account user.""" from azure.common import credentials return credentials.get_cli_profile().get_current_account_user() + + +@import_package +def http_error_exception(): + """HttpError exception.""" + from azure.core import exceptions + return exceptions.HttpResponseError + + +@functools.lru_cache() +@import_package +def get_client(name: str, subscription_id: str): + # Sky only supports Azure CLI credential for now. + # Increase the timeout to fix the Azure get-access-token timeout issue. + # Tracked in + # https://github.com/Azure/azure-cli/issues/20404#issuecomment-1249575110 + from azure.identity import AzureCliCredential + from azure.mgmt.network import NetworkManagementClient + from azure.mgmt.resource import ResourceManagementClient + with _session_creation_lock: + credential = AzureCliCredential(process_timeout=30) + if name == 'compute': + from azure.mgmt.compute import ComputeManagementClient + return ComputeManagementClient(credential, subscription_id) + elif name == 'network': + return NetworkManagementClient(credential, subscription_id) + elif name == 'resource': + return ResourceManagementClient(credential, subscription_id) + else: + raise ValueError(f'Client not supported: "{name}"') diff --git a/sky/backends/cloud_vm_ray_backend.py b/sky/backends/cloud_vm_ray_backend.py index 7546a4977d0..a2bfa3c5a3f 100644 --- a/sky/backends/cloud_vm_ray_backend.py +++ b/sky/backends/cloud_vm_ray_backend.py @@ -2877,7 +2877,7 @@ def _get_zone(runner): def _open_inexistent_ports(self, handle: CloudVmRayResourceHandle, ports_to_open: List[Union[int, str]]) -> None: cloud = handle.launched_resources.cloud - if not isinstance(cloud, (clouds.AWS, clouds.GCP)): + if not isinstance(cloud, (clouds.AWS, clouds.GCP, clouds.Azure)): logger.warning(f'Cannot open ports for {cloud} that not support ' 'new provisioner API.') return @@ -4011,12 +4011,7 @@ def post_teardown_cleanup(self, if terminate: cloud = handle.launched_resources.cloud config = common_utils.read_yaml(handle.cluster_yaml) - if isinstance(cloud, (clouds.AWS, clouds.GCP)): - # Clean up AWS SGs or GCP firewall rules - # We don't need to clean up on Azure since it is done by - # our sky node provider. - # TODO(tian): Adding a no-op cleanup_ports API after #2286 - # merged. + if isinstance(cloud, (clouds.AWS, clouds.GCP, clouds.Azure)): provision_lib.cleanup_ports(repr(cloud), cluster_name_on_cloud, config['provider']) diff --git a/sky/provision/azure/__init__.py b/sky/provision/azure/__init__.py new file mode 100644 index 00000000000..673308b42d8 --- /dev/null +++ b/sky/provision/azure/__init__.py @@ -0,0 +1,4 @@ +"""Azure provisioner for SkyPilot.""" + +from sky.provision.aws.instance import cleanup_ports +from sky.provision.azure.instance import open_ports diff --git a/sky/provision/azure/instance.py b/sky/provision/azure/instance.py new file mode 100644 index 00000000000..84122b6d87d --- /dev/null +++ b/sky/provision/azure/instance.py @@ -0,0 +1,83 @@ +"""Azure instance provisioning.""" +from typing import Any, Callable, Dict, List, Optional, Union + +from sky import sky_logging +from sky.adaptors import azure + +logger = sky_logging.init_logger(__name__) + +# Tag uniquely identifying all nodes of a cluster +TAG_RAY_CLUSTER_NAME = 'ray-cluster-name' +TAG_RAY_NODE_KIND = 'ray-node-type' + + +def get_azure_sdk_function(client: Any, function_name: str) -> Callable: + """Retrieve a callable function from Azure SDK client object. + Newer versions of the various client SDKs renamed function names to + have a begin_ prefix. This function supports both the old and new + versions of the SDK by first trying the old name and falling back to + the prefixed new name. + """ + func = getattr(client, function_name, + getattr(client, f'begin_{function_name}', None)) + if func is None: + raise AttributeError( + '"{obj}" object has no {func} or begin_{func} attribute'.format( + obj={client.__name__}, func=function_name)) + return func + + +def open_ports( + cluster_name_on_cloud: str, + ports: List[Union[int, str]], + provider_config: Optional[Dict[str, Any]] = None, +) -> None: + """See sky/provision/__init__.py""" + assert provider_config is not None, cluster_name_on_cloud + subscription_id = provider_config['subscription_id'] + resource_group = provider_config['resource_group'] + network_client = azure.get_client('network', subscription_id) + create_or_update = get_azure_sdk_function( + client=network_client.security_rules, function_name='create_or_update') + ports = [str(port) for port in ports if port != 22] + rule_name = f'user-ports-{"-".join(ports)}' + + def security_rule_parameters(priority: int) -> Dict[str, Any]: + return { + "priority": priority, + "protocol": "TCP", + "access": "Allow", + "direction": "Inbound", + "sourceAddressPrefix": "*", + "sourcePortRange": "*", + "destinationAddressPrefix": "*", + "destinationPortRanges": ports, + } + + list_nsg = get_azure_sdk_function( + client=network_client.network_security_groups, function_name='list') + for nsg in list_nsg(resource_group): + try: + # Azure NSG rules have a priority field that determines the order + # in which they are applied. The priority must be unique across + # all inbound rules in one NSG. + max_inbound_priority = max([ + rule.priority + for rule in nsg.security_rules + if rule.direction == 'Inbound' + ]) + create_or_update(resource_group, nsg.name, rule_name, + security_rule_parameters(max_inbound_priority + 1)) + except azure.http_error_exception() as e: + logger.warning( + f'Failed to open ports {ports} in NSG {nsg.name}: {e}') + + +def cleanup_ports( + cluster_name_on_cloud: str, + provider_config: Optional[Dict[str, Any]] = None, +) -> None: + """See sky/provision/__init__.py""" + # Azure will automatically cleanup network security groups when cleanup + # resource group. So we don't need to do anything here. + del cluster_name_on_cloud, provider_config # Unused. diff --git a/sky/skylet/providers/azure/config.py b/sky/skylet/providers/azure/config.py index f8738d95c68..a937102f579 100644 --- a/sky/skylet/providers/azure/config.py +++ b/sky/skylet/providers/azure/config.py @@ -81,32 +81,6 @@ def _configure_resource_group(config): with open(template_path, "r") as template_fp: template = json.load(template_fp) - # Setup firewall rules for ports - nsg_resource = None - for resource in template["resources"]: - if resource["type"] == "Microsoft.Network/networkSecurityGroups": - nsg_resource = resource - break - assert nsg_resource is not None, "Could not find NSG resource in template" - ports = config["provider"].get("ports", None) - if ports is not None: - ports = [str(port) for port in ports if port != 22] - nsg_resource["properties"]["securityRules"].append( - { - "name": "user-ports", - "properties": { - "priority": 1001, - "protocol": "TCP", - "access": "Allow", - "direction": "Inbound", - "sourceAddressPrefix": "*", - "sourcePortRange": "*", - "destinationAddressPrefix": "*", - "destinationPortRanges": ports, - }, - } - ) - logger.info("Using cluster name: %s", config["cluster_name"]) # set unique id for resources in this cluster