From 5a0ecc703c80ada615274993ade116ba7ed2570c Mon Sep 17 00:00:00 2001 From: JGSweets Date: Mon, 6 May 2024 21:16:07 -0500 Subject: [PATCH] [Core][AWS] Allow specification of IAM roles for resources. (#3488) * feat: add iam capabilities to serve * fix: do not require default * fix: pylint * fix: pylint * refactor: use remote_identity instead of iam_instance_profile * fix: formatting * fix: quote delimiter * fix: typo * refactor: updates config.rst to include aws remote identity * refactor: use cluster_name instead of cluster_name_on_cloud * fix: only use the new remote_identity feature with AWS * fix: config.rst remote identity example * Update docs/source/reference/config.rst refactor: config description Co-authored-by: Zhanghao Wu * Update docs/source/reference/config.rst refactor: config example Co-authored-by: Zhanghao Wu * refactor: remove redundant example --------- Co-authored-by: Zhanghao Wu --- docs/source/reference/config.rst | 15 ++++++++++++++- sky/backends/backend_utils.py | 12 ++++++++++-- sky/templates/aws-ray.yml.j2 | 4 ++++ sky/utils/schemas.py | 25 +++++++++++++++++++++---- 4 files changed, 49 insertions(+), 7 deletions(-) diff --git a/docs/source/reference/config.rst b/docs/source/reference/config.rst index 1dfda834ee0..3c1c02dd659 100644 --- a/docs/source/reference/config.rst +++ b/docs/source/reference/config.rst @@ -109,7 +109,7 @@ Available fields and semantics: # permission to create a security group. security_group_name: my-security-group - # Identity to use for all AWS instances (optional). + # Identity to use for AWS instances (optional). # # LOCAL_CREDENTIALS: The user's local credential files will be uploaded to # AWS instances created by SkyPilot. They are used for accessing cloud @@ -120,6 +120,19 @@ Available fields and semantics: # instances. SkyPilot will auto-create and reuse a service account (IAM # role) for AWS instances. 
# + # Customized service account (IAM role): <string> or <dict> + # - <string>: apply the service account with the specified name to all instances. + # Example: + # remote_identity: my-service-account-name + # - <dict>: A dict mapping from the cluster name (pattern) to the service account name to use. + # NOTE: If none of the wildcard expressions in the dict match the cluster name, LOCAL_CREDENTIALS will be used. + # To specify your default, use "*" as the wildcard expression. + # Example: + # remote_identity: + # my-cluster-name: my-service-account-1 + # sky-serve-controller-*: my-service-account-2 + # "*": my-default-service-account + # # Two caveats of SERVICE_ACCOUNT for multicloud users: # # - This only affects AWS instances. Local AWS credentials will still be diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index fecbcaad0b8..24fde0466f7 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -1,6 +1,7 @@ """Util constants/functions for the backends.""" from datetime import datetime import enum +import fnmatch import functools import os import pathlib @@ -798,7 +799,12 @@ def write_cluster_config( excluded_clouds = [] remote_identity = skypilot_config.get_nested( (str(cloud).lower(), 'remote_identity'), 'LOCAL_CREDENTIALS') - if remote_identity == 'SERVICE_ACCOUNT': + if remote_identity is not None and not isinstance(remote_identity, str): + for profile in remote_identity: + if fnmatch.fnmatchcase(cluster_name, profile): + remote_identity = remote_identity[profile] + break + if remote_identity != 'LOCAL_CREDENTIALS': if not cloud.supports_service_account_on_remote(): raise exceptions.InvalidCloudConfigs( 'remote_identity: SERVICE_ACCOUNT is specified in ' @@ -888,6 +894,8 @@ def write_cluster_config( # User-supplied labels. 'labels': labels, + # User-supplied remote_identity + 'remote_identity': remote_identity, # The reservation pools that specified by the user. This is # currently only used by GCP. 
'specific_reservations': specific_reservations, @@ -1523,7 +1531,7 @@ def check_owner_identity(cluster_name: str) -> None: for i, (owner, current) in enumerate(zip(owner_identity, current_user_identity)): - # Clean up the owner identiy for the backslash and newlines, caused + # Clean up the owner identity for the backslash and newlines, caused # by the cloud CLI output, e.g. gcloud. owner = owner.replace('\n', '').replace('\\', '') if owner == current: diff --git a/sky/templates/aws-ray.yml.j2 b/sky/templates/aws-ray.yml.j2 index 6f1df43cfd5..66c01f53617 100644 --- a/sky/templates/aws-ray.yml.j2 +++ b/sky/templates/aws-ray.yml.j2 @@ -60,6 +60,10 @@ available_node_types: ray.head.default: resources: {} node_config: + {% if remote_identity not in ['LOCAL_CREDENTIALS', 'SERVICE_ACCOUNT'] %} + IamInstanceProfile: + Name: {{remote_identity}} + {% endif %} InstanceType: {{instance_type}} ImageId: {{image_id}} # Deep Learning AMI (Ubuntu 18.04); see aws.py. # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances diff --git a/sky/utils/schemas.py b/sky/utils/schemas.py index d02436619c3..4ea74714f6c 100644 --- a/sky/utils/schemas.py +++ b/sky/utils/schemas.py @@ -525,7 +525,21 @@ def get_cluster_schema(): _REMOTE_IDENTITY_SCHEMA = { 'remote_identity': { 'type': 'string', - 'case_insensitive_enum': ['LOCAL_CREDENTIALS', 'SERVICE_ACCOUNT'], + 'case_insensitive_enum': ['LOCAL_CREDENTIALS', 'SERVICE_ACCOUNT'] + } +} + +_REMOTE_IDENTITY_SCHEMA_AWS = { + 'remote_identity': { + 'oneOf': [{ + 'type': 'string' + }, { + 'type': 'object', + 'required': [], + 'additionalProperties': { + 'type': 'string', + }, + }] } } @@ -563,7 +577,7 @@ def get_config_schema(): 'additionalProperties': False, 'properties': { 'security_group_name': { - 'type': 'string', + 'type': 'string' }, **_LABELS_SCHEMA, **_NETWORK_CONFIG_SCHEMA, @@ -660,8 +674,11 @@ def get_config_schema(): }, } - for config in cloud_configs.values(): - 
config['properties'].update(_REMOTE_IDENTITY_SCHEMA) + for cloud, config in cloud_configs.items(): + if cloud == 'aws': + config['properties'].update(_REMOTE_IDENTITY_SCHEMA_AWS) + else: + config['properties'].update(_REMOTE_IDENTITY_SCHEMA) return { '$schema': 'https://json-schema.org/draft/2020-12/schema', 'type': 'object',