From a83ab9ba918fed51cfd5115ad8ee38bfb0c467f2 Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Thu, 2 May 2024 10:03:33 -0400 Subject: [PATCH 01/15] feat: add iam capabilities to serve --- sky/clouds/aws.py | 11 +++++++++++ sky/templates/aws-ray.yml.j2 | 4 ++++ sky/utils/schemas.py | 23 ++++++++++++++++++++++- 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/sky/clouds/aws.py b/sky/clouds/aws.py index 542d1595d86..0e7fd011a43 100644 --- a/sky/clouds/aws.py +++ b/sky/clouds/aws.py @@ -411,6 +411,16 @@ def make_deploy_resources_variables(self, else: security_group = DEFAULT_SECURITY_GROUP_NAME + iam_instance_profile = skypilot_config.get_nested( + ('aws', 'iam_instance_profile'), None) + if iam_instance_profile is not None and not isinstance(iam_instance_profile, str): + for profile in iam_instance_profile: + if cluster_name_on_cloud.startswith(profile) and profile != 'default': + iam_instance_profile = iam_instance_profile[profile] + break + elif profile == 'default': + iam_instance_profile = iam_instance_profile[profile] + return { 'instance_type': r.instance_type, 'custom_resources': custom_resources, @@ -418,6 +428,7 @@ def make_deploy_resources_variables(self, 'region': region_name, 'zones': ','.join(zone_names), 'image_id': image_id, + 'iam_instance_profile': iam_instance_profile, 'security_group': security_group, 'security_group_managed_by_skypilot': str(security_group != user_security_group).lower(), diff --git a/sky/templates/aws-ray.yml.j2 b/sky/templates/aws-ray.yml.j2 index 6f1df43cfd5..2df7e365a8b 100644 --- a/sky/templates/aws-ray.yml.j2 +++ b/sky/templates/aws-ray.yml.j2 @@ -60,6 +60,10 @@ available_node_types: ray.head.default: resources: {} node_config: + {% if iam_instance_profile %} + IamInstanceProfile: + Name: {{iam_instance_profile}} + {% endif %} InstanceType: {{instance_type}} ImageId: {{image_id}} # Deep Learning AMI (Ubuntu 18.04); see aws.py. 
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances diff --git a/sky/utils/schemas.py b/sky/utils/schemas.py index 6c3d84fe857..8a957d6f59d 100644 --- a/sky/utils/schemas.py +++ b/sky/utils/schemas.py @@ -556,7 +556,28 @@ def get_config_schema(): 'additionalProperties': False, 'properties': { 'security_group_name': { - 'type': 'string', + 'type': 'string' + }, + 'iam_instance_profile': { + 'oneOf': [{ + 'type': 'string' + }, { + 'type': 'object', + 'additionalProperties': False, + 'required': ['default'], + 'properties': { + 'sky-serve-controller': { + 'type': 'string', + }, + 'default':{ + 'oneOf': [{ + 'type': 'string' + }, { + 'type': 'null' + }] + } + } + }] }, **_LABELS_SCHEMA, **_NETWORK_CONFIG_SCHEMA, From c9b5f9171bede4c860fbd354ab7174c79958a9d9 Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Thu, 2 May 2024 10:03:33 -0400 Subject: [PATCH 02/15] fix: do not require default --- sky/utils/schemas.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/sky/utils/schemas.py b/sky/utils/schemas.py index 8a957d6f59d..44afe10eb9e 100644 --- a/sky/utils/schemas.py +++ b/sky/utils/schemas.py @@ -556,7 +556,7 @@ def get_config_schema(): 'additionalProperties': False, 'properties': { 'security_group_name': { - 'type': 'string' + 'type': 'string' }, 'iam_instance_profile': { 'oneOf': [{ @@ -564,17 +564,13 @@ def get_config_schema(): }, { 'type': 'object', 'additionalProperties': False, - 'required': ['default'], + 'required': [], 'properties': { 'sky-serve-controller': { 'type': 'string', }, 'default':{ - 'oneOf': [{ - 'type': 'string' - }, { - 'type': 'null' - }] + 'type': 'string' } } }] From a7d456ad57e991550f7bba60932750c173f8f924 Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Thu, 2 May 2024 10:03:33 -0400 Subject: [PATCH 03/15] fix: pylint --- sky/clouds/aws.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/sky/clouds/aws.py 
b/sky/clouds/aws.py index 0e7fd011a43..fd6a386e1ec 100644 --- a/sky/clouds/aws.py +++ b/sky/clouds/aws.py @@ -412,10 +412,12 @@ def make_deploy_resources_variables(self, security_group = DEFAULT_SECURITY_GROUP_NAME iam_instance_profile = skypilot_config.get_nested( - ('aws', 'iam_instance_profile'), None) - if iam_instance_profile is not None and not isinstance(iam_instance_profile, str): + ('aws', 'iam_instance_profile'), None) + if iam_instance_profile is not None and not isinstance( + iam_instance_profile, str): for profile in iam_instance_profile: - if cluster_name_on_cloud.startswith(profile) and profile != 'default': + if cluster_name_on_cloud.startswith( + profile) and profile != 'default': iam_instance_profile = iam_instance_profile[profile] break elif profile == 'default': From be4b86c17ee61111822206da5d756856f4e2e631 Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Thu, 2 May 2024 10:03:33 -0400 Subject: [PATCH 04/15] fix: pylint --- sky/utils/schemas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/utils/schemas.py b/sky/utils/schemas.py index 44afe10eb9e..2f66f283a79 100644 --- a/sky/utils/schemas.py +++ b/sky/utils/schemas.py @@ -569,7 +569,7 @@ def get_config_schema(): 'sky-serve-controller': { 'type': 'string', }, - 'default':{ + 'default': { 'type': 'string' } } From 4cea7a2ea2ccf2d0f8a0a143bed9a3d2f2f2a56e Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Thu, 2 May 2024 10:05:57 -0400 Subject: [PATCH 05/15] refactor: use remote_identity instead of iam_instance_profile --- sky/backends/backend_utils.py | 11 ++++++++++- sky/clouds/aws.py | 13 ------------- sky/templates/aws-ray.yml.j2 | 4 ++-- sky/utils/schemas.py | 28 +++++++++------------------- 4 files changed, 21 insertions(+), 35 deletions(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 5aed22b05ed..cc006fcf2ac 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -1,6 +1,7 @@ """Util 
constants/functions for the backends.""" from datetime import datetime import enum +import fnmatch import functools import os import pathlib @@ -798,7 +799,13 @@ def write_cluster_config( excluded_clouds = [] remote_identity = skypilot_config.get_nested( (str(cloud).lower(), 'remote_identity'), 'LOCAL_CREDENTIALS') - if remote_identity == 'SERVICE_ACCOUNT': + if remote_identity is not None and not isinstance( + remote_identity, str): + for profile in remote_identity: + if fnmatch.fnmatchcase(cluster_name_on_cloud, profile): + remote_identity = remote_identity[profile] + break + if remote_identity != 'LOCAL_CREDENTIALS': if not cloud.supports_service_account_on_remote(): raise exceptions.InvalidCloudConfigs( 'remote_identity: SERVICE_ACCOUNT is specified in ' @@ -888,6 +895,8 @@ def write_cluster_config( # User-supplied labels. 'labels': labels, + # User-supplied remote_identity + "remote_identity": remote_identity, # The reservation pools that specified by the user. This is # currently only used by GCP. 
'specific_reservations': specific_reservations, diff --git a/sky/clouds/aws.py b/sky/clouds/aws.py index fd6a386e1ec..542d1595d86 100644 --- a/sky/clouds/aws.py +++ b/sky/clouds/aws.py @@ -411,18 +411,6 @@ def make_deploy_resources_variables(self, else: security_group = DEFAULT_SECURITY_GROUP_NAME - iam_instance_profile = skypilot_config.get_nested( - ('aws', 'iam_instance_profile'), None) - if iam_instance_profile is not None and not isinstance( - iam_instance_profile, str): - for profile in iam_instance_profile: - if cluster_name_on_cloud.startswith( - profile) and profile != 'default': - iam_instance_profile = iam_instance_profile[profile] - break - elif profile == 'default': - iam_instance_profile = iam_instance_profile[profile] - return { 'instance_type': r.instance_type, 'custom_resources': custom_resources, @@ -430,7 +418,6 @@ def make_deploy_resources_variables(self, 'region': region_name, 'zones': ','.join(zone_names), 'image_id': image_id, - 'iam_instance_profile': iam_instance_profile, 'security_group': security_group, 'security_group_managed_by_skypilot': str(security_group != user_security_group).lower(), diff --git a/sky/templates/aws-ray.yml.j2 b/sky/templates/aws-ray.yml.j2 index 2df7e365a8b..66c01f53617 100644 --- a/sky/templates/aws-ray.yml.j2 +++ b/sky/templates/aws-ray.yml.j2 @@ -60,9 +60,9 @@ available_node_types: ray.head.default: resources: {} node_config: - {% if iam_instance_profile %} + {% if remote_identity not in ['LOCAL_CREDENTIALS', 'SERVICE_ACCOUNT'] %} IamInstanceProfile: - Name: {{iam_instance_profile}} + Name: {{remote_identity}} {% endif %} InstanceType: {{instance_type}} ImageId: {{image_id}} # Deep Learning AMI (Ubuntu 18.04); see aws.py. 
diff --git a/sky/utils/schemas.py b/sky/utils/schemas.py index 2f66f283a79..4c2cf6efcbe 100644 --- a/sky/utils/schemas.py +++ b/sky/utils/schemas.py @@ -517,8 +517,15 @@ def get_cluster_schema(): _REMOTE_IDENTITY_SCHEMA = { 'remote_identity': { - 'type': 'string', - 'case_insensitive_enum': ['LOCAL_CREDENTIALS', 'SERVICE_ACCOUNT'], + 'oneOf': [{ + 'type': 'string' + }, { + 'type': 'object', + 'required': [], + 'additionalProperties': { + 'type': 'string', + }, + }] } } @@ -558,23 +565,6 @@ def get_config_schema(): 'security_group_name': { 'type': 'string' }, - 'iam_instance_profile': { - 'oneOf': [{ - 'type': 'string' - }, { - 'type': 'object', - 'additionalProperties': False, - 'required': [], - 'properties': { - 'sky-serve-controller': { - 'type': 'string', - }, - 'default': { - 'type': 'string' - } - } - }] - }, **_LABELS_SCHEMA, **_NETWORK_CONFIG_SCHEMA, }, From 703cc1217806889eaca8d9df05bd2e826164042b Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Thu, 2 May 2024 10:06:43 -0400 Subject: [PATCH 06/15] fix: formatting --- sky/backends/backend_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index cc006fcf2ac..6c08097f75d 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -799,8 +799,7 @@ def write_cluster_config( excluded_clouds = [] remote_identity = skypilot_config.get_nested( (str(cloud).lower(), 'remote_identity'), 'LOCAL_CREDENTIALS') - if remote_identity is not None and not isinstance( - remote_identity, str): + if remote_identity is not None and not isinstance(remote_identity, str): for profile in remote_identity: if fnmatch.fnmatchcase(cluster_name_on_cloud, profile): remote_identity = remote_identity[profile] From 02353a74d91f9ef8baa2b249550b2b3be7ab388e Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Thu, 2 May 2024 10:06:43 -0400 Subject: [PATCH 07/15] fix: quote delimiter --- sky/backends/backend_utils.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 6c08097f75d..f8bd933b714 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -895,7 +895,7 @@ def write_cluster_config( # User-supplied labels. 'labels': labels, # User-supplied remote_identity - "remote_identity": remote_identity, + 'remote_identity': remote_identity, # The reservation pools that specified by the user. This is # currently only used by GCP. 'specific_reservations': specific_reservations, From 794dd4015718844887385b6c3378103040374120 Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Mon, 6 May 2024 16:21:15 -0500 Subject: [PATCH 08/15] fix: typo --- sky/backends/backend_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index f8bd933b714..24a5bc1f0a3 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -1531,7 +1531,7 @@ def check_owner_identity(cluster_name: str) -> None: for i, (owner, current) in enumerate(zip(owner_identity, current_user_identity)): - # Clean up the owner identiy for the backslash and newlines, caused + # Clean up the owner identity for the backslash and newlines, caused # by the cloud CLI output, e.g. gcloud. 
owner = owner.replace('\n', '').replace('\\', '') if owner == current: From f56cf2445b3ef8e4dc1821ab9defa73e8888efee Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Mon, 6 May 2024 16:51:02 -0500 Subject: [PATCH 09/15] refactor: updates config.rst to include aws remote identity --- docs/source/reference/config.rst | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/docs/source/reference/config.rst b/docs/source/reference/config.rst index 9bca0a796d7..572af99a3fc 100644 --- a/docs/source/reference/config.rst +++ b/docs/source/reference/config.rst @@ -109,7 +109,7 @@ Available fields and semantics: # permission to create a security group. security_group_name: my-security-group - # Identity to use for all AWS instances (optional). + # Identity to use for deployed AWS instances (optional). # # LOCAL_CREDENTIALS: The user's local credential files will be uploaded to # AWS instances created by SkyPilot. They are used for accessing cloud @@ -120,6 +120,9 @@ Available fields and semantics: # instances. SkyPilot will auto-create and reuse a service account (IAM # role) for AWS instances. # + # User Specified SERVICE_ACCOUNT (IAM role): The name of the remote identity + # to give the launched resouce. + # # Two caveats of SERVICE_ACCOUNT for multicloud users: # # - This only affects AWS instances. Local AWS credentials will still be @@ -131,7 +134,18 @@ Available fields and semantics: # files to assign to these non-AWS instances). # # Default: 'LOCAL_CREDENTIALS'. + ### Format 1 ### + # A string; the same remote identity is applied to all launched resources. remote_identity: LOCAL_CREDENTIALS + ### Format 2 ### + # A dict mapping wildcard expression of cloud names to the resources to the + # resource identity. + # NOTE: If not a wildcard expression in the dict mapping does not match a + # cloud name for a resouce being deployed, the default remote identity is used. + # To specify your own default, utilize "*" as the wildcard expression. 
+ remote_identity: + sky-serve-controller-*: my-controller-specific-value + "*": my-default-value # Advanced GCP configurations (optional). # Apply to all new instances but not existing ones. From e6c59560320c0e8eb0a48afee0eab32743293c3f Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Mon, 6 May 2024 16:52:09 -0500 Subject: [PATCH 10/15] refactor: use cluster_name instead of cluster_name_on_cloud --- sky/backends/backend_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sky/backends/backend_utils.py b/sky/backends/backend_utils.py index 24a5bc1f0a3..56e20af5814 100644 --- a/sky/backends/backend_utils.py +++ b/sky/backends/backend_utils.py @@ -801,7 +801,7 @@ def write_cluster_config( (str(cloud).lower(), 'remote_identity'), 'LOCAL_CREDENTIALS') if remote_identity is not None and not isinstance(remote_identity, str): for profile in remote_identity: - if fnmatch.fnmatchcase(cluster_name_on_cloud, profile): + if fnmatch.fnmatchcase(cluster_name, profile): remote_identity = remote_identity[profile] break if remote_identity != 'LOCAL_CREDENTIALS': From 7b5861ecbb21f86b85e0a8924bd028a2dbadf15f Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Mon, 6 May 2024 16:52:59 -0500 Subject: [PATCH 11/15] fix: only use the new remote_identity feature with AWS --- sky/utils/schemas.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/sky/utils/schemas.py b/sky/utils/schemas.py index 4c2cf6efcbe..6b526c118ae 100644 --- a/sky/utils/schemas.py +++ b/sky/utils/schemas.py @@ -516,6 +516,13 @@ def get_cluster_schema(): } _REMOTE_IDENTITY_SCHEMA = { + 'remote_identity': { + 'type': 'string', + 'case_insensitive_enum': ['LOCAL_CREDENTIALS', 'SERVICE_ACCOUNT'] + } +} + +_REMOTE_IDENTITY_SCHEMA_AWS = { 'remote_identity': { 'oneOf': [{ 'type': 'string' @@ -660,8 +667,11 @@ def get_config_schema(): }, } - for config in cloud_configs.values(): - config['properties'].update(_REMOTE_IDENTITY_SCHEMA) + for cloud, config in 
cloud_configs.items(): + if cloud == 'aws': + config['properties'].update(_REMOTE_IDENTITY_SCHEMA_AWS) + else: + config['properties'].update(_REMOTE_IDENTITY_SCHEMA) return { '$schema': 'https://json-schema.org/draft/2020-12/schema', 'type': 'object', From 6b34a3f91cff1be44b8e9d3f08afdf380971e27c Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Mon, 6 May 2024 16:55:30 -0500 Subject: [PATCH 12/15] fix: config.rst remote identity example --- docs/source/reference/config.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/source/reference/config.rst b/docs/source/reference/config.rst index 572af99a3fc..29c4a645496 100644 --- a/docs/source/reference/config.rst +++ b/docs/source/reference/config.rst @@ -144,8 +144,8 @@ Available fields and semantics: # cloud name for a resouce being deployed, the default remote identity is used. # To specify your own default, utilize "*" as the wildcard expression. remote_identity: - sky-serve-controller-*: my-controller-specific-value - "*": my-default-value + sky-serve-controller-*: my-controller-specific-identity + "*": SERVICE_ACCOUNT # Advanced GCP configurations (optional). # Apply to all new instances but not existing ones. From f1f7f133fad1de3636549f2480791bae48e81b33 Mon Sep 17 00:00:00 2001 From: JGSweets Date: Mon, 6 May 2024 19:34:10 -0500 Subject: [PATCH 13/15] Update docs/source/reference/config.rst refactor: config description Co-authored-by: Zhanghao Wu --- docs/source/reference/config.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/source/reference/config.rst b/docs/source/reference/config.rst index 29c4a645496..62d87c7d2ad 100644 --- a/docs/source/reference/config.rst +++ b/docs/source/reference/config.rst @@ -109,7 +109,7 @@ Available fields and semantics: # permission to create a security group. security_group_name: my-security-group - # Identity to use for deployed AWS instances (optional). + # Identity to use for AWS instances (optional). 
# # LOCAL_CREDENTIALS: The user's local credential files will be uploaded to # AWS instances created by SkyPilot. They are used for accessing cloud From fe757ff99fb35b4f18aa2076ed109dd7ce8c7e1e Mon Sep 17 00:00:00 2001 From: JGSweets Date: Mon, 6 May 2024 19:34:22 -0500 Subject: [PATCH 14/15] Update docs/source/reference/config.rst refactor: config example Co-authored-by: Zhanghao Wu --- docs/source/reference/config.rst | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/source/reference/config.rst b/docs/source/reference/config.rst index 62d87c7d2ad..eabec5c02b2 100644 --- a/docs/source/reference/config.rst +++ b/docs/source/reference/config.rst @@ -120,8 +120,18 @@ Available fields and semantics: # instances. SkyPilot will auto-create and reuse a service account (IAM # role) for AWS instances. # - # User Specified SERVICE_ACCOUNT (IAM role): The name of the remote identity - # to give the launched resouce. + # Customized service account (IAM role): <string> or <dict> + # - <string>: apply the service account with the specified name to all instances. + # Example: + # remote_identity: my-service-account-name + # - <dict>: A dict mapping from the cluster name (pattern) to the service account name to use. + # NOTE: If none of the wildcard expressions in the dict match the cluster name, LOCAL_CREDENTIALS will be used. + # To specify your default, use "*" as the wildcard expression.
+ # Example: + # remote_identity: + # my-cluster-name: my-service-account-1 + # sky-serve-controller-*: my-service-account-2 + # "*": my-default-service-account # # Two caveats of SERVICE_ACCOUNT for multicloud users: # From 27a3dfcc94f436e3fdd3824313c04fd5759280f5 Mon Sep 17 00:00:00 2001 From: Jeremy Goodsitt Date: Mon, 6 May 2024 19:35:53 -0500 Subject: [PATCH 15/15] refactor: remove redundant example --- docs/source/reference/config.rst | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/docs/source/reference/config.rst b/docs/source/reference/config.rst index eabec5c02b2..4d8c71d0a04 100644 --- a/docs/source/reference/config.rst +++ b/docs/source/reference/config.rst @@ -144,18 +144,7 @@ Available fields and semantics: # files to assign to these non-AWS instances). # # Default: 'LOCAL_CREDENTIALS'. - ### Format 1 ### - # A string; the same remote identity is applied to all launched resources. remote_identity: LOCAL_CREDENTIALS - ### Format 2 ### - # A dict mapping wildcard expression of cloud names to the resources to the - # resource identity. - # NOTE: If not a wildcard expression in the dict mapping does not match a - # cloud name for a resouce being deployed, the default remote identity is used. - # To specify your own default, utilize "*" as the wildcard expression. - remote_identity: - sky-serve-controller-*: my-controller-specific-identity - "*": SERVICE_ACCOUNT # Advanced GCP configurations (optional). # Apply to all new instances but not existing ones.