Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Core][AWS] Allow specification of IAM roles for resources. #3488

Merged
merged 15 commits into from
May 7, 2024
Merged
15 changes: 14 additions & 1 deletion docs/source/reference/config.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ Available fields and semantics:
# permission to create a security group.
security_group_name: my-security-group

# Identity to use for all AWS instances (optional).
# Identity to use for AWS instances (optional).
#
# LOCAL_CREDENTIALS: The user's local credential files will be uploaded to
# AWS instances created by SkyPilot. They are used for accessing cloud
Expand All @@ -120,6 +120,19 @@ Available fields and semantics:
# instances. SkyPilot will auto-create and reuse a service account (IAM
# role) for AWS instances.
#
# Customized service account (IAM role): <string> or <dict>
# - <string>: apply the service account with the specified name to all instances.
# Example:
# remote_identity: my-service-account-name
# - <dict>: A dict mapping from the cluster name (pattern) to the service account name to use.
# NOTE: If none of the wildcard expressions in the dict match the cluster name, LOCAL_CREDENTIALS will be used.
# To specify your default, use "*" as the wildcard expression.
# Example:
# remote_identity:
# my-cluster-name: my-service-account-1
# sky-serve-controller-*: my-service-account-2
# "*": my-default-service-account
#
# Two caveats of SERVICE_ACCOUNT for multicloud users:
#
# - This only affects AWS instances. Local AWS credentials will still be
Expand Down
12 changes: 10 additions & 2 deletions sky/backends/backend_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Util constants/functions for the backends."""
from datetime import datetime
import enum
import fnmatch
import functools
import os
import pathlib
Expand Down Expand Up @@ -798,7 +799,12 @@ def write_cluster_config(
excluded_clouds = []
remote_identity = skypilot_config.get_nested(
(str(cloud).lower(), 'remote_identity'), 'LOCAL_CREDENTIALS')
if remote_identity == 'SERVICE_ACCOUNT':
if remote_identity is not None and not isinstance(remote_identity, str):
for profile in remote_identity:
if fnmatch.fnmatchcase(cluster_name, profile):
remote_identity = remote_identity[profile]
break
if remote_identity != 'LOCAL_CREDENTIALS':
JGSweets marked this conversation as resolved.
Show resolved Hide resolved
if not cloud.supports_service_account_on_remote():
raise exceptions.InvalidCloudConfigs(
'remote_identity: SERVICE_ACCOUNT is specified in '
Expand Down Expand Up @@ -888,6 +894,8 @@ def write_cluster_config(

# User-supplied labels.
'labels': labels,
# User-supplied remote_identity
'remote_identity': remote_identity,
# The reservation pools that specified by the user. This is
# currently only used by GCP.
'specific_reservations': specific_reservations,
Expand Down Expand Up @@ -1523,7 +1531,7 @@ def check_owner_identity(cluster_name: str) -> None:
for i, (owner,
current) in enumerate(zip(owner_identity,
current_user_identity)):
# Clean up the owner identiy for the backslash and newlines, caused
# Clean up the owner identity for the backslash and newlines, caused
JGSweets marked this conversation as resolved.
Show resolved Hide resolved
# by the cloud CLI output, e.g. gcloud.
owner = owner.replace('\n', '').replace('\\', '')
if owner == current:
Expand Down
4 changes: 4 additions & 0 deletions sky/templates/aws-ray.yml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@ available_node_types:
ray.head.default:
resources: {}
node_config:
{% if remote_identity not in ['LOCAL_CREDENTIALS', 'SERVICE_ACCOUNT'] %}
IamInstanceProfile:
Name: {{remote_identity}}
{% endif %}
InstanceType: {{instance_type}}
ImageId: {{image_id}} # Deep Learning AMI (Ubuntu 18.04); see aws.py.
# https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/ec2.html#EC2.ServiceResource.create_instances
Expand Down
25 changes: 21 additions & 4 deletions sky/utils/schemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,7 +518,21 @@ def get_cluster_schema():
_REMOTE_IDENTITY_SCHEMA = {
'remote_identity': {
'type': 'string',
'case_insensitive_enum': ['LOCAL_CREDENTIALS', 'SERVICE_ACCOUNT'],
'case_insensitive_enum': ['LOCAL_CREDENTIALS', 'SERVICE_ACCOUNT']
}
}

_REMOTE_IDENTITY_SCHEMA_AWS = {
'remote_identity': {
'oneOf': [{
'type': 'string'
}, {
'type': 'object',
'required': [],
'additionalProperties': {
'type': 'string',
},
}]
Comment on lines +529 to +535
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Seems we only added the support for AWS at the moment, we should limit this ability to aws cloud only, i.e. in L664, we can only apply this additional support for aws only.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be fixed

}
}

Expand Down Expand Up @@ -556,7 +570,7 @@ def get_config_schema():
'additionalProperties': False,
'properties': {
'security_group_name': {
'type': 'string',
'type': 'string'
},
**_LABELS_SCHEMA,
**_NETWORK_CONFIG_SCHEMA,
Expand Down Expand Up @@ -653,8 +667,11 @@ def get_config_schema():
},
}

for config in cloud_configs.values():
config['properties'].update(_REMOTE_IDENTITY_SCHEMA)
for cloud, config in cloud_configs.items():
if cloud == 'aws':
config['properties'].update(_REMOTE_IDENTITY_SCHEMA_AWS)
else:
config['properties'].update(_REMOTE_IDENTITY_SCHEMA)
return {
'$schema': 'https://json-schema.org/draft/2020-12/schema',
'type': 'object',
Expand Down
Loading