Skip to content

Commit

Permalink
[Core] Reduce import time with lazy imports and exec time by avoiding…
Browse files Browse the repository at this point in the history
… script rsync (#3394)

* lazy imports for pandas and lazy data frame

* fix fetch_aws

* fix fetchers

* avoid sync script for task

* format

* format

* Fix tests with autoscaler

* fix serve test

* refactor all cloud adaptors

* comments

* address comments

* format

* refactor

* format

* separate storage installation

* typo from Fail to Failed

* fix lazy imports

* revert gcp adaptor

* use base64 instead of shlex

* Add comments

* add comments

* change to LazyDataFrame for typing

* add annotation for self._df

* add some early load back

* fix exceptions

* Add examples

* format

* format

* fix typing

* Update sky/adaptors/common.py

Co-authored-by: Zongheng Yang <[email protected]>

* Update sky/adaptors/common.py

Co-authored-by: Zongheng Yang <[email protected]>

---------

Co-authored-by: Zongheng Yang <[email protected]>
  • Loading branch information
Michaelvll and concretevitamin authored Apr 3, 2024
1 parent ffc8618 commit bca709b
Show file tree
Hide file tree
Showing 47 changed files with 536 additions and 697 deletions.
37 changes: 10 additions & 27 deletions sky/adaptors/aws.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,12 +34,17 @@
import time
from typing import Any, Callable

from sky.adaptors import common
from sky.utils import common_utils

logger = logging.getLogger(__name__)
_IMPORT_ERROR_MESSAGE = ('Failed to import dependencies for AWS. '
'Try pip install "skypilot[aws]"')
boto3 = common.LazyImport('boto3', import_error_message=_IMPORT_ERROR_MESSAGE)
botocore = common.LazyImport('botocore',
import_error_message=_IMPORT_ERROR_MESSAGE)
_LAZY_MODULES = (boto3, botocore)

boto3 = None
botocore = None
logger = logging.getLogger(__name__)
_session_creation_lock = threading.RLock()

version = 1
Expand Down Expand Up @@ -73,25 +78,6 @@ def wrapper(*args, **kwargs):
return decorator


def import_package(func):

@functools.wraps(func)
def wrapper(*args, **kwargs):
global boto3, botocore
if boto3 is None or botocore is None:
try:
import boto3 as _boto3
import botocore as _botocore
boto3 = _boto3
botocore = _botocore
except ImportError:
raise ImportError('Fail to import dependencies for AWS. '
'Try pip install "skypilot[aws]"') from None
return func(*args, **kwargs)

return wrapper


def _assert_kwargs_builtin_type(kwargs):
assert all(isinstance(v, (int, float, str)) for v in kwargs.values()), (
f'kwargs should not contain none built-in types: {kwargs}')
Expand Down Expand Up @@ -131,7 +117,6 @@ def _create_aws_object(creation_fn_or_cls: Callable[[], Any],
f'{common_utils.format_exception(e)}.')


@import_package
# The LRU cache needs to be thread-local to avoid multiple threads sharing the
# same session object, which is not guaranteed to be thread-safe.
@_thread_local_lru_cache()
Expand All @@ -140,7 +125,6 @@ def session():
return _create_aws_object(boto3.session.Session, 'session')


@import_package
# Avoid caching the resource/client objects. If we are using the assumed role,
# the credentials will be automatically rotated, but the cached resource/client
# object will only refresh the credentials with a fixed 15 minutes interval,
Expand Down Expand Up @@ -172,7 +156,6 @@ def resource(service_name: str, **kwargs):
lambda: session().resource(service_name, **kwargs), 'resource')


@import_package
def client(service_name: str, **kwargs):
"""Create an AWS client of a certain service.
Expand All @@ -189,14 +172,14 @@ def client(service_name: str, **kwargs):
'client')


@import_package
@common.load_lazy_modules(modules=_LAZY_MODULES)
def botocore_exceptions():
"""AWS botocore exception."""
from botocore import exceptions
return exceptions


@import_package
@common.load_lazy_modules(modules=_LAZY_MODULES)
def botocore_config():
"""AWS botocore exception."""
from botocore import config
Expand Down
38 changes: 14 additions & 24 deletions sky/adaptors/azure.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,72 +4,62 @@
import functools
import threading

azure = None
_session_creation_lock = threading.RLock()

from sky.adaptors import common

def import_package(func):
azure = common.LazyImport(
'azure',
import_error_message=('Failed to import dependencies for Azure.'
'Try pip install "skypilot[azure]"'))
_LAZY_MODULES = (azure,)

@functools.wraps(func)
def wrapper(*args, **kwargs):
global azure
if azure is None:
try:
import azure as _azure # type: ignore
azure = _azure
except ImportError:
raise ImportError('Fail to import dependencies for Azure.'
'Try pip install "skypilot[azure]"') from None
return func(*args, **kwargs)

return wrapper
_session_creation_lock = threading.RLock()


@import_package
@common.load_lazy_modules(modules=_LAZY_MODULES)
def get_subscription_id() -> str:
"""Get the default subscription id."""
from azure.common import credentials
return credentials.get_cli_profile().get_subscription_id()


@import_package
@common.load_lazy_modules(modules=_LAZY_MODULES)
def get_current_account_user() -> str:
"""Get the default account user."""
from azure.common import credentials
return credentials.get_cli_profile().get_current_account_user()


@import_package
@common.load_lazy_modules(modules=_LAZY_MODULES)
def exceptions():
"""Azure exceptions."""
from azure.core import exceptions as azure_exceptions
return azure_exceptions


@functools.lru_cache()
@import_package
@common.load_lazy_modules(modules=_LAZY_MODULES)
def get_client(name: str, subscription_id: str):
# Sky only supports Azure CLI credential for now.
# Increase the timeout to fix the Azure get-access-token timeout issue.
# Tracked in
# https://github.com/Azure/azure-cli/issues/20404#issuecomment-1249575110
from azure.identity import AzureCliCredential
from azure.mgmt.network import NetworkManagementClient
from azure.mgmt.resource import ResourceManagementClient
with _session_creation_lock:
credential = AzureCliCredential(process_timeout=30)
if name == 'compute':
from azure.mgmt.compute import ComputeManagementClient
return ComputeManagementClient(credential, subscription_id)
elif name == 'network':
from azure.mgmt.network import NetworkManagementClient
return NetworkManagementClient(credential, subscription_id)
elif name == 'resource':
from azure.mgmt.resource import ResourceManagementClient
return ResourceManagementClient(credential, subscription_id)
else:
raise ValueError(f'Client not supported: "{name}"')


@import_package
@common.load_lazy_modules(modules=_LAZY_MODULES)
def create_security_rule(**kwargs):
from azure.mgmt.network.models import SecurityRule
return SecurityRule(**kwargs)
33 changes: 9 additions & 24 deletions sky/adaptors/cloudflare.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,16 @@
import threading
from typing import Dict, Optional, Tuple

from sky.adaptors import common
from sky.utils import ux_utils

boto3 = None
botocore = None
_IMPORT_ERROR_MESSAGE = ('Failed to import dependencies for Cloudflare.'
'Try pip install "skypilot[cloudflare]"')
boto3 = common.LazyImport('boto3', import_error_message=_IMPORT_ERROR_MESSAGE)
botocore = common.LazyImport('botocore',
import_error_message=_IMPORT_ERROR_MESSAGE)
_LAZY_MODULES = (boto3, botocore)

_session_creation_lock = threading.RLock()
ACCOUNT_ID_PATH = '~/.cloudflare/accountid'
R2_CREDENTIALS_PATH = '~/.cloudflare/r2.credentials'
Expand All @@ -19,25 +25,6 @@
NAME = 'Cloudflare'


def import_package(func):

@functools.wraps(func)
def wrapper(*args, **kwargs):
global boto3, botocore
if boto3 is None or botocore is None:
try:
import boto3 as _boto3
import botocore as _botocore
boto3 = _boto3
botocore = _botocore
except ImportError:
raise ImportError('Fail to import dependencies for Cloudflare.'
'Try pip install "skypilot[aws]"') from None
return func(*args, **kwargs)

return wrapper


@contextlib.contextmanager
def _load_r2_credentials_env():
"""Context manager to temporarily change the AWS credentials file path."""
Expand Down Expand Up @@ -75,7 +62,6 @@ def get_r2_credentials(boto3_session):
# for different threads.
# Reference: https://docs.python.org/3/library/functools.html#functools.lru_cache # pylint: disable=line-too-long
@functools.lru_cache()
@import_package
def session():
"""Create an AWS session."""
# Creating the session object is not thread-safe for boto3,
Expand All @@ -90,7 +76,6 @@ def session():


@functools.lru_cache()
@import_package
def resource(resource_name: str, **kwargs):
"""Create a Cloudflare resource.
Expand Down Expand Up @@ -141,7 +126,7 @@ def client(service_name: str, region):
region_name=region)


@import_package
@common.load_lazy_modules(_LAZY_MODULES)
def botocore_exceptions():
"""AWS botocore exception."""
from botocore import exceptions
Expand Down
65 changes: 65 additions & 0 deletions sky/adaptors/common.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""Lazy import for modules to avoid import error when not used."""
import functools
import importlib
from typing import Any, Optional, Tuple


class LazyImport:
"""Lazy importer for heavy modules or cloud modules only when enabled.
We use this for pandas and networkx, as they can be time-consuming to import
(0.1-0.2 seconds). With this class, we can avoid the unnecessary import time
when the module is not used (e.g., `networkx` should not be imported for
`sky status and `pandas` should not be imported for `sky exec`).
We also use this for cloud adaptors, because we do not want to import the
cloud dependencies when it is not enabled.
"""

def __init__(self,
module_name: str,
import_error_message: Optional[str] = None):
self._module_name = module_name
self._module = None
self._import_error_message = import_error_message

def load_module(self):
if self._module is None:
try:
self._module = importlib.import_module(self._module_name)
except ImportError as e:
if self._import_error_message is not None:
raise ImportError(self._import_error_message) from e
raise
return self._module

def __getattr__(self, name: str) -> Any:
# Attempt to access the attribute, if it fails, assume it's a submodule
# and lazily import it
try:
if name in self.__dict__:
return self.__dict__[name]
return getattr(self.load_module(), name)
except AttributeError:
# Dynamically create a new LazyImport instance for the submodule
submodule_name = f'{self._module_name}.{name}'
lazy_submodule = LazyImport(submodule_name,
self._import_error_message)
setattr(self, name, lazy_submodule)
return lazy_submodule


def load_lazy_modules(modules: Tuple[LazyImport, ...]):
"""Load lazy modules before entering a function to error out quickly."""

def decorator(func):

@functools.wraps(func)
def wrapper(*args, **kwargs):
for m in modules:
m.load_module()
return func(*args, **kwargs)

return wrapper

return decorator
31 changes: 5 additions & 26 deletions sky/adaptors/cudo.py
Original file line number Diff line number Diff line change
@@ -1,29 +1,8 @@
"""Cudo Compute cloud adaptor."""

import functools
from sky.adaptors import common

_cudo_sdk = None


def import_package(func):

@functools.wraps(func)
def wrapper(*args, **kwargs):
global _cudo_sdk
if _cudo_sdk is None:
try:
import cudo_compute as _cudo # pylint: disable=import-outside-toplevel
_cudo_sdk = _cudo
except ImportError:
raise ImportError(
'Fail to import dependencies for Cudo Compute.'
'Try pip install "skypilot[cudo]"') from None
return func(*args, **kwargs)

return wrapper


@import_package
def cudo():
"""Return the Cudo Compute package."""
return _cudo_sdk
cudo = common.LazyImport(
'cudo_compute',
import_error_message='Failed to import dependencies for Cudo Compute. '
'Try running: pip install "skypilot[cudo]"')
28 changes: 5 additions & 23 deletions sky/adaptors/docker.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,43 +2,25 @@

# pylint: disable=import-outside-toplevel

from functools import wraps
from sky.adaptors import common

docker = None
docker = common.LazyImport(
'docker',
import_error_message='Failed to import dependencies for Docker. '
'See README for how to install it.')


def import_package(func):

@wraps(func)
def wrapper(*args, **kwargs):
global docker
if docker is None:
try:
import docker as _docker
except ImportError:
raise ImportError('Fail to import dependencies for Docker. '
'See README for how to install it.') from None
docker = _docker
return func(*args, **kwargs)

return wrapper


@import_package
def from_env():
return docker.from_env()


@import_package
def build_error():
return docker.errors.BuildError


@import_package
def not_found_error():
return docker.errors.NotFound


@import_package
def api_error():
return docker.errors.APIError
Loading

0 comments on commit bca709b

Please sign in to comment.