Skip to content

Commit

Permalink
[Core][Serve] Hide execute api from sky.execution (#3058)
Browse files Browse the repository at this point in the history
* change to launch

* move constants back
  • Loading branch information
cblmemo authored Feb 1, 2024
1 parent b247870 commit 54e5bb0
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 23 deletions.
5 changes: 3 additions & 2 deletions sky/backends/backend_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
from sky import exceptions
from sky import global_user_state
from sky import provision as provision_lib
from sky import serve as serve_lib
from sky import sky_logging
from sky import skypilot_config
from sky import status_lib
Expand Down Expand Up @@ -2525,7 +2526,7 @@ def get_task_demands_dict(task: 'task_lib.Task') -> Dict[str, float]:
# For sky serve controller task, we set the CPU resource to a smaller
# value to support a larger number of services.
resources_dict = {
'CPU': (constants.SERVICES_TASK_CPU_DEMAND
'CPU': (serve_lib.SERVICES_TASK_CPU_DEMAND
if task.service_name is not None else DEFAULT_TASK_CPU_DEMAND)
}
if task.best_resources is not None:
Expand All @@ -2546,7 +2547,7 @@ def get_task_resources_str(task: 'task_lib.Task') -> str:
The resources string is used only for display purposes, so we only show
the accelerator demands (if any). Otherwise, the CPU demand is shown.
"""
task_cpu_demand = (constants.SERVICES_TASK_CPU_DEMAND if task.service_name
task_cpu_demand = (serve_lib.SERVICES_TASK_CPU_DEMAND if task.service_name
is not None else DEFAULT_TASK_CPU_DEMAND)
if task.best_resources is not None:
accelerator_dict = task.best_resources.accelerators
Expand Down
14 changes: 8 additions & 6 deletions sky/execution.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ def _maybe_clone_disk_from_cluster(clone_disk_from: Optional[str],
return task


def execute(
def _execute(
entrypoint: Union['sky.Task', 'sky.Dag'],
dryrun: bool = False,
down: bool = False,
Expand Down Expand Up @@ -402,6 +402,7 @@ def launch(
# pylint: disable=invalid-name
_is_launched_by_spot_controller: bool = False,
_is_launched_by_sky_serve_controller: bool = False,
_disable_controller_check: bool = False,
) -> Tuple[Optional[int], Optional[backends.ResourceHandle]]:
# NOTE(dev): Keep the docstring consistent between the Python API and CLI.
"""Launch a cluster or task.
Expand Down Expand Up @@ -490,10 +491,11 @@ def launch(
if dryrun.
"""
entrypoint = task
controller_utils.check_cluster_name_not_controller(
cluster_name, operation_str='sky.launch')
if not _disable_controller_check:
controller_utils.check_cluster_name_not_controller(
cluster_name, operation_str='sky.launch')

return execute(
return _execute(
entrypoint=entrypoint,
dryrun=dryrun,
down=down,
Expand Down Expand Up @@ -590,7 +592,7 @@ def exec( # pylint: disable=redefined-builtin
operation='executing tasks',
check_cloud_vm_ray_backend=False,
dryrun=dryrun)
return execute(
return _execute(
entrypoint=entrypoint,
dryrun=dryrun,
down=down,
Expand Down Expand Up @@ -701,7 +703,7 @@ def spot_launch(
f'Launching managed spot job {dag.name!r} from spot controller...'
f'{colorama.Style.RESET_ALL}')
print('Launching spot controller...')
execute(
_execute(
entrypoint=controller_task,
stream_logs=stream_logs,
cluster_name=controller_name,
Expand Down
1 change: 1 addition & 0 deletions sky/serve/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from sky.serve.constants import ENDPOINT_PROBE_INTERVAL_SECONDS
from sky.serve.constants import INITIAL_VERSION
from sky.serve.constants import LB_CONTROLLER_SYNC_INTERVAL_SECONDS
from sky.serve.constants import SERVICES_TASK_CPU_DEMAND
from sky.serve.constants import SKYSERVE_METADATA_DIR
from sky.serve.core import down
from sky.serve.core import status
Expand Down
4 changes: 4 additions & 0 deletions sky/serve/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,10 @@
# do some log rotation.
CONTROLLER_RESOURCES = {'cpus': '4+', 'disk_size': 200}

# A default controller with 4 vCPU and 16 GB memory can run up to 16 services.
SERVICES_MEMORY_USAGE_GB = 1.0
SERVICES_TASK_CPU_DEMAND = 0.25

# A period of time to initialize your service. Any readiness probe failures
# during this period will be ignored.
DEFAULT_INITIAL_DELAY_SECONDS = 1200
Expand Down
9 changes: 3 additions & 6 deletions sky/serve/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,6 @@ def up(
task: sky.Task to serve up.
service_name: Name of the service.
"""
# This is to avoid circular import.
# pylint: disable=import-outside-toplevel
from sky import execution

if service_name is None:
service_name = serve_utils.generate_service_name()

Expand Down Expand Up @@ -163,14 +159,15 @@ def up(
# whether the service is already running. If the id is the same
# as the current job id, we know the service is up and running
# for the first time; otherwise it is a name conflict.
controller_job_id, controller_handle = execution.execute(
entrypoint=controller_task,
controller_job_id, controller_handle = sky.launch(
task=controller_task,
stream_logs=False,
cluster_name=controller_name,
detach_run=True,
idle_minutes_to_autostop=constants.
CONTROLLER_IDLE_MINUTES_TO_AUTOSTOP,
retry_until_up=True,
_disable_controller_check=True,
)

style = colorama.Style
Expand Down
4 changes: 1 addition & 3 deletions sky/serve/serve_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@
from sky import status_lib
from sky.serve import constants
from sky.serve import serve_state
from sky.skylet import constants as skylet_constants
from sky.skylet import job_lib
from sky.utils import common_utils
from sky.utils import log_utils
Expand All @@ -38,8 +37,7 @@
SKY_SERVE_CONTROLLER_NAME: str = (
f'sky-serve-controller-{common_utils.get_user_hash()}')
_SYSTEM_MEMORY_GB = psutil.virtual_memory().total // (1024**3)
NUM_SERVICE_THRESHOLD = (_SYSTEM_MEMORY_GB //
skylet_constants.SERVICES_MEMORY_USAGE_GB)
NUM_SERVICE_THRESHOLD = _SYSTEM_MEMORY_GB // constants.SERVICES_MEMORY_USAGE_GB
_CONTROLLER_URL = 'http://localhost:{CONTROLLER_PORT}'

_SKYPILOT_PROVISION_LOG_PATTERN = r'.*tail -n100 -f (.*provision\.log).*'
Expand Down
6 changes: 0 additions & 6 deletions sky/skylet/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,12 +54,6 @@
# Port on the remote spot controller that the dashboard is running on.
SPOT_DASHBOARD_REMOTE_PORT = 5000

# A default controller with 4 vCPU and 16 GB memory can run up to 16 services.
# TODO(tian): This is to fix circular imports. Move this back to
# sky.serve.constants.
SERVICES_MEMORY_USAGE_GB = 1.0
SERVICES_TASK_CPU_DEMAND = 0.25

# Docker default options
DEFAULT_DOCKER_CONTAINER_NAME = 'sky_container'
DEFAULT_DOCKER_PORT = 10022
Expand Down

0 comments on commit 54e5bb0

Please sign in to comment.