Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Catalog] Fix lambda fetcher #3801

Merged
merged 5 commits into from
Aug 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 38 additions & 23 deletions sky/clouds/service_catalog/data_fetchers/fetch_lambda_cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import csv
import json
import os
from typing import Optional, Tuple

import requests

Expand Down Expand Up @@ -43,18 +44,24 @@
'RTX6000': 24576,
'V100': 16384,
'H100': 81920,
'GENERAL': None
}


def name_to_gpu(name: str) -> str:
def name_to_gpu_and_cnt(name: str) -> Optional[Tuple[str, int]]:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we add a comment describing the expected format of the name?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added. Thanks!

"""Extract GPU and count from instance type name.

The instance type name is in the format:
'gpu_{gpu_count}x_{gpu_name}_<suffix>'.
"""
# Edge case
if name == 'gpu_8x_a100_80gb_sxm4':
return 'A100-80GB'
return name.split('_')[2].upper()


def name_to_gpu_cnt(name: str) -> int:
return int(name.split('_')[1].replace('x', ''))
return 'A100-80GB', 8
gpu = name.split('_')[2].upper()
if gpu == 'GENERAL':
return None
gpu_cnt = int(name.split('_')[1].replace('x', ''))
return gpu, gpu_cnt


def create_catalog(api_key: str, output_path: str) -> None:
Expand All @@ -71,24 +78,32 @@ def create_catalog(api_key: str, output_path: str) -> None:
# We parse info.keys() in reverse order so gpu_1x_a100_sxm4 comes before
# gpu_1x_a100 in the catalog (gpu_1x_a100_sxm4 has more availability).
for vm in reversed(list(info.keys())):
gpu = name_to_gpu(vm)
gpu_cnt = float(name_to_gpu_cnt(vm))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the name_to_gpu_cnt function now unused and therefore safe to delete?

gpu_and_cnt = name_to_gpu_and_cnt(vm)
gpu: Optional[str]
gpu_cnt: Optional[float]
if gpu_and_cnt is None:
gpu, gpu_cnt = None, None
else:
gpu = gpu_and_cnt[0]
gpu_cnt = float(gpu_and_cnt[1])
vcpus = float(info[vm]['instance_type']['specs']['vcpus'])
mem = float(info[vm]['instance_type']['specs']['memory_gib'])
price = float(info[vm]['instance_type']\
['price_cents_per_hour']) / 100
gpuinfo = {
'Gpus': [{
'Name': gpu,
'Manufacturer': 'NVIDIA',
'Count': gpu_cnt,
'MemoryInfo': {
'SizeInMiB': GPU_TO_MEMORY[gpu]
},
}],
'TotalGpuMemoryInMiB': GPU_TO_MEMORY[gpu]
}
gpuinfo = json.dumps(gpuinfo).replace('"', "'") # pylint: disable=invalid-string-quote
price = (float(info[vm]['instance_type']['price_cents_per_hour']) /
100)
gpuinfo: Optional[str] = None
if gpu is not None:
gpuinfo_dict = {
'Gpus': [{
'Name': gpu,
'Manufacturer': 'NVIDIA',
'Count': gpu_cnt,
'MemoryInfo': {
'SizeInMiB': GPU_TO_MEMORY[gpu]
},
}],
'TotalGpuMemoryInMiB': GPU_TO_MEMORY[gpu]
}
gpuinfo = json.dumps(gpuinfo_dict).replace('"', "'") # pylint: disable=invalid-string-quote
for r in REGIONS:
writer.writerow(
[vm, gpu, gpu_cnt, vcpus, mem, price, r, gpuinfo, ''])
Expand Down
7 changes: 6 additions & 1 deletion sky/clouds/service_catalog/lambda_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,12 @@
if typing.TYPE_CHECKING:
from sky.clouds import cloud

_df = common.read_catalog('lambda/vms.csv')
# Keep it synced with the frequency in
# skypilot-catalog/.github/workflows/update-lambda-catalog.yml
_PULL_FREQUENCY_HOURS = 7

_df = common.read_catalog('lambda/vms.csv',
pull_frequency_hours=_PULL_FREQUENCY_HOURS)

# Number of vCPUS for gpu_1x_a10
_DEFAULT_NUM_VCPUS = 30
Expand Down
Loading