Initial Commit: Add vultr support #110

Merged
merged 7 commits on Jan 8, 2025
Changes from 6 commits
1 change: 1 addition & 0 deletions pyproject.toml
@@ -57,6 +57,7 @@ dev = [
    "pytest~=7.0",
    "pytest-mock",
    "ruff==0.5.3", # Should match .pre-commit-config.yaml
    "requests-mock",
]

[tool.setuptools.dynamic]
5 changes: 5 additions & 0 deletions src/gpuhunt/__main__.py
@@ -21,6 +21,7 @@ def main():
            "runpod",
            "tensordock",
            "vastai",
            "vultr",
        ],
    )
    parser.add_argument("--output", required=True)
@@ -82,6 +83,10 @@ def main():
        from gpuhunt.providers.vastai import VastAIProvider

        provider = VastAIProvider()
    elif args.provider == "vultr":
        from gpuhunt.providers.vultr import VultrProvider

        provider = VultrProvider()
    else:
        exit(f"Unknown provider {args.provider}")

2 changes: 1 addition & 1 deletion src/gpuhunt/_internal/catalog.py
@@ -19,7 +19,7 @@
version_url = "https://dstack-gpu-pricing.s3.eu-west-1.amazonaws.com/v1/version"
catalog_url = "https://dstack-gpu-pricing.s3.eu-west-1.amazonaws.com/v1/{version}/catalog.zip"
OFFLINE_PROVIDERS = ["aws", "azure", "datacrunch", "gcp", "lambdalabs", "oci", "runpod"]
ONLINE_PROVIDERS = ["cudo", "tensordock", "vastai"]
ONLINE_PROVIDERS = ["cudo", "tensordock", "vastai", "vultr"]
RELOAD_INTERVAL = 15 * 60 # 15 minutes


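Since "vultr" is added to ONLINE_PROVIDERS rather than the S3-backed offline list, its offers are presumably fetched live from the Vultr API whenever a catalog that includes the provider is queried. A minimal sketch of that flow, mirroring the test added later in this PR (the query filter and the printed summary are illustrative):

from gpuhunt import Catalog
from gpuhunt.providers.vultr import VultrProvider

# Build a catalog that only consults the live Vultr provider.
catalog = Catalog(balance_resources=False, auto_reload=False)
catalog.add_provider(VultrProvider())

# query() triggers a live fetch because "vultr" is an online provider;
# the filter below is illustrative.
offers = catalog.query(provider=["vultr"], min_gpu_count=1)
print(f"{len(offers)} Vultr GPU offers")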
1 change: 1 addition & 0 deletions src/gpuhunt/_internal/constraints.py
@@ -138,6 +138,7 @@ def correct_gpu_memory_gib(gpu_name: str, memory_mib: float) -> int:

KNOWN_NVIDIA_GPUS: list[NvidiaGPUInfo] = [
    NvidiaGPUInfo(name="A10", memory=24, compute_capability=(8, 6)),
    NvidiaGPUInfo(name="A16", memory=16, compute_capability=(8, 6)),
    NvidiaGPUInfo(name="A40", memory=48, compute_capability=(8, 6)),
    NvidiaGPUInfo(name="A100", memory=40, compute_capability=(8, 0)),
    NvidiaGPUInfo(name="A100", memory=80, compute_capability=(8, 0)),
1 change: 1 addition & 0 deletions src/gpuhunt/_internal/default.py
@@ -22,6 +22,7 @@ def default_catalog() -> Catalog:
        ("gpuhunt.providers.tensordock", "TensorDockProvider"),
        ("gpuhunt.providers.vastai", "VastAIProvider"),
        ("gpuhunt.providers.cudo", "CudoProvider"),
        ("gpuhunt.providers.vultr", "VultrProvider"),
    ]:
        try:
            module = importlib.import_module(module)
182 changes: 182 additions & 0 deletions src/gpuhunt/providers/vultr.py
@@ -0,0 +1,182 @@
import logging
from typing import Any, Optional

import requests
from requests import Response

from gpuhunt import QueryFilter, RawCatalogItem
from gpuhunt._internal.constraints import KNOWN_AMD_GPUS, KNOWN_NVIDIA_GPUS
from gpuhunt._internal.models import AcceleratorVendor
from gpuhunt.providers import AbstractProvider

logger = logging.getLogger(__name__)

API_URL = "https://api.vultr.com/v2"

EXCLUSION_LIST = ["GH200"]


class VultrProvider(AbstractProvider):
    NAME = "vultr"

    def get(
        self, query_filter: Optional[QueryFilter] = None, balance_resources: bool = True
    ) -> list[RawCatalogItem]:
        offers = fetch_offers()
        return sorted(offers, key=lambda i: i.price)


def fetch_offers() -> Optional[list[RawCatalogItem]]:
    """Fetch plans with types:
    1. Cloud GPU (vcg),
    2. Bare Metal (vbm),
    3. and other CPU plans, including:
       Cloud Compute (vc2),
       High Frequency Compute (vhf),
       High Performance (vhp),
       All optimized Cloud Types (voc)"""
    bare_metal_plans_response = _make_request("GET", "/plans-metal?per_page=500")
    other_plans_response = _make_request("GET", "/plans?type=all&per_page=500")
    return convert_response_to_raw_catalog_items(bare_metal_plans_response, other_plans_response)


def convert_response_to_raw_catalog_items(
    bare_metal_plans_response: Response, other_plans_response: Response
) -> list[RawCatalogItem]:
    catalog_items = []

    bare_metal_plans = bare_metal_plans_response.json()["plans_metal"]
    other_plans = other_plans_response.json()["plans"]

    for plan in bare_metal_plans:
        for location in plan["locations"]:
            catalog_item = get_bare_metal_plans(plan, location)
            if catalog_item:
                catalog_items.append(catalog_item)

    for plan in other_plans:
        for location in plan["locations"]:
            catalog_item = get_instance_plans(plan, location)
            if catalog_item:
                catalog_items.append(catalog_item)

    return catalog_items


def get_bare_metal_plans(plan: dict, location: str) -> Optional[RawCatalogItem]:
    gpu_details = BARE_METAL_GPU_DETAILS.get(plan["id"], None)
    return RawCatalogItem(
        instance_name=plan["id"],
        location=location,
        price=plan["hourly_cost"],
        cpu=plan["cpu_threads"],
        memory=plan["ram"] / 1024,
        gpu_count=gpu_details[0] if gpu_details else 0,
        gpu_name=gpu_details[1] if gpu_details else None,
        gpu_memory=gpu_details[2] if gpu_details else None,
        gpu_vendor=get_gpu_vendor(gpu_details[1]) if gpu_details else None,
        spot=False,
        disk_size=plan["disk"],
    )


def get_instance_plans(plan: dict, location: str) -> Optional[RawCatalogItem]:
    plan_type = plan["type"]
    if plan_type in ["vc2", "vhf", "vhp", "voc"]:
        return RawCatalogItem(
            instance_name=plan["id"],
            location=location,
            price=plan["hourly_cost"],
            cpu=plan["vcpu_count"],
            memory=plan["ram"] / 1024,
            gpu_count=0,
            gpu_name=None,
            gpu_memory=None,
            gpu_vendor=None,
            spot=False,
            disk_size=plan["disk"],
        )
    elif plan_type == "vcg":
        gpu_name = plan["gpu_type"].split("_")[1] if "_" in plan["gpu_type"] else None
        if gpu_name in EXCLUSION_LIST:
            logger.info(f"Excluding plan with GPU {gpu_name} as it is not supported.")
            return None
        gpu_vendor = get_gpu_vendor(gpu_name)
        gpu_memory_gb = plan["gpu_vram_gb"]
        gpu_count = (
            max(1, gpu_memory_gb // get_gpu_memory(gpu_name)) if gpu_name else 0
        )  # For plans with a fractional share of a GPU, gpu_count is set to 1
        return RawCatalogItem(
            instance_name=plan["id"],
            location=location,
            price=plan["hourly_cost"],
            cpu=plan["vcpu_count"],
            memory=plan["ram"] / 1024,
            gpu_count=gpu_count,
            gpu_name=gpu_name,
            gpu_memory=gpu_memory_gb / gpu_count,
            gpu_vendor=gpu_vendor,
            spot=False,
            disk_size=plan["disk"],
        )


def get_gpu_memory(gpu_name: str) -> float:
    if gpu_name.upper() == "A100":
        return 80  # Vultr A100 instances have 80GB
    for gpu in KNOWN_NVIDIA_GPUS:
        if gpu.name.upper() == gpu_name.upper():
            return gpu.memory

    for gpu in KNOWN_AMD_GPUS:
        if gpu.name.upper() == gpu_name.upper():
            return gpu.memory
    logger.warning(f"Unknown GPU {gpu_name}")


def get_gpu_vendor(gpu_name: Optional[str]) -> Optional[str]:
    if gpu_name is None:
        return None
    for gpu in KNOWN_NVIDIA_GPUS:
        if gpu.name.upper() == gpu_name.upper():
            return AcceleratorVendor.NVIDIA.value
    for gpu in KNOWN_AMD_GPUS:
        if gpu.name.upper() == gpu_name.upper():
            return AcceleratorVendor.AMD.value
    return None


def extract_gpu_info_from_id(id_str: str):
    parts = id_str.split("-")
    if "gpu" in parts:
        gpu_name = parts[-2].upper()
        try:
            gpu_count = int(parts[-3])
        except ValueError:
            # Default to 1 if the count is not explicitly specified: e.g. in
            # vbm-64c-2048gb-l40-gpu no count is given, while in
            # vbm-64c-2048gb-8-l40-gpu the count is specified as 8.
            gpu_count = 1
        return gpu_name, gpu_count
    return None, 0


def _make_request(method: str, path: str, data: Any = None) -> Response:
    response = requests.request(
        method=method,
        url=API_URL + path,
        json=data,
        timeout=30,
    )
    response.raise_for_status()
    return response


BARE_METAL_GPU_DETAILS = {
    "vbm-48c-1024gb-4-a100-gpu": (4, "A100", 80),
    "vbm-112c-2048gb-8-h100-gpu": (8, "H100", 80),
    "vbm-112c-2048gb-8-a100-gpu": (8, "A100", 80),
    "vbm-64c-2048gb-8-l40-gpu": (8, "L40S", 48),
    "vbm-72c-480gb-gh200-gpu": (1, "GH200", 480),
"vbm-256c-2048gb-8-mi300x-gpu": (8, "MI300X", 192),
}
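For reference, a minimal sketch of how the new provider can be exercised directly; the printed fields are attributes set on RawCatalogItem above, and the slice size is arbitrary:

from gpuhunt.providers.vultr import VultrProvider

provider = VultrProvider()
offers = provider.get()  # calls the Vultr API and returns items sorted by price
for offer in offers[:3]:
    print(offer.instance_name, offer.location, offer.gpu_count, offer.gpu_name, offer.price)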
111 changes: 111 additions & 0 deletions src/tests/providers/test_vultr.py
@@ -0,0 +1,111 @@
import gpuhunt._internal.catalog as internal_catalog
from gpuhunt import Catalog
from gpuhunt.providers.vultr import VultrProvider, fetch_offers

bare_metal = {
    "plans_metal": [
        {
            "id": "vbm-256c-2048gb-8-mi300x-gpu",
            "physical_cpus": 2,
            "cpu_count": 128,
            "cpu_cores": 128,
            "cpu_threads": 256,
            "cpu_model": "EPYC 9534",
            "cpu_mhz": 2450,
            "ram": 2321924,
            "disk": 3576,
            "disk_count": 8,
            "bandwidth": 10240,
            "monthly_cost": 11773.44,
            "hourly_cost": 17.52,
            "monthly_cost_preemptible": 9891.84,
            "hourly_cost_preemptible": 14.72,
            "type": "NVMe",
            "locations": ["ord"],
        },
        {
            "id": "vbm-112c-2048gb-8-h100-gpu",
            "physical_cpus": 2,
            "cpu_count": 112,
            "cpu_cores": 112,
            "cpu_threads": 224,
            "cpu_model": "Platinum 8480+",
            "cpu_mhz": 2000,
            "ram": 2097152,
            "disk": 960,
            "disk_count": 2,
            "bandwidth": 15360,
            "monthly_cost": 16074.24,
            "hourly_cost": 23.92,
            "monthly_cost_preemptible": 12364.8,
            "hourly_cost_preemptible": 18.4,
            "type": "NVMe",
            "locations": ["sea"],
        },
    ]
}

vm_instances = {
    "plans": [
        {
            "id": "vcg-a100-1c-6g-4vram",
            "vcpu_count": 1,
            "ram": 6144,
            "disk": 70,
            "disk_count": 1,
            "bandwidth": 1024,
            "monthly_cost": 90,
            "hourly_cost": 0.123,
            "type": "vcg",
            "locations": ["ewr"],
            "gpu_vram_gb": 4,
            "gpu_type": "NVIDIA_A100",
        },
        {
            "id": "vcg-a100-12c-120g-80vram",
            "vcpu_count": 12,
            "ram": 122880,
            "disk": 1400,
            "disk_count": 1,
            "bandwidth": 10240,
            "monthly_cost": 1750,
            "hourly_cost": 2.397,
            "type": "vcg",
            "locations": ["ewr"],
            "gpu_vram_gb": 80,
            "gpu_type": "NVIDIA_A100",
        },
        {
            "id": "vcg-a100-6c-60g-40vram",
            "vcpu_count": 12,
            "ram": 61440,
            "disk": 1400,
            "disk_count": 1,
            "bandwidth": 10240,
            "monthly_cost": 800,
            "hourly_cost": 1.397,
            "type": "vcg",
            "locations": ["ewr"],
            "gpu_vram_gb": 40,
            "gpu_type": "NVIDIA_A100",
        },
    ]
}


def test_fetch_offers(requests_mock):
    # Mocking the responses for the API endpoints
    requests_mock.get("https://api.vultr.com/v2/plans-metal?per_page=500", json=bare_metal)
    requests_mock.get("https://api.vultr.com/v2/plans?type=all&per_page=500", json=vm_instances)

    # Fetch offers and verify results
    assert len(fetch_offers()) == 5
    catalog = Catalog(balance_resources=False, auto_reload=False)
    vultr = VultrProvider()
    internal_catalog.ONLINE_PROVIDERS = ["vultr"]
    internal_catalog.OFFLINE_PROVIDERS = []
    catalog.add_provider(vultr)
    assert len(catalog.query(provider=["vultr"], min_gpu_count=1, max_gpu_count=1)) == 3
    assert len(catalog.query(provider=["vultr"], min_gpu_memory=80, max_gpu_count=1)) == 1
    assert len(catalog.query(provider=["vultr"], gpu_vendor="amd")) == 1
    assert len(catalog.query(provider=["vultr"], gpu_name="MI300X")) == 1
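The requests_mock argument is the pytest fixture provided by the requests-mock plugin added to the dev dependencies in this PR, so the test never hits the real API. The same mocking works outside pytest via the library's context manager; a sketch reusing the payloads defined above:

import requests_mock

from gpuhunt.providers.vultr import fetch_offers

with requests_mock.Mocker() as m:
    m.get("https://api.vultr.com/v2/plans-metal?per_page=500", json=bare_metal)
    m.get("https://api.vultr.com/v2/plans?type=all&per_page=500", json=vm_instances)
    offers = fetch_offers()
    assert len(offers) == 5  # 2 bare-metal plans + 3 cloud GPU plans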