Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial Commit: Add vultr support #110

Merged
merged 7 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ dev = [
"pytest~=7.0",
"pytest-mock",
"ruff==0.5.3", # Should match .pre-commit-config.yaml
"requests-mock",
]

[tool.setuptools.dynamic]
Expand Down
5 changes: 5 additions & 0 deletions src/gpuhunt/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def main():
"runpod",
"tensordock",
"vastai",
"vultr",
],
)
parser.add_argument("--output", required=True)
Expand Down Expand Up @@ -82,6 +83,10 @@ def main():
from gpuhunt.providers.vastai import VastAIProvider

provider = VastAIProvider()
elif args.provider == "vultr":
from gpuhunt.providers.vultr import VultrProvider

provider = VultrProvider()
else:
exit(f"Unknown provider {args.provider}")

Expand Down
2 changes: 1 addition & 1 deletion src/gpuhunt/_internal/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
version_url = "https://dstack-gpu-pricing.s3.eu-west-1.amazonaws.com/v1/version"
catalog_url = "https://dstack-gpu-pricing.s3.eu-west-1.amazonaws.com/v1/{version}/catalog.zip"
OFFLINE_PROVIDERS = ["aws", "azure", "datacrunch", "gcp", "lambdalabs", "oci", "runpod"]
ONLINE_PROVIDERS = ["cudo", "tensordock", "vastai"]
ONLINE_PROVIDERS = ["cudo", "tensordock", "vastai", "vultr"]
jvstme marked this conversation as resolved.
Show resolved Hide resolved
RELOAD_INTERVAL = 15 * 60 # 15 minutes


Expand Down
1 change: 1 addition & 0 deletions src/gpuhunt/_internal/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ def correct_gpu_memory_gib(gpu_name: str, memory_mib: float) -> int:

KNOWN_NVIDIA_GPUS: list[NvidiaGPUInfo] = [
NvidiaGPUInfo(name="A10", memory=24, compute_capability=(8, 6)),
NvidiaGPUInfo(name="A16", memory=16, compute_capability=(8, 6)),
NvidiaGPUInfo(name="A40", memory=48, compute_capability=(8, 6)),
NvidiaGPUInfo(name="A100", memory=40, compute_capability=(8, 0)),
NvidiaGPUInfo(name="A100", memory=80, compute_capability=(8, 0)),
Expand Down
1 change: 1 addition & 0 deletions src/gpuhunt/_internal/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def default_catalog() -> Catalog:
("gpuhunt.providers.tensordock", "TensorDockProvider"),
("gpuhunt.providers.vastai", "VastAIProvider"),
("gpuhunt.providers.cudo", "CudoProvider"),
("gpuhunt.providers.vultr", "VultrProvider"),
]:
try:
module = importlib.import_module(module)
Expand Down
179 changes: 179 additions & 0 deletions src/gpuhunt/providers/vultr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import logging
from typing import Any, Optional

import requests
from requests import Response

from gpuhunt import QueryFilter, RawCatalogItem
from gpuhunt._internal.constraints import KNOWN_AMD_GPUS, KNOWN_NVIDIA_GPUS
from gpuhunt._internal.models import AcceleratorVendor
from gpuhunt.providers import AbstractProvider

logger = logging.getLogger(__name__)

# Base URL of the Vultr v2 REST API (see https://www.vultr.com/api/).
API_URL = "https://api.vultr.com/v2"

# GPU model names whose plans are skipped entirely — presumably not supported
# downstream; reason is not stated in this module. TODO confirm.
EXCLUSION_LIST = ["GH200"]


class VultrProvider(AbstractProvider):
    """Catalog provider for Vultr cloud GPU, bare-metal, and CPU plans."""

    NAME = "vultr"

    def get(
        self, query_filter: Optional[QueryFilter] = None, balance_resources: bool = True
    ) -> list[RawCatalogItem]:
        """Return every available Vultr offer, cheapest first."""
        return sorted(fetch_offers(), key=lambda item: item.price)


def fetch_offers() -> list[RawCatalogItem]:
    """Fetch all Vultr plans and convert them into raw catalog items.

    Queries two endpoints:
    1. Bare Metal plans (``vbm``), and
    2. all other plans, including Cloud GPU (``vcg``), Cloud Compute (``vc2``),
       High Frequency Compute (``vhf``), High Performance (``vhp``), and all
       optimized Cloud Types (``voc``).

    Note: the return annotation was previously ``Optional[list[...]]`` but this
    function always returns a list (it raises on HTTP errors instead).
    """
    bare_metal_plans_response = _make_request("GET", "/plans-metal?per_page=500")
    other_plans_response = _make_request("GET", "/plans?type=all&per_page=500")
    return convert_response_to_raw_catalog_items(bare_metal_plans_response, other_plans_response)


def convert_response_to_raw_catalog_items(
    bare_metal_plans_response: Response, other_plans_response: Response
) -> list[RawCatalogItem]:
    """Build one catalog item per (plan, location) pair from both API responses.

    Bare-metal plans come first, then VM plans; converters returning a falsy
    value (skipped/unknown plans) are dropped.
    """
    items: list[RawCatalogItem] = []
    plan_groups = [
        (bare_metal_plans_response.json()["plans_metal"], get_bare_metal_plans),
        (other_plans_response.json()["plans"], get_instance_plans),
    ]
    for plans, converter in plan_groups:
        for plan in plans:
            for location in plan["locations"]:
                item = converter(plan, location)
                if item:
                    items.append(item)
    return items


def get_bare_metal_plans(plan: dict, location: str) -> Optional[RawCatalogItem]:
    """Convert one bare-metal plan at one location into a catalog item.

    GPU plans must be listed in ``BARE_METAL_GPU_DETAILS``; plans that are
    unknown, excluded, or have an unrecognized vendor yield ``None``.
    """
    gpu_count = 0
    gpu_name = gpu_memory = gpu_vendor = None
    if "gpu" in plan["id"]:
        details = BARE_METAL_GPU_DETAILS.get(plan["id"])
        if details is None:
            logger.warning("Skipping unknown GPU plan %s", plan["id"])
            return None
        gpu_count, gpu_name, gpu_memory = details
        if gpu_name in EXCLUSION_LIST:
            return None
        gpu_vendor = get_gpu_vendor(gpu_name)
        if gpu_vendor is None:
            logger.warning("Unknown GPU vendor for plan %s, skipping", plan["id"])
            return None
    return RawCatalogItem(
        instance_name=plan["id"],
        location=location,
        price=plan["hourly_cost"],
        cpu=plan["cpu_threads"],
        memory=plan["ram"] / 1024,  # API reports RAM in MB — TODO confirm
        gpu_count=gpu_count,
        gpu_name=gpu_name,
        gpu_memory=gpu_memory,
        gpu_vendor=gpu_vendor,
        spot=False,
        disk_size=plan["disk"],
    )


def get_instance_plans(plan: dict, location: str) -> Optional[RawCatalogItem]:
    """Convert one VM plan at one location into a catalog item.

    CPU plan types (``vc2``/``vhf``/``vhp``/``voc``) map to items without
    accelerators; Cloud GPU plans (``vcg``) get GPU details parsed from the
    plan metadata. Any other plan type yields ``None``.
    """
    plan_type = plan["type"]
    if plan_type in ["vc2", "vhf", "vhp", "voc"]:
        return RawCatalogItem(
            instance_name=plan["id"],
            location=location,
            price=plan["hourly_cost"],
            cpu=plan["vcpu_count"],
            memory=plan["ram"] / 1024,  # API reports RAM in MB — TODO confirm
            gpu_count=0,
            gpu_name=None,
            gpu_memory=None,
            gpu_vendor=None,
            spot=False,
            disk_size=plan["disk"],
        )
    elif plan_type == "vcg":
        # gpu_type looks like "NVIDIA_A100"; the model name follows the vendor.
        gpu_name = plan["gpu_type"].split("_")[1] if "_" in plan["gpu_type"] else None
        if gpu_name is None:
            # Fix: previously fell through with gpu_count=0 and raised
            # ZeroDivisionError at gpu_memory_gb / gpu_count below.
            logger.warning(
                "Cannot parse GPU type %s for plan %s, skipping", plan["gpu_type"], plan["id"]
            )
            return None
        if gpu_name in EXCLUSION_LIST:
            logger.info(f"Excluding plan with GPU {gpu_name} as it is not supported.")
            return None
        gpu_vendor = get_gpu_vendor(gpu_name)
        gpu_memory_gb = plan["gpu_vram_gb"]
        per_gpu_memory = get_gpu_memory(gpu_name)
        if per_gpu_memory is None:
            # Fix: previously raised TypeError (int // None) for GPUs missing
            # from the known-GPU tables.
            logger.warning("Unknown GPU memory for %s, skipping plan %s", gpu_name, plan["id"])
            return None
        # Fractional-GPU plans (VRAM below one full device) are reported as
        # gpu_count=1.
        gpu_count = max(1, gpu_memory_gb // per_gpu_memory)
        return RawCatalogItem(
            instance_name=plan["id"],
            location=location,
            price=plan["hourly_cost"],
            cpu=plan["vcpu_count"],
            memory=plan["ram"] / 1024,  # API reports RAM in MB — TODO confirm
            gpu_count=gpu_count,
            gpu_name=gpu_name,
            gpu_memory=gpu_memory_gb / gpu_count,
            gpu_vendor=gpu_vendor,
            spot=False,
            disk_size=plan["disk"],
        )
    return None


def get_gpu_memory(gpu_name: str) -> Optional[float]:
    """Return per-device VRAM in GB for a GPU model, or ``None`` if unknown.

    The name comparison is case-insensitive. The previous ``-> float``
    annotation was wrong: an unrecognized name falls through and returns
    ``None`` (now made explicit).
    """
    if gpu_name.upper() == "A100":
        return 80  # VULTR A100 instances have 80GB
    for gpu in KNOWN_NVIDIA_GPUS:
        if gpu.name.upper() == gpu_name.upper():
            return gpu.memory
    for gpu in KNOWN_AMD_GPUS:
        if gpu.name.upper() == gpu_name.upper():
            return gpu.memory
    # Lazy %-formatting instead of an f-string per logging best practice.
    logger.warning("Unknown GPU %s", gpu_name)
    return None


def get_gpu_vendor(gpu_name: Optional[str]) -> Optional[str]:
    """Map a GPU model name to its vendor string, or ``None`` if unrecognized.

    Matching against the known-GPU tables is case-insensitive.
    """
    if gpu_name is None:
        return None
    needle = gpu_name.upper()
    if any(gpu.name.upper() == needle for gpu in KNOWN_NVIDIA_GPUS):
        return AcceleratorVendor.NVIDIA.value
    if any(gpu.name.upper() == needle for gpu in KNOWN_AMD_GPUS):
        return AcceleratorVendor.AMD.value
    return None


def _make_request(method: str, path: str, data: Any = None) -> Response:
    """Issue a request against the Vultr API and raise for HTTP error statuses.

    `path` is appended to `API_URL`; `data`, when given, is sent as JSON.
    """
    resp = requests.request(method, API_URL + path, json=data, timeout=30)
    resp.raise_for_status()
    return resp


# Hard-coded GPU specs for bare-metal plans, keyed by plan id:
# (gpu_count, gpu_name, per-GPU memory in GB).
BARE_METAL_GPU_DETAILS = {
    "vbm-48c-1024gb-4-a100-gpu": (4, "A100", 80),
    "vbm-112c-2048gb-8-h100-gpu": (8, "H100", 80),
    "vbm-112c-2048gb-8-a100-gpu": (8, "A100", 80),
    "vbm-64c-2048gb-8-l40-gpu": (8, "L40S", 48),  # plan id says "l40", mapped to L40S — TODO confirm
    "vbm-72c-480gb-gh200-gpu": (1, "GH200", 96),
    "vbm-256c-2048gb-8-mi300x-gpu": (8, "MI300X", 192),
}
111 changes: 111 additions & 0 deletions src/tests/providers/test_vultr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import gpuhunt._internal.catalog as internal_catalog
from gpuhunt import Catalog
from gpuhunt.providers.vultr import VultrProvider, fetch_offers

# Mocked /v2/plans-metal response: one AMD MI300X plan and one NVIDIA H100
# plan, each available in a single location.
bare_metal = {
    "plans_metal": [
        {
            "id": "vbm-256c-2048gb-8-mi300x-gpu",
            "physical_cpus": 2,
            "cpu_count": 128,
            "cpu_cores": 128,
            "cpu_threads": 256,
            "cpu_model": "EPYC 9534",
            "cpu_mhz": 2450,
            "ram": 2321924,
            "disk": 3576,
            "disk_count": 8,
            "bandwidth": 10240,
            "monthly_cost": 11773.44,
            "hourly_cost": 17.52,
            "monthly_cost_preemptible": 9891.84,
            "hourly_cost_preemptible": 14.72,
            "type": "NVMe",
            "locations": ["ord"],
        },
        {
            "id": "vbm-112c-2048gb-8-h100-gpu",
            "physical_cpus": 2,
            "cpu_count": 112,
            "cpu_cores": 112,
            "cpu_threads": 224,
            "cpu_model": "Platinum 8480+",
            "cpu_mhz": 2000,
            "ram": 2097152,
            "disk": 960,
            "disk_count": 2,
            "bandwidth": 15360,
            "monthly_cost": 16074.24,
            "hourly_cost": 23.92,
            "monthly_cost_preemptible": 12364.8,
            "hourly_cost_preemptible": 18.4,
            "type": "NVMe",
            "locations": ["sea"],
        },
    ]
}

# Mocked /v2/plans response: three Cloud GPU ("vcg") A100 plans, including a
# fractional-GPU plan (4 GB VRAM) that should still count as one GPU.
vm_instances = {
    "plans": [
        {
            "id": "vcg-a100-1c-6g-4vram",
            "vcpu_count": 1,
            "ram": 6144,
            "disk": 70,
            "disk_count": 1,
            "bandwidth": 1024,
            "monthly_cost": 90,
            "hourly_cost": 0.123,
            "type": "vcg",
            "locations": ["ewr"],
            "gpu_vram_gb": 4,
            "gpu_type": "NVIDIA_A100",
        },
        {
            "id": "vcg-a100-12c-120g-80vram",
            "vcpu_count": 12,
            "ram": 122880,
            "disk": 1400,
            "disk_count": 1,
            "bandwidth": 10240,
            "monthly_cost": 1750,
            "hourly_cost": 2.397,
            "type": "vcg",
            "locations": ["ewr"],
            "gpu_vram_gb": 80,
            "gpu_type": "NVIDIA_A100",
        },
        {
            "id": "vcg-a100-6c-60g-40vram",
            "vcpu_count": 12,
            "ram": 61440,
            "disk": 1400,
            "disk_count": 1,
            "bandwidth": 10240,
            "monthly_cost": 800,
            "hourly_cost": 1.397,
            "type": "vcg",
            "locations": ["ewr"],
            "gpu_vram_gb": 40,
            "gpu_type": "NVIDIA_A100",
        },
    ]
}


def test_fetch_offers(requests_mock):
    """End-to-end check: mocked Vultr endpoints yield the expected offers."""
    # Mocking the responses for the API endpoints
    requests_mock.get("https://api.vultr.com/v2/plans-metal?per_page=500", json=bare_metal)
    requests_mock.get("https://api.vultr.com/v2/plans?type=all&per_page=500", json=vm_instances)

    # 2 bare-metal plans + 3 VM plans, one location each.
    assert len(fetch_offers()) == 5
    catalog = Catalog(balance_resources=False, auto_reload=False)
    vultr = VultrProvider()
    # Fix: restrict the catalog to vultr only, but restore the module-level
    # provider lists afterwards so this test does not leak state into others.
    saved = (internal_catalog.ONLINE_PROVIDERS, internal_catalog.OFFLINE_PROVIDERS)
    internal_catalog.ONLINE_PROVIDERS = ["vultr"]
    internal_catalog.OFFLINE_PROVIDERS = []
    try:
        catalog.add_provider(vultr)
        assert len(catalog.query(provider=["vultr"], min_gpu_count=1, max_gpu_count=1)) == 3
        assert len(catalog.query(provider=["vultr"], min_gpu_memory=80, max_gpu_count=1)) == 1
        assert len(catalog.query(provider=["vultr"], gpu_vendor="amd")) == 1
        assert len(catalog.query(provider=["vultr"], gpu_name="MI300X")) == 1
    finally:
        internal_catalog.ONLINE_PROVIDERS, internal_catalog.OFFLINE_PROVIDERS = saved
Loading