Skip to content

Commit

Permalink
Initial Commit: Add vultr support
Browse files Browse the repository at this point in the history
  • Loading branch information
Bihan Rana authored and Bihan Rana committed Dec 19, 2024
1 parent cb61539 commit d4e5321
Show file tree
Hide file tree
Showing 6 changed files with 450 additions and 1 deletion.
5 changes: 5 additions & 0 deletions src/gpuhunt/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def main():
"runpod",
"tensordock",
"vastai",
"vultr"
],
)
parser.add_argument("--output", required=True)
Expand Down Expand Up @@ -82,6 +83,10 @@ def main():
from gpuhunt.providers.vastai import VastAIProvider

provider = VastAIProvider()
elif args.provider == "vultr":
from gpuhunt.providers.vultr import VultrProvider

provider = VultrProvider()
else:
exit(f"Unknown provider {args.provider}")

Expand Down
2 changes: 1 addition & 1 deletion src/gpuhunt/_internal/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
version_url = "https://dstack-gpu-pricing.s3.eu-west-1.amazonaws.com/v1/version"
catalog_url = "https://dstack-gpu-pricing.s3.eu-west-1.amazonaws.com/v1/{version}/catalog.zip"
OFFLINE_PROVIDERS = ["aws", "azure", "datacrunch", "gcp", "lambdalabs", "oci", "runpod"]
ONLINE_PROVIDERS = ["cudo", "tensordock", "vastai"]
ONLINE_PROVIDERS = ["cudo", "tensordock", "vastai", "vultr"]
RELOAD_INTERVAL = 15 * 60 # 15 minutes


Expand Down
1 change: 1 addition & 0 deletions src/gpuhunt/_internal/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ def correct_gpu_memory_gib(gpu_name: str, memory_mib: float) -> int:

KNOWN_NVIDIA_GPUS: list[NvidiaGPUInfo] = [
NvidiaGPUInfo(name="A10", memory=24, compute_capability=(8, 6)),
NvidiaGPUInfo(name="A16", memory=16, compute_capability=(8, 6)),
NvidiaGPUInfo(name="A40", memory=48, compute_capability=(8, 6)),
NvidiaGPUInfo(name="A100", memory=40, compute_capability=(8, 0)),
NvidiaGPUInfo(name="A100", memory=80, compute_capability=(8, 0)),
Expand Down
1 change: 1 addition & 0 deletions src/gpuhunt/_internal/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def default_catalog() -> Catalog:
("gpuhunt.providers.tensordock", "TensorDockProvider"),
("gpuhunt.providers.vastai", "VastAIProvider"),
("gpuhunt.providers.cudo", "CudoProvider"),
("gpuhunt.providers.vultr", "VultrProvider"),
]:
try:
module = importlib.import_module(module)
Expand Down
197 changes: 197 additions & 0 deletions src/gpuhunt/providers/vultr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,197 @@
import json
from typing import Optional, List, Any
import logging
import requests
from gpuhunt import QueryFilter, RawCatalogItem
from gpuhunt.providers import AbstractProvider
from requests import Response

from gpuhunt._internal.constraints import KNOWN_NVIDIA_GPUS, KNOWN_AMD_GPUS

from gpuhunt._internal.models import AcceleratorVendor

# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Base URL that _make_request prepends to every request path.
API_URL = "https://api.vultr.com/v2"

# GPU names whose plans are skipped when catalog items are built.
EXCLUSION_LIST = ["GH200"]


class VultrProvider(AbstractProvider):
    """Catalog provider that lists Vultr plans as raw catalog items."""

    NAME = "vultr"

    def get(
        self, query_filter: Optional[QueryFilter] = None, balance_resources: bool = True
    ) -> List[RawCatalogItem]:
        """Return every fetched offer ordered by ascending price.

        Neither ``query_filter`` nor ``balance_resources`` is consulted here.
        An empty list is returned when ``fetch_offers`` yields ``None``.
        """
        fetched = fetch_offers()
        if fetched is None:
            return []
        return sorted(fetched, key=lambda offer: offer.price)


def fetch_offers() -> Optional[List[RawCatalogItem]]:
    """Download Vultr plans and convert them to raw catalog items.

    Three listings are requested, in this order:

    1. Cloud GPU plans (``/plans?type=vcg``),
    2. Bare Metal plans (``/plans-metal``),
    3. all other plans (``/plans?type=all``), which covers the CPU types
       Cloud Compute (vc2), High Frequency Compute (vhf), High Performance
       (vhp) and the optimized cloud types (voc).

    Returns:
        The converted catalog items, or ``None`` when a request fails with a
        ``requests.RequestException`` (the failure is logged).
    """
    try:
        gpu_reply = _make_request("GET", "/plans?type=vcg")
        metal_reply = _make_request("GET", "/plans-metal")
        general_reply = _make_request("GET", "/plans?type=all")

        # GPU plans come first, followed by the general listing.
        gpu_plans = gpu_reply.json().get("plans", [])
        general_plans = general_reply.json().get("plans", [])
        instance_plans = gpu_plans + general_plans
        metal_plans = metal_reply.json().get("plans_metal", [])

        merged = {"plans": instance_plans, "plans_metal": metal_plans}
        return convert_response_to_raw_catalog_items(merged)
    except requests.RequestException as e:
        logger.error(f"Failed to fetch plans: {str(e)}")
        return None


def convert_response_to_raw_catalog_items(response: dict) -> List[RawCatalogItem]:
    """Expand plans into one catalog item per (plan, location) pair.

    Args:
        response: Mapping with an optional ``"plans"`` list (instance plans)
            and an optional ``"plans_metal"`` list (bare metal plans). Each
            plan may carry a ``"locations"`` list; plans without one produce
            no items.

    Returns:
        Catalog items for instance plans followed by those for bare metal
        plans. Plans for which the converter returns nothing are omitted.
    """
    # Pair each source list with its converter up front. Deciding the kind by
    # testing `plan in plans_metal` for every location is a linear scan with
    # dict comparisons each time, and would misroute an instance plan that
    # happens to compare equal to a bare metal one.
    sources = (
        (response.get("plans") or [], get_instance_plans),
        (response.get("plans_metal") or [], get_bare_metal_plans),
    )

    catalog_items = []
    for plans, convert in sources:
        for plan in plans:
            for location in plan.get("locations") or []:
                catalog_item = convert(plan, location)
                if catalog_item:
                    catalog_items.append(catalog_item)
    return catalog_items


def get_bare_metal_plans(plan: dict, location: str) -> Optional[RawCatalogItem]:
    """Build the catalog item for one bare metal plan in one location.

    The GPU model and count are parsed from the plan id. A plan whose GPU is
    listed in ``EXCLUSION_LIST`` is logged and skipped.

    Args:
        plan: Bare metal plan mapping; ``id``, ``monthly_cost``,
            ``cpu_count``, ``ram`` and ``disk`` are read from it.
        location: Location the item is emitted for.

    Returns:
        The catalog item, or ``None`` for an excluded GPU.

    Raises:
        ValueError: Propagated from ``get_gpu_memory`` for an unknown GPU.
    """
    gpu_name, gpu_total = extract_gpu_info_from_id(plan.get("id", ""))
    if gpu_name in EXCLUSION_LIST:
        logger.info(f"Excluding plan with GPU {gpu_name} as it is not supported.")
        return None

    # Per-GPU memory multiplied by the GPU count; None for GPU-less plans.
    combined_gpu_memory = None
    if gpu_name:
        combined_gpu_memory = get_gpu_memory(gpu_name) * gpu_total
    vendor = get_gpu_vendor(gpu_name)

    # The monthly cost is spread over 730 hours to obtain an hourly price.
    hourly_price = round(plan.get("monthly_cost", 0) / 730, 2)
    return RawCatalogItem(
        instance_name=plan.get("id"),
        location=location,
        price=hourly_price,
        cpu=plan.get("cpu_count"),
        memory=plan.get("ram", 0) / 1024,
        gpu_count=gpu_total,
        gpu_name=gpu_name,
        gpu_memory=combined_gpu_memory,
        gpu_vendor=vendor,
        spot=False,
        disk_size=plan.get("disk", 0),
    )


def get_instance_plans(plan: dict, location: str) -> Optional[RawCatalogItem]:
    """Build the catalog item for one cloud instance plan in one location.

    Plans of type ``vc2``, ``vhf``, ``vhp`` and ``voc`` are emitted without a
    GPU. Plans of type ``vcg`` take the GPU name from the part of
    ``gpu_type`` after the first underscore and the GPU memory from
    ``gpu_vram_gb``. Any other type yields ``None``.

    Args:
        plan: Instance plan mapping.
        location: Location the item is emitted for.

    Returns:
        The catalog item, or ``None`` for an unhandled plan type or a GPU
        listed in ``EXCLUSION_LIST``.

    Raises:
        ValueError: Propagated from ``get_gpu_memory`` for an unknown GPU.
    """
    kind = plan.get("type", "")
    is_cpu_only = kind in ["vc2", "vhf", "vhp", "voc"]
    if not is_cpu_only and kind != "vcg":
        return None

    if is_cpu_only:
        gpu_name = None
        gpu_vendor = None
        gpu_count = 0
        gpu_memory = None
    else:
        gpu_type = plan.get("gpu_type", "")
        gpu_name = gpu_type.split("_")[1] if "_" in gpu_type else None
        if gpu_name in EXCLUSION_LIST:
            logger.info(f"Excluding plan with GPU {gpu_name} as it is not supported.")
            return None
        gpu_vendor = get_gpu_vendor(gpu_name)
        gpu_memory = plan.get("gpu_vram_gb", 0)
        if gpu_name:
            # A fractional GPU (less VRAM than one full card) counts as 1.
            gpu_count = max(1, gpu_memory // get_gpu_memory(gpu_name))
        else:
            gpu_count = 0

    return RawCatalogItem(
        instance_name=plan.get("id"),
        location=location,
        price=plan.get("hourly_cost", 0),
        cpu=plan.get("vcpu_count"),
        memory=plan.get("ram", 0) / 1024,
        gpu_count=gpu_count,
        gpu_name=gpu_name,
        gpu_memory=gpu_memory,
        gpu_vendor=gpu_vendor,
        spot=False,
        disk_size=plan.get("disk", 0),
    )


def get_gpu_memory(gpu_name: str) -> float:
    """Look up the memory of a known GPU by its upper-cased name.

    The NVIDIA list is searched before the AMD list and the first entry whose
    upper-cased name equals ``gpu_name`` wins.

    Raises:
        ValueError: If no entry in either list matches (also logged).
    """
    for known_gpus in (KNOWN_NVIDIA_GPUS, KNOWN_AMD_GPUS):
        for candidate in known_gpus:
            if candidate.name.upper() == gpu_name:
                return candidate.memory
    logger.error(f"GPU {gpu_name} not found in known GPU lists.")
    raise ValueError(f"GPU {gpu_name} not found.")


def get_gpu_vendor(gpu_name: Optional[str]) -> Optional[str]:
    """Return the vendor value for a GPU name, or ``None`` if it is unknown.

    The name is compared against the upper-cased names of the known NVIDIA
    GPUs first, then the known AMD GPUs. ``None`` in gives ``None`` out.
    """
    if gpu_name is not None:
        if any(entry.name.upper() == gpu_name for entry in KNOWN_NVIDIA_GPUS):
            return AcceleratorVendor.NVIDIA.value
        if any(entry.name.upper() == gpu_name for entry in KNOWN_AMD_GPUS):
            return AcceleratorVendor.AMD.value
    return None


def extract_gpu_info_from_id(id_str: str) -> tuple[Optional[str], int]:
    """Parse the GPU model and count out of a dash-separated plan id.

    An id is treated as a GPU plan when one of its segments is ``gpu``. The
    model is the second-to-last segment (upper-cased) and the count is the
    third-to-last segment when that segment is an integer.

    Examples:
        ``vbm-64c-2048gb-8-l40-gpu`` -> ``("L40", 8)``
        ``vbm-64c-2048gb-l40-gpu``   -> ``("L40", 1)``

    Args:
        id_str: Plan identifier.

    Returns:
        ``(gpu_name, gpu_count)``, or ``(None, 0)`` when the id has no
        ``gpu`` segment or is too short to carry a model name.
    """
    parts = id_str.split('-')
    # A lone "gpu" segment has no model in front of it; indexing parts[-2]
    # would raise IndexError, so such ids are treated as GPU-less.
    if 'gpu' not in parts or len(parts) < 2:
        return None, 0

    gpu_name = parts[-2].upper()
    try:
        gpu_count = int(parts[-3])
    except (ValueError, IndexError):
        # Default to 1 when the count is not spelled out (the preceding
        # segment is something like "2048gb") or the id has no such segment.
        gpu_count = 1
    return gpu_name, gpu_count


def _make_request(method: str, path: str, data: Any = None) -> Response:
    """Send a request to the Vultr API and return the successful response.

    Args:
        method: HTTP method name, e.g. ``"GET"``.
        path: Path appended to ``API_URL``.
        data: Optional payload sent as the JSON body.

    Returns:
        The response, once ``raise_for_status`` has passed.

    Raises:
        requests.HTTPError: For a 4xx/5xx reply; logged, then re-raised.
        requests.RequestException: For any other request failure; logged,
            then re-raised.
    """
    try:
        response = requests.request(
            method=method,
            url=API_URL + path,
            json=data,
            timeout=30,
        )
        response.raise_for_status()
        return response

    except requests.HTTPError as e:
        # Compare against None explicitly: a Response is falsy whenever its
        # status is an error, so a truthiness test would discard the status
        # code of exactly the replies handled here.
        status_code = e.response.status_code if e.response is not None else None
        if status_code == requests.codes.not_found:
            logger.exception(f"Resource not found at {API_URL + path}.")
        elif status_code == requests.codes.bad_request:
            logger.exception(f"Bad request to {API_URL + path}. Check the request payload or parameters.")
        elif status_code == requests.codes.forbidden:
            logger.exception(f"Access forbidden to {API_URL + path}. Check API permissions.")
        elif status_code == requests.codes.unauthorized:
            logger.exception(
                f"Unauthorized access to {API_URL + path}. Check API key or authentication details.")
        else:
            logger.exception(f"HTTP error {status_code} occurred when accessing {API_URL + path}.")

        raise
    except requests.RequestException as e:
        logger.exception(f"Request error while accessing {API_URL + path}: {str(e)}")
        raise
Loading

0 comments on commit d4e5321

Please sign in to comment.