Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial Commit: Add vultr support #110

Merged
merged 7 commits into from
Jan 8, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ dev = [
"pytest~=7.0",
"pytest-mock",
"ruff==0.5.3", # Should match .pre-commit-config.yaml
"requests-mock",
]

[tool.setuptools.dynamic]
Expand Down
5 changes: 5 additions & 0 deletions src/gpuhunt/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ def main():
"runpod",
"tensordock",
"vastai",
"vultr",
],
)
parser.add_argument("--output", required=True)
Expand Down Expand Up @@ -82,6 +83,10 @@ def main():
from gpuhunt.providers.vastai import VastAIProvider

provider = VastAIProvider()
elif args.provider == "vultr":
from gpuhunt.providers.vultr import VultrProvider

provider = VultrProvider()
else:
exit(f"Unknown provider {args.provider}")

Expand Down
2 changes: 1 addition & 1 deletion src/gpuhunt/_internal/catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
version_url = "https://dstack-gpu-pricing.s3.eu-west-1.amazonaws.com/v1/version"
catalog_url = "https://dstack-gpu-pricing.s3.eu-west-1.amazonaws.com/v1/{version}/catalog.zip"
OFFLINE_PROVIDERS = ["aws", "azure", "datacrunch", "gcp", "lambdalabs", "oci", "runpod"]
ONLINE_PROVIDERS = ["cudo", "tensordock", "vastai"]
ONLINE_PROVIDERS = ["cudo", "tensordock", "vastai", "vultr"]
jvstme marked this conversation as resolved.
Show resolved Hide resolved
RELOAD_INTERVAL = 15 * 60 # 15 minutes


Expand Down
1 change: 1 addition & 0 deletions src/gpuhunt/_internal/constraints.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ def correct_gpu_memory_gib(gpu_name: str, memory_mib: float) -> int:

KNOWN_NVIDIA_GPUS: list[NvidiaGPUInfo] = [
NvidiaGPUInfo(name="A10", memory=24, compute_capability=(8, 6)),
NvidiaGPUInfo(name="A16", memory=16, compute_capability=(8, 6)),
NvidiaGPUInfo(name="A40", memory=48, compute_capability=(8, 6)),
NvidiaGPUInfo(name="A100", memory=40, compute_capability=(8, 0)),
NvidiaGPUInfo(name="A100", memory=80, compute_capability=(8, 0)),
Expand Down
1 change: 1 addition & 0 deletions src/gpuhunt/_internal/default.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ def default_catalog() -> Catalog:
("gpuhunt.providers.tensordock", "TensorDockProvider"),
("gpuhunt.providers.vastai", "VastAIProvider"),
("gpuhunt.providers.cudo", "CudoProvider"),
("gpuhunt.providers.vultr", "VultrProvider"),
]:
try:
module = importlib.import_module(module)
Expand Down
179 changes: 179 additions & 0 deletions src/gpuhunt/providers/vultr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
import logging
from typing import Any, Optional

import requests
from requests import Response

from gpuhunt import QueryFilter, RawCatalogItem
from gpuhunt._internal.constraints import KNOWN_AMD_GPUS, KNOWN_NVIDIA_GPUS
from gpuhunt._internal.models import AcceleratorVendor
from gpuhunt.providers import AbstractProvider

logger = logging.getLogger(__name__)

# Base URL of the Vultr v2 REST API (see https://www.vultr.com/api/).
API_URL = "https://api.vultr.com/v2"

# GPU model names whose plans are skipped entirely — presumably not supported
# downstream; reason is not stated in this module. TODO confirm.
EXCLUSION_LIST = ["GH200"]


class VultrProvider(AbstractProvider):
    """Catalog provider for Vultr cloud GPU, bare-metal, and CPU plans."""

    NAME = "vultr"

    def get(
        self, query_filter: Optional[QueryFilter] = None, balance_resources: bool = True
    ) -> list[RawCatalogItem]:
        """Return every available Vultr offer, cheapest first."""
        return sorted(fetch_offers(), key=lambda item: item.price)


def fetch_offers() -> list[RawCatalogItem]:
    """Fetch all Vultr plans and convert them into raw catalog items.

    Queries two endpoints:
    1. Bare Metal plans (``vbm``), and
    2. all other plans, including Cloud GPU (``vcg``), Cloud Compute (``vc2``),
       High Frequency Compute (``vhf``), High Performance (``vhp``), and all
       optimized Cloud Types (``voc``).

    Note: the return annotation was previously ``Optional[list[...]]`` but this
    function always returns a list (it raises on HTTP errors instead).
    """
    bare_metal_plans_response = _make_request("GET", "/plans-metal?per_page=500")
    other_plans_response = _make_request("GET", "/plans?type=all&per_page=500")
    return convert_response_to_raw_catalog_items(bare_metal_plans_response, other_plans_response)


def convert_response_to_raw_catalog_items(
    bare_metal_plans_response: Response, other_plans_response: Response
) -> list[RawCatalogItem]:
    """Build one catalog item per (plan, location) pair from both API responses.

    Bare-metal plans come first, then VM plans; converters returning a falsy
    value (skipped/unknown plans) are dropped.
    """
    items: list[RawCatalogItem] = []
    plan_groups = [
        (bare_metal_plans_response.json()["plans_metal"], get_bare_metal_plans),
        (other_plans_response.json()["plans"], get_instance_plans),
    ]
    for plans, converter in plan_groups:
        for plan in plans:
            for location in plan["locations"]:
                item = converter(plan, location)
                if item:
                    items.append(item)
    return items


def get_bare_metal_plans(plan: dict, location: str) -> Optional[RawCatalogItem]:
    """Convert one bare-metal plan at one location into a catalog item.

    GPU plans must be listed in ``BARE_METAL_GPU_DETAILS``; plans that are
    unknown, excluded, or have an unrecognized vendor yield ``None``.
    """
    gpu_count = 0
    gpu_name = gpu_memory = gpu_vendor = None
    if "gpu" in plan["id"]:
        details = BARE_METAL_GPU_DETAILS.get(plan["id"])
        if details is None:
            logger.warning("Skipping unknown GPU plan %s", plan["id"])
            return None
        gpu_count, gpu_name, gpu_memory = details
        if gpu_name in EXCLUSION_LIST:
            return None
        gpu_vendor = get_gpu_vendor(gpu_name)
        if gpu_vendor is None:
            logger.warning("Unknown GPU vendor for plan %s, skipping", plan["id"])
            return None
    return RawCatalogItem(
        instance_name=plan["id"],
        location=location,
        price=plan["hourly_cost"],
        cpu=plan["cpu_threads"],
        memory=plan["ram"] / 1024,  # API reports RAM in MB — TODO confirm
        gpu_count=gpu_count,
        gpu_name=gpu_name,
        gpu_memory=gpu_memory,
        gpu_vendor=gpu_vendor,
        spot=False,
        disk_size=plan["disk"],
    )


def get_instance_plans(plan: dict, location: str) -> Optional[RawCatalogItem]:
    """Convert one VM plan at one location into a catalog item.

    CPU plan types (``vc2``/``vhf``/``vhp``/``voc``) map to items without
    accelerators; Cloud GPU plans (``vcg``) get GPU details parsed from the
    plan metadata. Any other plan type yields ``None``.
    """
    plan_type = plan["type"]
    if plan_type in ["vc2", "vhf", "vhp", "voc"]:
        return RawCatalogItem(
            instance_name=plan["id"],
            location=location,
            price=plan["hourly_cost"],
            cpu=plan["vcpu_count"],
            memory=plan["ram"] / 1024,  # API reports RAM in MB — TODO confirm
            gpu_count=0,
            gpu_name=None,
            gpu_memory=None,
            gpu_vendor=None,
            spot=False,
            disk_size=plan["disk"],
        )
    elif plan_type == "vcg":
        # gpu_type looks like "NVIDIA_A100"; the model name follows the vendor.
        gpu_name = plan["gpu_type"].split("_")[1] if "_" in plan["gpu_type"] else None
        if gpu_name is None:
            # Fix: previously fell through with gpu_count=0 and raised
            # ZeroDivisionError at gpu_memory_gb / gpu_count below.
            logger.warning(
                "Cannot parse GPU type %s for plan %s, skipping", plan["gpu_type"], plan["id"]
            )
            return None
        if gpu_name in EXCLUSION_LIST:
            logger.info(f"Excluding plan with GPU {gpu_name} as it is not supported.")
            return None
        gpu_vendor = get_gpu_vendor(gpu_name)
        gpu_memory_gb = plan["gpu_vram_gb"]
        per_gpu_memory = get_gpu_memory(gpu_name)
        if per_gpu_memory is None:
            # Fix: previously raised TypeError (int // None) for GPUs missing
            # from the known-GPU tables.
            logger.warning("Unknown GPU memory for %s, skipping plan %s", gpu_name, plan["id"])
            return None
        # Fractional-GPU plans (VRAM below one full device) are reported as
        # gpu_count=1.
        gpu_count = max(1, gpu_memory_gb // per_gpu_memory)
        return RawCatalogItem(
            instance_name=plan["id"],
            location=location,
            price=plan["hourly_cost"],
            cpu=plan["vcpu_count"],
            memory=plan["ram"] / 1024,  # API reports RAM in MB — TODO confirm
            gpu_count=gpu_count,
            gpu_name=gpu_name,
            gpu_memory=gpu_memory_gb / gpu_count,
            gpu_vendor=gpu_vendor,
            spot=False,
            disk_size=plan["disk"],
        )
    return None


def get_gpu_memory(gpu_name: str) -> Optional[float]:
    """Return per-device VRAM in GB for a GPU model, or ``None`` if unknown.

    The name comparison is case-insensitive. The previous ``-> float``
    annotation was wrong: an unrecognized name falls through and returns
    ``None`` (now made explicit).
    """
    if gpu_name.upper() == "A100":
        return 80  # VULTR A100 instances have 80GB
    for gpu in KNOWN_NVIDIA_GPUS:
        if gpu.name.upper() == gpu_name.upper():
            return gpu.memory
    for gpu in KNOWN_AMD_GPUS:
        if gpu.name.upper() == gpu_name.upper():
            return gpu.memory
    # Lazy %-formatting instead of an f-string per logging best practice.
    logger.warning("Unknown GPU %s", gpu_name)
    return None


def get_gpu_vendor(gpu_name: Optional[str]) -> Optional[str]:
    """Map a GPU model name to its vendor string, or ``None`` if unrecognized.

    Matching against the known-GPU tables is case-insensitive.
    """
    if gpu_name is None:
        return None
    needle = gpu_name.upper()
    if any(gpu.name.upper() == needle for gpu in KNOWN_NVIDIA_GPUS):
        return AcceleratorVendor.NVIDIA.value
    if any(gpu.name.upper() == needle for gpu in KNOWN_AMD_GPUS):
        return AcceleratorVendor.AMD.value
    return None


def _make_request(method: str, path: str, data: Any = None) -> Response:
    """Issue a request against the Vultr API and raise for HTTP error statuses.

    `path` is appended to `API_URL`; `data`, when given, is sent as JSON.
    """
    resp = requests.request(method, API_URL + path, json=data, timeout=30)
    resp.raise_for_status()
    return resp


# Hard-coded GPU specs for bare-metal plans, keyed by plan id:
# (gpu_count, gpu_name, per-GPU memory in GB).
BARE_METAL_GPU_DETAILS = {
    "vbm-48c-1024gb-4-a100-gpu": (4, "A100", 80),
    "vbm-112c-2048gb-8-h100-gpu": (8, "H100", 80),
    "vbm-112c-2048gb-8-a100-gpu": (8, "A100", 80),
    "vbm-64c-2048gb-8-l40-gpu": (8, "L40S", 48),  # plan id says "l40", mapped to L40S — TODO confirm
    "vbm-72c-480gb-gh200-gpu": (1, "GH200", 96),
    "vbm-256c-2048gb-8-mi300x-gpu": (8, "MI300X", 192),
}
111 changes: 111 additions & 0 deletions src/tests/providers/test_vultr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import gpuhunt._internal.catalog as internal_catalog
from gpuhunt import Catalog
from gpuhunt.providers.vultr import VultrProvider, fetch_offers

# Mocked /v2/plans-metal response: one AMD MI300X plan and one NVIDIA H100
# plan, each available in a single location.
bare_metal = {
    "plans_metal": [
        {
            "id": "vbm-256c-2048gb-8-mi300x-gpu",
            "physical_cpus": 2,
            "cpu_count": 128,
            "cpu_cores": 128,
            "cpu_threads": 256,
            "cpu_model": "EPYC 9534",
            "cpu_mhz": 2450,
            "ram": 2321924,
            "disk": 3576,
            "disk_count": 8,
            "bandwidth": 10240,
            "monthly_cost": 11773.44,
            "hourly_cost": 17.52,
            "monthly_cost_preemptible": 9891.84,
            "hourly_cost_preemptible": 14.72,
            "type": "NVMe",
            "locations": ["ord"],
        },
        {
            "id": "vbm-112c-2048gb-8-h100-gpu",
            "physical_cpus": 2,
            "cpu_count": 112,
            "cpu_cores": 112,
            "cpu_threads": 224,
            "cpu_model": "Platinum 8480+",
            "cpu_mhz": 2000,
            "ram": 2097152,
            "disk": 960,
            "disk_count": 2,
            "bandwidth": 15360,
            "monthly_cost": 16074.24,
            "hourly_cost": 23.92,
            "monthly_cost_preemptible": 12364.8,
            "hourly_cost_preemptible": 18.4,
            "type": "NVMe",
            "locations": ["sea"],
        },
    ]
}

# Mocked /v2/plans response: three Cloud GPU ("vcg") A100 plans, including a
# fractional-GPU plan (4 GB VRAM) that should still count as one GPU.
vm_instances = {
    "plans": [
        {
            "id": "vcg-a100-1c-6g-4vram",
            "vcpu_count": 1,
            "ram": 6144,
            "disk": 70,
            "disk_count": 1,
            "bandwidth": 1024,
            "monthly_cost": 90,
            "hourly_cost": 0.123,
            "type": "vcg",
            "locations": ["ewr"],
            "gpu_vram_gb": 4,
            "gpu_type": "NVIDIA_A100",
        },
        {
            "id": "vcg-a100-12c-120g-80vram",
            "vcpu_count": 12,
            "ram": 122880,
            "disk": 1400,
            "disk_count": 1,
            "bandwidth": 10240,
            "monthly_cost": 1750,
            "hourly_cost": 2.397,
            "type": "vcg",
            "locations": ["ewr"],
            "gpu_vram_gb": 80,
            "gpu_type": "NVIDIA_A100",
        },
        {
            "id": "vcg-a100-6c-60g-40vram",
            "vcpu_count": 12,
            "ram": 61440,
            "disk": 1400,
            "disk_count": 1,
            "bandwidth": 10240,
            "monthly_cost": 800,
            "hourly_cost": 1.397,
            "type": "vcg",
            "locations": ["ewr"],
            "gpu_vram_gb": 40,
            "gpu_type": "NVIDIA_A100",
        },
    ]
}


def test_fetch_offers(requests_mock):
    """End-to-end check: mocked Vultr endpoints yield the expected offers."""
    # Mocking the responses for the API endpoints
    requests_mock.get("https://api.vultr.com/v2/plans-metal?per_page=500", json=bare_metal)
    requests_mock.get("https://api.vultr.com/v2/plans?type=all&per_page=500", json=vm_instances)

    # 2 bare-metal plans + 3 VM plans, one location each.
    assert len(fetch_offers()) == 5
    catalog = Catalog(balance_resources=False, auto_reload=False)
    vultr = VultrProvider()
    # Fix: restrict the catalog to vultr only, but restore the module-level
    # provider lists afterwards so this test does not leak state into others.
    saved = (internal_catalog.ONLINE_PROVIDERS, internal_catalog.OFFLINE_PROVIDERS)
    internal_catalog.ONLINE_PROVIDERS = ["vultr"]
    internal_catalog.OFFLINE_PROVIDERS = []
    try:
        catalog.add_provider(vultr)
        assert len(catalog.query(provider=["vultr"], min_gpu_count=1, max_gpu_count=1)) == 3
        assert len(catalog.query(provider=["vultr"], min_gpu_memory=80, max_gpu_count=1)) == 1
        assert len(catalog.query(provider=["vultr"], gpu_vendor="amd")) == 1
        assert len(catalog.query(provider=["vultr"], gpu_name="MI300X")) == 1
    finally:
        internal_catalog.ONLINE_PROVIDERS, internal_catalog.OFFLINE_PROVIDERS = saved
Loading