From b2fd6ab284e3ce4c2a82f0b47eb5267e94638aa7 Mon Sep 17 00:00:00 2001 From: Egor Sklyarov Date: Mon, 29 Jan 2024 11:06:05 +0100 Subject: [PATCH] Normalize VastAI GPU memory (#33) --- src/gpuhunt/providers/tensordock.py | 1 + src/gpuhunt/providers/vastai.py | 17 ++++++++++++----- src/tests/providers/test_vastai.py | 9 +++++++++ 3 files changed, 22 insertions(+), 5 deletions(-) create mode 100644 src/tests/providers/test_vastai.py diff --git a/src/gpuhunt/providers/tensordock.py b/src/gpuhunt/providers/tensordock.py index 50ce0e9..eafd6da 100644 --- a/src/gpuhunt/providers/tensordock.py +++ b/src/gpuhunt/providers/tensordock.py @@ -80,6 +80,7 @@ def get( gpu_name=convert_gpu_name(gpu_name), gpu_memory=float(gpu["vram"]), spot=False, + disk_size=float(details["specs"]["storage"]["amount"]), ) ) return sorted(offers, key=lambda i: i.price) diff --git a/src/gpuhunt/providers/vastai.py b/src/gpuhunt/providers/vastai.py index d9155ae..bf88a88 100644 --- a/src/gpuhunt/providers/vastai.py +++ b/src/gpuhunt/providers/vastai.py @@ -5,6 +5,7 @@ import requests +from gpuhunt._internal.constraints import KNOWN_GPUS from gpuhunt._internal.models import QueryFilter, RawCatalogItem from gpuhunt.providers import AbstractProvider @@ -42,6 +43,7 @@ def get( logger.warning("Offer %s does not satisfy filters", offer["id"]) continue gpu_name = get_gpu_name(offer["gpu_name"]) + gpu_memory = normalize_gpu_memory(gpu_name, offer["gpu_ram"]) ondemand_offer = RawCatalogItem( instance_name=str(offer["id"]), location=get_location(offer["geolocation"]), @@ -59,7 +61,7 @@ def get( ), gpu_count=offer["num_gpus"], gpu_name=gpu_name, - gpu_memory=float(int(offer["gpu_ram"] / kilo)), + gpu_memory=float(gpu_memory), spot=False, disk_size=offer["disk_space"], ) @@ -86,10 +88,7 @@ def make_filters(q: QueryFilter) -> Dict[str, Dict[Operators, FilterValue]]: filters["num_gpus"]["gte"] = q.min_gpu_count if q.max_gpu_count is not None: filters["num_gpus"]["lte"] = q.max_gpu_count - if q.min_gpu_memory is not None: - filters["gpu_ram"]["gte"] = q.min_gpu_memory * kilo - if q.max_gpu_memory is not None: - filters["gpu_ram"]["lte"] = q.max_gpu_memory * kilo + # We cannot reliably filter by GPU memory, because it is not the same for a specific GPU model if q.min_disk_size is not None: filters["disk_space"]["gte"] = q.min_disk_size if q.max_disk_size is not None: @@ -131,6 +130,14 @@ def get_gpu_name(gpu_name: str) -> str: return gpu_name.replace(" ", "") +def normalize_gpu_memory(gpu_name: str, memory_mib: float) -> int: + known_memory = [gpu.memory for gpu in KNOWN_GPUS if gpu.name == gpu_name] + if known_memory: + # return the closest known value + return min(known_memory, key=lambda x: abs(x - memory_mib / kilo)) + return int(memory_mib / kilo) + + def get_location(location: Optional[str]) -> str: if location is None: return "" diff --git a/src/tests/providers/test_vastai.py b/src/tests/providers/test_vastai.py new file mode 100644 index 0000000..4be9f95 --- /dev/null +++ b/src/tests/providers/test_vastai.py @@ -0,0 +1,9 @@ +from gpuhunt.providers.vastai import kilo, normalize_gpu_memory + + +class TestGPU: + def test_normalize_known(self): + assert normalize_gpu_memory("A100", 78 * kilo) == 80 + + def test_normalize_unknown(self): + assert normalize_gpu_memory("X1000", 78 * kilo + 10) == 78