Skip to content

Commit

Permalink
Add Vultr Support (#2132)
Browse files Browse the repository at this point in the history
Co-authored-by: Bihan  Rana <[email protected]>
  • Loading branch information
Bihan and Bihan Rana authored Jan 13, 2025
1 parent d8899ae commit e41a29a
Show file tree
Hide file tree
Showing 16 changed files with 513 additions and 1 deletion.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def get_long_description():
"python-multipart>=0.0.16",
"filelock",
"psutil",
"gpuhunt>=0.0.17,<0.1.0",
"gpuhunt>=0.0.18,<0.1.0",
]

GATEWAY_AND_SERVER_COMMON_DEPS = [
Expand Down
1 change: 1 addition & 0 deletions src/dstack/_internal/core/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
BackendType.LAMBDA,
BackendType.OCI,
BackendType.TENSORDOCK,
BackendType.VULTR,
]
BACKENDS_WITH_PLACEMENT_GROUPS_SUPPORT = [
BackendType.AWS,
Expand Down
15 changes: 15 additions & 0 deletions src/dstack/_internal/core/backends/vultr/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from dstack._internal.core.backends.base import Backend
from dstack._internal.core.backends.vultr.compute import VultrCompute
from dstack._internal.core.backends.vultr.config import VultrConfig
from dstack._internal.core.models.backends.base import BackendType


class VultrBackend(Backend):
    """Backend implementation for Vultr; hands out a ``VultrCompute`` driver."""

    TYPE: BackendType = BackendType.VULTR

    def __init__(self, config: VultrConfig):
        """Keep ``config`` and create the compute driver from it once."""
        self.config = config
        self._compute = VultrCompute(config)

    def compute(self) -> VultrCompute:
        """Return the compute driver created in ``__init__``."""
        return self._compute
99 changes: 99 additions & 0 deletions src/dstack/_internal/core/backends/vultr/api_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import base64
from typing import Any

import requests
from requests import Response

from dstack._internal.core.errors import BackendError, BackendInvalidCredentialsError

# Base URL of the Vultr API (v2); request paths are appended to it when a request is made.
API_URL = "https://api.vultr.com/v2"


class VultrApiClient:
    """Thin client for the Vultr REST API rooted at ``API_URL``.

    Every call goes through ``_make_request``, which sends the API key as a
    bearer token and translates selected HTTP failures into backend errors.
    """

    def __init__(self, api_key: str):
        self.api_key = api_key

    def validate_api_key(self) -> bool:
        """Check the API key by listing SSH keys.

        Returns:
            ``False`` when the request is rejected as unauthorized/forbidden,
            ``True`` otherwise. Any other failure propagates to the caller.
        """
        try:
            self._make_request("GET", "/ssh-keys")
        except BackendInvalidCredentialsError:
            return False
        return True

    def get_instance(self, instance_id: str, plan_type: str):
        """Return the API description of an instance.

        Args:
            instance_id: ID returned by ``launch_instance``.
            plan_type: ``"bare-metal"`` selects the bare-metal endpoint; any
                other value selects the virtual machine endpoint.
        """
        if plan_type == "bare-metal":
            response = self._make_request("GET", f"/bare-metals/{instance_id}")
            return response.json()["bare_metal"]
        response = self._make_request("GET", f"/instances/{instance_id}")
        return response.json()["instance"]

    def launch_instance(self, region: str, plan: str, label: str, user_data: str):
        """Create a bare-metal server or a VM for ``plan`` and return its ID.

        Args:
            region: Region to launch in.
            plan: Plan name; it decides both the endpoint and the image.
            label: Label to attach to the new instance.
            user_data: Plain-text user data; sent base64-encoded.

        Returns:
            The ID of the created instance as reported by the API.
        """
        # For Bare-metals
        if "vbm" in plan:
            # "Docker on Ubuntu 22.04" is required for bare-metals.
            data = self._launch_payload(region, plan, label, user_data, {"image_id": "docker"})
            resp = self._make_request("POST", "/bare-metals", data)
            return resp.json()["bare_metal"]["id"]
        # For VMs
        if "vcg" in plan:
            # Ubuntu 22.04 will be installed. For gpu VMs, docker is preinstalled.
            image = {"os_id": 1743}
        else:
            image = {"image_id": "docker"}
        data = self._launch_payload(region, plan, label, user_data, image)
        resp = self._make_request("POST", "/instances", data)
        return resp.json()["instance"]["id"]

    @staticmethod
    def _launch_payload(region: str, plan: str, label: str, user_data: str, image: dict) -> dict:
        """Build the create-request body shared by bare-metal and VM launches."""
        return {
            "region": region,
            "plan": plan,
            "label": label,
            **image,
            "user_data": base64.b64encode(user_data.encode()).decode(),
        }

    def terminate_instance(self, instance_id: str, plan_type: str):
        """Delete an instance.

        Args:
            instance_id: ID of the instance to delete.
            plan_type: ``"bare-metal"`` selects the bare-metal endpoint; any
                other value selects the virtual machine endpoint.
        """
        if plan_type == "bare-metal":
            # Terminate bare-metal instance
            endpoint = f"/bare-metals/{instance_id}"
        else:
            # Terminate virtual machine instance
            endpoint = f"/instances/{instance_id}"
        self._make_request("DELETE", endpoint)

    def _make_request(self, method: str, path: str, data: Any = None) -> Response:
        """Send an authenticated request and return the successful response.

        Args:
            method: HTTP method.
            path: Path appended to ``API_URL``.
            data: Optional JSON-serializable request body.

        Raises:
            BackendInvalidCredentialsError: On HTTP 401 or 403.
            BackendError: On HTTP 400, 404 or 500.
            requests.HTTPError: On any other error status.
        """
        response = requests.request(
            method=method,
            url=API_URL + path,
            json=data,
            headers={"Authorization": f"Bearer {self.api_key}"},
            timeout=30,
        )
        try:
            response.raise_for_status()
        except requests.HTTPError as e:
            failed = e.response
            if failed is not None:
                if failed.status_code in (
                    requests.codes.forbidden,
                    requests.codes.unauthorized,
                ):
                    # Chain explicitly so the HTTP error stays visible as the cause.
                    raise BackendInvalidCredentialsError(failed.text) from e
                if failed.status_code in (
                    requests.codes.bad_request,
                    requests.codes.internal_server_error,
                    requests.codes.not_found,
                ):
                    raise BackendError(failed.text) from e
            raise
        return response
151 changes: 151 additions & 0 deletions src/dstack/_internal/core/backends/vultr/compute.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
import json
import re
from typing import List, Optional

import requests

from dstack._internal.core.backends.base import Compute
from dstack._internal.core.backends.base.compute import (
get_instance_name,
get_user_data,
)
from dstack._internal.core.backends.base.offers import get_catalog_offers
from dstack._internal.core.backends.vultr.api_client import VultrApiClient
from dstack._internal.core.backends.vultr.config import VultrConfig
from dstack._internal.core.errors import BackendError, ProvisioningError
from dstack._internal.core.models.backends.base import BackendType
from dstack._internal.core.models.instances import (
InstanceAvailability,
InstanceConfiguration,
InstanceOffer,
InstanceOfferWithAvailability,
SSHKey,
)
from dstack._internal.core.models.runs import Job, JobProvisioningData, Requirements, Run
from dstack._internal.core.models.volumes import Volume
from dstack._internal.utils.logging import get_logger

# Module-level logger named after this module.
logger = get_logger(__name__)


class VultrCompute(Compute):
    """Compute driver that lists offers and manages instances via ``VultrApiClient``."""

    def __init__(self, config: VultrConfig):
        self.config = config
        self.api_client = VultrApiClient(config.creds.api_key)

    def get_offers(
        self, requirements: Optional[Requirements] = None
    ) -> List[InstanceOfferWithAvailability]:
        """Return catalog offers for Vultr, all marked as available.

        Offers are restricted to the configured regions (if any) and to the
        plans accepted by ``_supported_instances``.
        """
        offers = get_catalog_offers(
            backend=BackendType.VULTR,
            requirements=requirements,
            locations=self.config.regions or None,
            extra_filter=_supported_instances,
        )
        return [
            InstanceOfferWithAvailability(
                **offer.dict(), availability=InstanceAvailability.AVAILABLE
            )
            for offer in offers
        ]

    def run_job(
        self,
        run: Run,
        job: Job,
        instance_offer: InstanceOfferWithAvailability,
        project_ssh_public_key: str,
        project_ssh_private_key: str,
        volumes: List[Volume],
    ) -> JobProvisioningData:
        """Create an instance for ``job`` authorized with the project's public key."""
        instance_config = InstanceConfiguration(
            project_name=run.project_name,
            instance_name=get_instance_name(run, job),
            ssh_keys=[SSHKey(public=project_ssh_public_key.strip())],
            user=run.user,
        )
        return self.create_instance(instance_offer, instance_config)

    def create_instance(
        self, instance_offer: InstanceOfferWithAvailability, instance_config: InstanceConfiguration
    ) -> JobProvisioningData:
        """Launch an instance for ``instance_offer`` and describe it.

        The hostname is left unset here; ``update_provisioning_data`` fills it
        in once the instance is active. ``backend_data`` records whether the
        plan is bare-metal, because later API calls need that to pick the
        endpoint.
        """
        plan = instance_offer.instance.name
        instance_id = self.api_client.launch_instance(
            region=instance_offer.region,
            label=instance_config.instance_name,
            plan=plan,
            user_data=get_user_data(authorized_keys=instance_config.get_public_keys()),
        )
        plan_type = "bare-metal" if "vbm" in plan else "vm_instance"
        return JobProvisioningData(
            backend=instance_offer.backend,
            instance_type=instance_offer.instance,
            instance_id=instance_id,
            hostname=None,
            internal_ip=None,
            region=instance_offer.region,
            price=instance_offer.price,
            ssh_port=22,
            username="root",
            ssh_proxy=None,
            dockerized=True,
            backend_data=json.dumps({"plan_type": plan_type}),
        )

    def terminate_instance(
        self, instance_id: str, region: str, backend_data: Optional[str] = None
    ) -> None:
        """Delete the instance identified by ``instance_id``.

        Args:
            instance_id: ID of the instance to delete.
            region: Unused by this backend.
            backend_data: JSON written by ``create_instance``. When missing,
                the instance is treated as a VM (see ``_plan_type_from``).

        Raises:
            BackendError: If the API rejects the request.
        """
        plan_type = self._plan_type_from(backend_data)
        try:
            self.api_client.terminate_instance(instance_id=instance_id, plan_type=plan_type)
        except requests.HTTPError as e:
            # An HTTPError may carry no response; fall back to its message.
            detail = e.response.text if e.response is not None else str(e)
            raise BackendError(detail) from e

    def update_provisioning_data(
        self,
        provisioning_data: JobProvisioningData,
        project_ssh_public_key: str,
        project_ssh_private_key: str,
    ):
        """Refresh ``provisioning_data`` from the instance's current status.

        Sets ``hostname`` to the instance's main IP once the status is
        ``"active"``.

        Raises:
            ProvisioningError: If the instance status is ``"failed"``.
        """
        plan_type = self._plan_type_from(provisioning_data.backend_data)
        instance_data = self.api_client.get_instance(provisioning_data.instance_id, plan_type)
        instance_status = instance_data["status"]
        if instance_status == "active":
            provisioning_data.hostname = instance_data["main_ip"]
        elif instance_status == "failed":
            raise ProvisioningError("VM entered FAILED state")

    @staticmethod
    def _plan_type_from(backend_data: Optional[str]) -> str:
        """Extract ``plan_type`` from ``backend_data``.

        ``backend_data`` is optional in the public signatures, so a missing
        value must not crash with ``TypeError``. Without it the plan type is
        unknown; the VM endpoint is assumed and a warning is logged.
        """
        if not backend_data:
            logger.warning("Vultr backend_data is missing; assuming a VM instance")
            return "vm_instance"
        return json.loads(backend_data)["plan_type"]


def _supported_instances(offer: InstanceOffer) -> bool:
    """Tell whether ``offer`` is a non-spot plan from a recognized family.

    The plan name must match one of the family patterns in full.
    """
    if offer.instance.resources.spot:
        return False
    plan_families = (
        # Bare Metal - GPU
        r"vbm-\d+c-\d+gb-\d+-(a100|h100|l40|mi300x)-gpu",
        # Bare Metal - AMD CPU
        r"vbm-\d+c-\d+gb-amd",
        # Bare Metal - Intel CPU
        r"vbm-\d+c-\d+gb(-v\d+)?",
        # Cloud GPU
        r"vcg-(a16|a40|l40s|a100)-\d+c-\d+g-\d+vram",
        # Cloud Compute - Regular Performance
        r"vc2-\d+c-\d+gb(-sc1)?",
        # Cloud Compute - High Frequency
        r"vhf-\d+c-\d+gb(-sc1)?",
        # Cloud Compute - High Performance
        r"vhp-\d+c-\d+gb-(intel|amd)(-sc1)?",
        # Optimized Cloud Compute
        r"voc-[cgms]-\d+c-\d+gb-\d+s-amd(-sc1)?",
    )
    return any(re.fullmatch(pattern, offer.instance.name) for pattern in plan_families)
9 changes: 9 additions & 0 deletions src/dstack/_internal/core/backends/vultr/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from dstack._internal.core.backends.base.config import BackendConfig
from dstack._internal.core.models.backends.vultr import (
AnyVultrCreds,
VultrStoredConfig,
)


# Runtime configuration of the Vultr backend: the stored config fields plus credentials.
class VultrConfig(VultrStoredConfig, BackendConfig):
    # Credentials for the Vultr API.
    creds: AnyVultrCreds
10 changes: 10 additions & 0 deletions src/dstack/_internal/core/models/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@
VastAIConfigInfoWithCredsPartial,
VastAIConfigValues,
)
from dstack._internal.core.models.backends.vultr import (
VultrConfigInfo,
VultrConfigInfoWithCreds,
VultrConfigInfoWithCredsPartial,
VultrConfigValues,
)
from dstack._internal.core.models.common import CoreModel

# The following models are the basis of the JSON-based backend API.
Expand All @@ -100,6 +106,7 @@
RunpodConfigInfo,
TensorDockConfigInfo,
VastAIConfigInfo,
VultrConfigInfo,
DstackConfigInfo,
DstackBaseBackendConfigInfo,
]
Expand All @@ -120,6 +127,7 @@
RunpodConfigInfoWithCreds,
TensorDockConfigInfoWithCreds,
VastAIConfigInfoWithCreds,
VultrConfigInfoWithCreds,
DstackConfigInfo,
]

Expand All @@ -141,6 +149,7 @@
RunpodConfigInfoWithCredsPartial,
TensorDockConfigInfoWithCredsPartial,
VastAIConfigInfoWithCredsPartial,
VultrConfigInfoWithCredsPartial,
DstackConfigInfo,
]

Expand All @@ -158,6 +167,7 @@
RunpodConfigValues,
TensorDockConfigValues,
VastAIConfigValues,
VultrConfigValues,
DstackConfigValues,
]

Expand Down
2 changes: 2 additions & 0 deletions src/dstack/_internal/core/models/backends/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class BackendType(str, enum.Enum):
RUNPOD (BackendType): Runpod Cloud
TENSORDOCK (BackendType): TensorDock Marketplace
VASTAI (BackendType): Vast.ai Marketplace
VULTR (BackendType): Vultr
"""

AWS = "aws"
Expand All @@ -35,6 +36,7 @@ class BackendType(str, enum.Enum):
RUNPOD = "runpod"
TENSORDOCK = "tensordock"
VASTAI = "vastai"
VULTR = "vultr"


class ConfigElementValue(CoreModel):
Expand Down
40 changes: 40 additions & 0 deletions src/dstack/_internal/core/models/backends/vultr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from typing import List, Optional

from pydantic.fields import Field
from typing_extensions import Annotated, Literal

from dstack._internal.core.models.backends.base import ConfigMultiElement
from dstack._internal.core.models.common import CoreModel


# Vultr backend settings without credentials.
class VultrConfigInfo(CoreModel):
    # Fixed tag marking this config as a Vultr config.
    type: Literal["vultr"] = "vultr"
    # Optional list of regions; None when not set.
    regions: Optional[List[str]] = None


# Stored form of the Vultr config; adds no fields beyond VultrConfigInfo.
class VultrStoredConfig(VultrConfigInfo):
    pass


# API-key credentials for Vultr.
class VultrAPIKeyCreds(CoreModel):
    # Fixed tag for this credential type.
    type: Annotated[Literal["api_key"], Field(description="The type of credentials")] = "api_key"
    # Required; no default is provided.
    api_key: Annotated[str, Field(description="The API key")]


# API-key credentials are the only credential type defined here, so both aliases
# resolve to VultrAPIKeyCreds.
AnyVultrCreds = VultrAPIKeyCreds
VultrCreds = AnyVultrCreds


# Vultr backend settings together with the (required) credentials.
class VultrConfigInfoWithCreds(VultrConfigInfo):
    creds: AnyVultrCreds


# Partially filled Vultr settings: credentials and regions may both be absent.
# NOTE(review): the Optional fields below have no explicit default — confirm
# they are meant to be omittable rather than required-but-nullable.
class VultrConfigInfoWithCredsPartial(CoreModel):
    type: Literal["vultr"] = "vultr"
    creds: Optional[AnyVultrCreds]
    regions: Optional[List[str]]


# Config values for the Vultr backend; `regions` is carried as a ConfigMultiElement.
# NOTE(review): presumably the selectable region choices — confirm against the code that builds it.
class VultrConfigValues(CoreModel):
    type: Literal["vultr"] = "vultr"
    regions: Optional[ConfigMultiElement]
Loading

0 comments on commit e41a29a

Please sign in to comment.