Skip to content

Add Vultr Support #2132

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Jan 13, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ def get_long_description():
"python-multipart>=0.0.16",
"filelock",
"psutil",
"gpuhunt>=0.0.17,<0.1.0",
"gpuhunt>=0.0.18,<0.1.0",
]

GATEWAY_AND_SERVER_COMMON_DEPS = [
Expand Down
1 change: 1 addition & 0 deletions src/dstack/_internal/core/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
BackendType.LAMBDA,
BackendType.OCI,
BackendType.TENSORDOCK,
BackendType.VULTR,
]
BACKENDS_WITH_PLACEMENT_GROUPS_SUPPORT = [
BackendType.AWS,
Expand Down
15 changes: 15 additions & 0 deletions src/dstack/_internal/core/backends/vultr/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from dstack._internal.core.backends.base import Backend
from dstack._internal.core.backends.vultr.compute import VultrCompute
from dstack._internal.core.backends.vultr.config import VultrConfig
from dstack._internal.core.models.backends.base import BackendType


class VultrBackend(Backend):
    """Backend entry point for Vultr; owns a single `VultrCompute` instance."""

    TYPE: BackendType = BackendType.VULTR

    def __init__(self, config: VultrConfig):
        # The compute object is built once here and reused by `compute()`.
        self._compute = VultrCompute(config)
        self.config = config

    def compute(self) -> VultrCompute:
        """Return the compute implementation created in `__init__`."""
        return self._compute
99 changes: 99 additions & 0 deletions src/dstack/_internal/core/backends/vultr/api_client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import base64
from typing import Any

import requests
from requests import Response

from dstack._internal.core.errors import BackendError, BackendInvalidCredentialsError

API_URL = "https://api.vultr.com/v2"


class VultrApiClient:
    """Minimal client for the Vultr v2 HTTP API (instances and bare-metal servers)."""

    def __init__(self, api_key: str):
        self.api_key = api_key

    def validate_api_key(self) -> bool:
        """Return True unless a probe request is rejected as invalid credentials."""
        try:
            self._make_request("GET", "/ssh-keys")
        except BackendInvalidCredentialsError:
            return False
        else:
            return True

    def get_instance(self, instance_id: str, plan_type: str):
        """Fetch the API record of a bare-metal server or a VM.

        `plan_type == "bare-metal"` selects the bare-metal endpoint; any other
        value selects the VM instance endpoint.
        """
        if plan_type == "bare-metal":
            path, payload_key = f"/bare-metals/{instance_id}", "bare_metal"
        else:
            path, payload_key = f"/instances/{instance_id}", "instance"
        return self._make_request("GET", path).json()[payload_key]

    def launch_instance(self, region: str, plan: str, label: str, user_data: str):
        """Create a server for `plan` and return the id reported by the API.

        The plan name decides both the endpoint and the image:
        - contains "vbm": bare-metal endpoint, "docker" image
          ("Docker on Ubuntu 22.04" is required for bare-metals);
        - contains "vcg": VM endpoint, `os_id` 1743
          (Ubuntu 22.04; for GPU VMs docker is preinstalled);
        - anything else: VM endpoint, "docker" image.
        """
        bare_metal = "vbm" in plan
        cloud_gpu = not bare_metal and "vcg" in plan

        # Keys are inserted in the same order for every branch:
        # region, plan, label, image selector, user_data.
        body = {"region": region, "plan": plan, "label": label}
        if cloud_gpu:
            body["os_id"] = 1743
        else:
            body["image_id"] = "docker"
        # user_data is sent base64-encoded.
        body["user_data"] = base64.b64encode(user_data.encode()).decode()

        if bare_metal:
            path, payload_key = "/bare-metals", "bare_metal"
        else:
            path, payload_key = "/instances", "instance"
        created = self._make_request("POST", path, body)
        return created.json()[payload_key]["id"]

    def terminate_instance(self, instance_id: str, plan_type: str):
        """Send a DELETE for the bare-metal server or VM with the given id."""
        resource = "bare-metals" if plan_type == "bare-metal" else "instances"
        self._make_request("DELETE", f"/{resource}/{instance_id}")

    def _make_request(self, method: str, path: str, data: Any = None) -> Response:
        """Send an authenticated request and translate selected HTTP errors.

        Raises:
            BackendInvalidCredentialsError: on 401/403 responses.
            BackendError: on 400/404/500 responses.
            requests.HTTPError: re-raised unchanged for any other error status.
        """
        try:
            response = requests.request(
                method=method,
                url=API_URL + path,
                json=data,
                headers={"Authorization": f"Bearer {self.api_key}"},
                timeout=30,
            )
            response.raise_for_status()
        except requests.HTTPError as e:
            failed = e.response
            status = None if failed is None else failed.status_code
            if status in (requests.codes.forbidden, requests.codes.unauthorized):
                raise BackendInvalidCredentialsError(failed.text)
            if status in (
                requests.codes.bad_request,
                requests.codes.internal_server_error,
                requests.codes.not_found,
            ):
                raise BackendError(failed.text)
            raise
        return response
151 changes: 151 additions & 0 deletions src/dstack/_internal/core/backends/vultr/compute.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
import json
import re
from typing import List, Optional

import requests

from dstack._internal.core.backends.base import Compute
from dstack._internal.core.backends.base.compute import (
get_instance_name,
get_user_data,
)
from dstack._internal.core.backends.base.offers import get_catalog_offers
from dstack._internal.core.backends.vultr.api_client import VultrApiClient
from dstack._internal.core.backends.vultr.config import VultrConfig
from dstack._internal.core.errors import BackendError, ProvisioningError
from dstack._internal.core.models.backends.base import BackendType
from dstack._internal.core.models.instances import (
InstanceAvailability,
InstanceConfiguration,
InstanceOffer,
InstanceOfferWithAvailability,
SSHKey,
)
from dstack._internal.core.models.runs import Job, JobProvisioningData, Requirements, Run
from dstack._internal.core.models.volumes import Volume
from dstack._internal.utils.logging import get_logger

logger = get_logger(__name__)


class VultrCompute(Compute):
    """Compute implementation that provisions instances through `VultrApiClient`."""

    def __init__(self, config: VultrConfig):
        self.config = config
        self.api_client = VultrApiClient(config.creds.api_key)

    def get_offers(
        self, requirements: Optional[Requirements] = None
    ) -> List[InstanceOfferWithAvailability]:
        """Return catalog offers for Vultr, all marked as AVAILABLE.

        Offers are restricted to the configured regions (no restriction when
        none are configured) and to plans accepted by `_supported_instances`.
        """
        offers = get_catalog_offers(
            backend=BackendType.VULTR,
            requirements=requirements,
            locations=self.config.regions or None,
            extra_filter=_supported_instances,
        )
        return [
            InstanceOfferWithAvailability(
                **offer.dict(), availability=InstanceAvailability.AVAILABLE
            )
            for offer in offers
        ]

    def run_job(
        self,
        run: Run,
        job: Job,
        instance_offer: InstanceOfferWithAvailability,
        project_ssh_public_key: str,
        project_ssh_private_key: str,
        volumes: List[Volume],
    ) -> JobProvisioningData:
        """Build an instance configuration for the job and launch an instance for it."""
        instance_config = InstanceConfiguration(
            project_name=run.project_name,
            instance_name=get_instance_name(run, job),
            ssh_keys=[SSHKey(public=project_ssh_public_key.strip())],
            user=run.user,
        )
        return self.create_instance(instance_offer, instance_config)

    def create_instance(
        self, instance_offer: InstanceOfferWithAvailability, instance_config: InstanceConfiguration
    ) -> JobProvisioningData:
        """Launch an instance for the offer and return its provisioning data.

        The hostname is left unset here; `update_provisioning_data` fills it in
        once the instance is reported as active.
        """
        instance_id = self.api_client.launch_instance(
            region=instance_offer.region,
            label=instance_config.instance_name,
            plan=instance_offer.instance.name,
            user_data=get_user_data(authorized_keys=instance_config.get_public_keys()),
        )

        # The plan type is persisted so that later calls know whether to use
        # the bare-metal or the VM endpoints of the API.
        plan_type = "bare-metal" if "vbm" in instance_offer.instance.name else "vm_instance"
        return JobProvisioningData(
            backend=instance_offer.backend,
            instance_type=instance_offer.instance,
            instance_id=instance_id,
            hostname=None,
            internal_ip=None,
            region=instance_offer.region,
            price=instance_offer.price,
            ssh_port=22,
            username="root",
            ssh_proxy=None,
            dockerized=True,
            backend_data=json.dumps({"plan_type": plan_type}),
        )

    def terminate_instance(
        self, instance_id: str, region: str, backend_data: Optional[str] = None
    ) -> None:
        """Terminate the instance through the endpoint matching its plan type.

        Raises:
            BackendError: if `backend_data` is missing, or if the API request
                fails with an HTTP error.
        """
        plan_type = self._get_plan_type(backend_data)
        try:
            self.api_client.terminate_instance(instance_id=instance_id, plan_type=plan_type)
        except requests.HTTPError as e:
            # `HTTPError.response` may be None, so fall back to the exception text.
            message = e.response.text if e.response is not None else str(e)
            raise BackendError(message) from e

    def update_provisioning_data(
        self,
        provisioning_data: JobProvisioningData,
        project_ssh_public_key: str,
        project_ssh_private_key: str,
    ):
        """Set the hostname once the instance is active.

        Raises:
            BackendError: if the provisioning data carries no `backend_data`.
            ProvisioningError: if the instance status is "failed".
        """
        plan_type = self._get_plan_type(provisioning_data.backend_data)
        instance_data = self.api_client.get_instance(provisioning_data.instance_id, plan_type)
        instance_status = instance_data["status"]
        instance_main_ip = instance_data["main_ip"]
        if instance_status == "active":
            provisioning_data.hostname = instance_main_ip
        if instance_status == "failed":
            raise ProvisioningError("VM entered FAILED state")

    @staticmethod
    def _get_plan_type(backend_data: Optional[str]) -> str:
        """Extract the plan type stored by `create_instance` in `backend_data`.

        Raises:
            BackendError: if `backend_data` is None. Without it the right API
                endpoint cannot be chosen, and `json.loads(None)` would
                otherwise fail with an opaque TypeError.
        """
        if backend_data is None:
            raise BackendError(
                "Vultr instance has no backend_data; cannot determine its plan type"
            )
        return json.loads(backend_data)["plan_type"]


# Plan-name patterns of the Vultr instance families accepted by
# `_supported_instances`. Compiled once at import time instead of rebuilding
# the pattern list on every call.
_SUPPORTED_PLAN_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        # Bare Metal - GPU
        r"vbm-\d+c-\d+gb-\d+-(a100|h100|l40|mi300x)-gpu",
        # Bare Metal - AMD CPU
        r"vbm-\d+c-\d+gb-amd",
        # Bare Metal - Intel CPU
        r"vbm-\d+c-\d+gb(-v\d+)?",
        # Cloud GPU
        r"vcg-(a16|a40|l40s|a100)-\d+c-\d+g-\d+vram",
        # Cloud Compute - Regular Performance
        r"vc2-\d+c-\d+gb(-sc1)?",
        # Cloud Compute - High Frequency
        r"vhf-\d+c-\d+gb(-sc1)?",
        # Cloud Compute - High Performance
        r"vhp-\d+c-\d+gb-(intel|amd)(-sc1)?",
        # Optimized Cloud Compute
        r"voc-[cgms]-\d+c-\d+gb-\d+s-amd(-sc1)?",
    )
)


def _supported_instances(offer: InstanceOffer) -> bool:
    """Return True if the offer is non-spot and its plan name is a supported family.

    The whole instance name must match one of `_SUPPORTED_PLAN_PATTERNS`;
    partial matches are rejected.
    """
    if offer.instance.resources.spot:
        return False
    plan_name = offer.instance.name
    return any(pattern.fullmatch(plan_name) for pattern in _SUPPORTED_PLAN_PATTERNS)
9 changes: 9 additions & 0 deletions src/dstack/_internal/core/backends/vultr/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from dstack._internal.core.backends.base.config import BackendConfig
from dstack._internal.core.models.backends.vultr import (
AnyVultrCreds,
VultrStoredConfig,
)


# Runtime configuration of the Vultr backend: the stored config fields plus
# the credentials used to build the API client.
class VultrConfig(VultrStoredConfig, BackendConfig):
    # Credentials; `VultrCompute` reads `creds.api_key` from here.
    creds: AnyVultrCreds
10 changes: 10 additions & 0 deletions src/dstack/_internal/core/models/backends/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,12 @@
VastAIConfigInfoWithCredsPartial,
VastAIConfigValues,
)
from dstack._internal.core.models.backends.vultr import (
VultrConfigInfo,
VultrConfigInfoWithCreds,
VultrConfigInfoWithCredsPartial,
VultrConfigValues,
)
from dstack._internal.core.models.common import CoreModel

# The following models are the basis of the JSON-based backend API.
Expand All @@ -100,6 +106,7 @@
RunpodConfigInfo,
TensorDockConfigInfo,
VastAIConfigInfo,
VultrConfigInfo,
DstackConfigInfo,
DstackBaseBackendConfigInfo,
]
Expand All @@ -120,6 +127,7 @@
RunpodConfigInfoWithCreds,
TensorDockConfigInfoWithCreds,
VastAIConfigInfoWithCreds,
VultrConfigInfoWithCreds,
DstackConfigInfo,
]

Expand All @@ -141,6 +149,7 @@
RunpodConfigInfoWithCredsPartial,
TensorDockConfigInfoWithCredsPartial,
VastAIConfigInfoWithCredsPartial,
VultrConfigInfoWithCredsPartial,
DstackConfigInfo,
]

Expand All @@ -158,6 +167,7 @@
RunpodConfigValues,
TensorDockConfigValues,
VastAIConfigValues,
VultrConfigValues,
DstackConfigValues,
]

Expand Down
2 changes: 2 additions & 0 deletions src/dstack/_internal/core/models/backends/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ class BackendType(str, enum.Enum):
RUNPOD (BackendType): Runpod Cloud
TENSORDOCK (BackendType): TensorDock Marketplace
VASTAI (BackendType): Vast.ai Marketplace
VULTR (BackendType): Vultr
"""

AWS = "aws"
Expand All @@ -35,6 +36,7 @@ class BackendType(str, enum.Enum):
RUNPOD = "runpod"
TENSORDOCK = "tensordock"
VASTAI = "vastai"
VULTR = "vultr"


class ConfigElementValue(CoreModel):
Expand Down
40 changes: 40 additions & 0 deletions src/dstack/_internal/core/models/backends/vultr.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
from typing import List, Optional

from pydantic.fields import Field
from typing_extensions import Annotated, Literal

from dstack._internal.core.models.backends.base import ConfigMultiElement
from dstack._internal.core.models.common import CoreModel


# Vultr backend configuration without credentials.
class VultrConfigInfo(CoreModel):
    # Discriminator value identifying this config as a Vultr config.
    type: Literal["vultr"] = "vultr"
    # Optional list of region names; None means no regions were specified.
    regions: Optional[List[str]] = None


# Stored form of the Vultr config; adds no fields to `VultrConfigInfo`.
class VultrStoredConfig(VultrConfigInfo):
    pass


# API-key credentials for Vultr.
class VultrAPIKeyCreds(CoreModel):
    # Discriminator for the credentials kind; only "api_key" is accepted.
    type: Annotated[Literal["api_key"], Field(description="The type of credentials")] = "api_key"
    # The API key itself (required).
    api_key: Annotated[str, Field(description="The API key")]


# Aliases for the supported credential types. API-key credentials are the only
# kind defined here, so both names refer to `VultrAPIKeyCreds`.
AnyVultrCreds = VultrAPIKeyCreds
VultrCreds = AnyVultrCreds


# Vultr backend configuration including credentials (required).
class VultrConfigInfoWithCreds(VultrConfigInfo):
    creds: AnyVultrCreds


# Partially filled Vultr configuration: both `creds` and `regions` are Optional.
# NOTE(review): presumably used while a config is still being filled in — confirm against callers.
class VultrConfigInfoWithCredsPartial(CoreModel):
    type: Literal["vultr"] = "vultr"
    creds: Optional[AnyVultrCreds]
    regions: Optional[List[str]]


# Config values model for the Vultr backend.
# NOTE(review): `regions` looks like the set of selectable region values — confirm
# against `ConfigMultiElement` and its consumers.
class VultrConfigValues(CoreModel):
    type: Literal["vultr"] = "vultr"
    regions: Optional[ConfigMultiElement]
Loading
Loading