diff --git a/src/dstack/_internal/core/backends/__init__.py b/src/dstack/_internal/core/backends/__init__.py index 665509428..046402c0f 100644 --- a/src/dstack/_internal/core/backends/__init__.py +++ b/src/dstack/_internal/core/backends/__init__.py @@ -17,6 +17,7 @@ BackendType.LAMBDA, BackendType.OCI, BackendType.TENSORDOCK, + BackendType.VULTR, ] BACKENDS_WITH_PLACEMENT_GROUPS_SUPPORT = [ BackendType.AWS, diff --git a/src/dstack/_internal/core/backends/vultr/__init__.py b/src/dstack/_internal/core/backends/vultr/__init__.py new file mode 100644 index 000000000..5f9a97cb6 --- /dev/null +++ b/src/dstack/_internal/core/backends/vultr/__init__.py @@ -0,0 +1,15 @@ +from dstack._internal.core.backends.base import Backend +from dstack._internal.core.backends.vultr.compute import VultrCompute +from dstack._internal.core.backends.vultr.config import VultrConfig +from dstack._internal.core.models.backends.base import BackendType + + +class VultrBackend(Backend): + TYPE: BackendType = BackendType.VULTR + + def __init__(self, config: VultrConfig): + self.config = config + self._compute = VultrCompute(self.config) + + def compute(self) -> VultrCompute: + return self._compute diff --git a/src/dstack/_internal/core/backends/vultr/api_client.py b/src/dstack/_internal/core/backends/vultr/api_client.py new file mode 100644 index 000000000..6995db274 --- /dev/null +++ b/src/dstack/_internal/core/backends/vultr/api_client.py @@ -0,0 +1,154 @@ +import base64 +from typing import Any, List + +import requests +from requests import Response + +from dstack._internal.core.errors import BackendInvalidCredentialsError + +API_URL = "https://api.vultr.com/v2" + + +class VultrApiClient: + def __init__(self, api_key: str): + self.api_key = api_key + + def validate_api_key(self) -> bool: + try: + self._make_request("GET", "/ssh-keys") + except BackendInvalidCredentialsError: + return False + return True + + def get_instance(self, instance_id: str, plan_type: str): + if plan_type == "bare-metal": + response = self._make_request("GET", f"/bare-metals/{instance_id}") + return response.json()["bare_metal"] + else: + response = self._make_request("GET", f"/instances/{instance_id}") + return response.json()["instance"] + + def launch_instance( + self, region: str, plan: str, label: str, startup_script: str, public_keys: List[str] + ): + # Fetch or create startup script ID + script_id: str = self.get_startup_script_id(startup_script) + # Fetch or create SSH key IDs + sshkey_ids: List[str] = self.get_sshkey_id(public_keys) + # For Bare-metals + if "vbm" in plan: + # "Docker on Ubuntu 22.04" is required for bare-metals. + data = { + "region": region, + "plan": plan, + "label": label, + "image_id": "docker", + "script_id": script_id, + "sshkey_id": sshkey_ids, + } + resp = self._make_request("POST", "/bare-metals", data) + return resp.json()["bare_metal"]["id"] + # For VMs + elif "vcg" in plan: + # Ubuntu 22.04 will be installed. For gpu VMs, docker is preinstalled. + data = { + "region": region, + "plan": plan, + "label": label, + "os_id": 1743, + "script_id": script_id, + "sshkey_id": sshkey_ids, + } + resp = self._make_request("POST", "/instances", data) + return resp.json()["instance"]["id"] + else: + data = { + "region": region, + "plan": plan, + "label": label, + "image_id": "docker", + "script_id": script_id, + "sshkey_id": sshkey_ids, + } + resp = self._make_request("POST", "/instances", data) + return resp.json()["instance"]["id"] + + def get_startup_script_id(self, startup_script: str) -> str: + script_name = "dstack-shim-script" + encoded_script = base64.b64encode(startup_script.encode()).decode() + + # Get the list of startup scripts + response = self._make_request("GET", "/startup-scripts") + scripts = response.json()["startup_scripts"] + + # Find the script by name + existing_script = next((s for s in scripts if s["name"] == script_name), None) + + if existing_script: + # Update the existing script + startup_id = existing_script["id"] + update_payload = { + "name": script_name, + "script": encoded_script, + } + self._make_request("PATCH", f"/startup-scripts/{startup_id}", update_payload) + else: + # Create a new script + create_payload = { + "name": script_name, + "type": "boot", + "script": encoded_script, + } + create_response = self._make_request("POST", "/startup-scripts", create_payload) + startup_id = create_response.json()["startup_script"]["id"] + + return startup_id + + def get_sshkey_id(self, ssh_ids: List[str]) -> List[str]: + # Fetch existing SSH keys + response = self._make_request("GET", "/ssh-keys") + ssh_keys = response.json()["ssh_keys"] + + ssh_key_ids = [] + existing_keys = {key["ssh_key"]: key["id"] for key in ssh_keys} + + for ssh_key in ssh_ids: + if ssh_key in existing_keys: + # SSH key already exists, add its id to the list + ssh_key_ids.append(existing_keys[ssh_key]) + else: + # Create new SSH key + create_payload = {"name": "dstack-ssh-key", "ssh_key": ssh_key} + create_response = self._make_request("POST", "/ssh-keys", create_payload) + new_ssh_key_id = create_response.json()["ssh_key"]["id"] + ssh_key_ids.append(new_ssh_key_id) + + return ssh_key_ids + + def terminate_instance(self, instance_id: str, plan_type: str): + if plan_type == "bare-metal": + # Terminate bare-metal instance + endpoint = f"/bare-metals/{instance_id}" + else: + # Terminate virtual machine instance + endpoint = f"/instances/{instance_id}" + self._make_request("DELETE", endpoint) + + def _make_request(self, method: str, path: str, data: Any = None) -> Response: + try: + response = requests.request( + method=method, + url=API_URL + path, + json=data, + headers={"Authorization": f"Bearer {self.api_key}"}, + timeout=30, + ) + response.raise_for_status() + return response + except requests.HTTPError as e: + if e.response is not None and e.response.status_code in ( + requests.codes.forbidden, + requests.codes.unauthorized, + ): + raise BackendInvalidCredentialsError(e.response.text) + raise diff --git a/src/dstack/_internal/core/backends/vultr/compute.py b/src/dstack/_internal/core/backends/vultr/compute.py new file mode 100644 index 000000000..2f86fc28b --- /dev/null +++ b/src/dstack/_internal/core/backends/vultr/compute.py @@ -0,0 +1,128 @@ +import json +from typing import List, Optional + +import requests + +from dstack._internal.core.backends.base import Compute +from dstack._internal.core.backends.base.compute import ( + get_instance_name, + get_shim_commands, +) +from dstack._internal.core.backends.base.offers import get_catalog_offers +from dstack._internal.core.backends.vultr.api_client import VultrApiClient +from dstack._internal.core.backends.vultr.config import VultrConfig +from dstack._internal.core.errors import BackendError, ProvisioningError +from dstack._internal.core.models.backends.base import BackendType +from dstack._internal.core.models.instances import ( + InstanceAvailability, + InstanceConfiguration, + InstanceOfferWithAvailability, + SSHKey, +) +from dstack._internal.core.models.runs import Job, JobProvisioningData, Requirements, Run +from dstack._internal.core.models.volumes import Volume +from dstack._internal.utils.logging import get_logger + +logger = get_logger(__name__) + + +class VultrCompute(Compute): + def __init__(self, config: VultrConfig): + self.config = config + self.api_client = VultrApiClient(config.creds.api_key) + + def get_offers( + self, requirements: Optional[Requirements] = None + ) -> List[InstanceOfferWithAvailability]: + offers = get_catalog_offers( + backend=BackendType.VULTR, + requirements=requirements, + ) + offers = [ + InstanceOfferWithAvailability( + **offer.dict(), availability=InstanceAvailability.AVAILABLE + ) + for offer in offers + ] + return offers + + def run_job( + self, + run: Run, + job: Job, + instance_offer: InstanceOfferWithAvailability, + project_ssh_public_key: str, + project_ssh_private_key: str, + volumes: List[Volume], + ) -> JobProvisioningData: + instance_config = InstanceConfiguration( + project_name=run.project_name, + instance_name=get_instance_name(run, job), + ssh_keys=[SSHKey(public=project_ssh_public_key.strip())], + user=run.user, + ) + return self.create_instance(instance_offer, instance_config) + + def create_instance( + self, instance_offer: InstanceOfferWithAvailability, instance_config: InstanceConfiguration + ) -> JobProvisioningData: + public_keys = instance_config.get_public_keys() + commands = get_shim_commands(authorized_keys=public_keys) + shim_commands = "#!/bin/sh\n" + " ".join([" && ".join(commands)]) + try: + instance_id = self.api_client.launch_instance( + region=instance_offer.region, + label=instance_config.instance_name, + plan=instance_offer.instance.name, + startup_script=shim_commands, + public_keys=public_keys, + ) + except KeyError as e: + raise BackendError(e) + + launched_instance = JobProvisioningData( + backend=instance_offer.backend, + instance_type=instance_offer.instance, + instance_id=instance_id, + hostname=None, + internal_ip=None, + region=instance_offer.region, + price=instance_offer.price, + ssh_port=22, + username="root", + ssh_proxy=None, + dockerized=True, + backend_data=json.dumps( + { + "plan_type": "bare-metal" + if "vbm" in instance_offer.instance.name + else "vm_instance" + } + ), + ) + return launched_instance + + def terminate_instance( + self, instance_id: str, region: str, backend_data: Optional[str] = None + ) -> None: + plan_type = json.loads(backend_data)["plan_type"] + try: + self.api_client.terminate_instance(instance_id=instance_id, plan_type=plan_type) + except requests.HTTPError as e: + raise BackendError(e.response.text) + + def update_provisioning_data( + self, + provisioning_data: JobProvisioningData, + project_ssh_public_key: str, + project_ssh_private_key: str, + ): + plan_type = json.loads(provisioning_data.backend_data)["plan_type"] + instance_data = self.api_client.get_instance(provisioning_data.instance_id, plan_type) + # Access specific fields + instance_status = instance_data["status"] + instance_main_ip = instance_data["main_ip"] + if instance_status == "active": + provisioning_data.hostname = instance_main_ip + if instance_status == "failed": + raise ProvisioningError("VM entered FAILED state") diff --git a/src/dstack/_internal/core/backends/vultr/config.py b/src/dstack/_internal/core/backends/vultr/config.py new file mode 100644 index 000000000..dc05448bc --- /dev/null +++ b/src/dstack/_internal/core/backends/vultr/config.py @@ -0,0 +1,9 @@ +from dstack._internal.core.backends.base.config import BackendConfig +from dstack._internal.core.models.backends.vultr import ( + AnyVultrCreds, + VultrStoredConfig, +) + + +class VultrConfig(VultrStoredConfig, BackendConfig): + creds: AnyVultrCreds diff --git a/src/dstack/_internal/core/models/backends/__init__.py b/src/dstack/_internal/core/models/backends/__init__.py index cfdc3f2c3..3d0219071 100644 --- a/src/dstack/_internal/core/models/backends/__init__.py +++ b/src/dstack/_internal/core/models/backends/__init__.py @@ -77,6 +77,12 @@ VastAIConfigInfoWithCredsPartial, VastAIConfigValues, ) +from dstack._internal.core.models.backends.vultr import ( + VultrConfigInfo, + VultrConfigInfoWithCreds, + VultrConfigInfoWithCredsPartial, + VultrConfigValues, +) from dstack._internal.core.models.common import CoreModel # The following models are the basis of the JSON-based backend API. @@ -100,6 +106,7 @@ RunpodConfigInfo, TensorDockConfigInfo, VastAIConfigInfo, + VultrConfigInfo, DstackConfigInfo, DstackBaseBackendConfigInfo, ] @@ -120,6 +127,7 @@ RunpodConfigInfoWithCreds, TensorDockConfigInfoWithCreds, VastAIConfigInfoWithCreds, + VultrConfigInfoWithCreds, DstackConfigInfo, ] @@ -141,6 +149,7 @@ RunpodConfigInfoWithCredsPartial, TensorDockConfigInfoWithCredsPartial, VastAIConfigInfoWithCredsPartial, + VultrConfigInfoWithCredsPartial, DstackConfigInfo, ] @@ -158,6 +167,7 @@ RunpodConfigValues, TensorDockConfigValues, VastAIConfigValues, + VultrConfigValues, DstackConfigValues, ] diff --git a/src/dstack/_internal/core/models/backends/base.py b/src/dstack/_internal/core/models/backends/base.py index e9cc4ccab..bda5f140c 100644 --- a/src/dstack/_internal/core/models/backends/base.py +++ b/src/dstack/_internal/core/models/backends/base.py @@ -18,6 +18,7 @@ class BackendType(str, enum.Enum): RUNPOD (BackendType): Runpod Cloud TENSORDOCK (BackendType): TensorDock Marketplace VASTAI (BackendType): Vast.ai Marketplace + VULTR (BackendType): Vultr """ AWS = "aws" @@ -35,6 +36,7 @@ class BackendType(str, enum.Enum): RUNPOD = "runpod" TENSORDOCK = "tensordock" VASTAI = "vastai" + VULTR = "vultr" class ConfigElementValue(CoreModel): diff --git a/src/dstack/_internal/core/models/backends/vultr.py b/src/dstack/_internal/core/models/backends/vultr.py new file mode 100644 index 000000000..680cd692e --- /dev/null +++ b/src/dstack/_internal/core/models/backends/vultr.py @@ -0,0 +1,43 @@ +from typing import List, Optional + +from pydantic.fields import Field +from typing_extensions import Annotated, Literal + +from dstack._internal.core.models.backends.base import ConfigElement, ConfigMultiElement +from dstack._internal.core.models.common import CoreModel + + +class VultrConfigInfo(CoreModel): + type: Literal["vultr"] = "vultr" + project_id: str + regions: Optional[List[str]] = None + + +class VultrStoredConfig(VultrConfigInfo): + pass + + +class VultrAPIKeyCreds(CoreModel): + type: Annotated[Literal["api_key"], Field(description="The type of credentials")] = "api_key" + api_key: Annotated[str, Field(description="The API key")] + + +AnyVultrCreds = VultrAPIKeyCreds +VultrCreds = AnyVultrCreds + + +class VultrConfigInfoWithCreds(VultrConfigInfo): + creds: AnyVultrCreds + + +class VultrConfigInfoWithCredsPartial(CoreModel): + type: Literal["vultr"] = "vultr" + creds: Optional[AnyVultrCreds] + project_id: Optional[str] + regions: Optional[List[str]] + + +class VultrConfigValues(CoreModel): + type: Literal["vultr"] = "vultr" + regions: Optional[ConfigMultiElement] + project_id: Optional[ConfigElement] diff --git a/src/dstack/_internal/server/background/tasks/process_instances.py b/src/dstack/_internal/server/background/tasks/process_instances.py index 285531a22..5f0096646 100644 --- a/src/dstack/_internal/server/background/tasks/process_instances.py +++ b/src/dstack/_internal/server/background/tasks/process_instances.py @@ -911,4 +911,6 @@ def _get_instance_timeout_interval( return timedelta(seconds=1200) if backend_type == BackendType.OCI and instance_type_name.startswith("BM."): return timedelta(seconds=1200) + if backend_type == BackendType.VULTR: + return timedelta(seconds=1800) return timedelta(seconds=600) diff --git a/src/dstack/_internal/server/background/tasks/process_running_jobs.py b/src/dstack/_internal/server/background/tasks/process_running_jobs.py index baa60b37e..2558a102c 100644 --- a/src/dstack/_internal/server/background/tasks/process_running_jobs.py +++ b/src/dstack/_internal/server/background/tasks/process_running_jobs.py @@ -656,4 +656,6 @@ def _get_runner_timeout_interval(backend_type: BackendType, instance_type_name: return timedelta(seconds=1200) if backend_type == BackendType.OCI and instance_type_name.startswith("BM."): return timedelta(seconds=1200) + if backend_type == BackendType.VULTR: + return timedelta(seconds=1800) return timedelta(seconds=600) diff --git a/src/dstack/_internal/server/services/backends/__init__.py b/src/dstack/_internal/server/services/backends/__init__.py index 75d8dbcd0..abb4a0c6c 100644 --- a/src/dstack/_internal/server/services/backends/__init__.py +++ b/src/dstack/_internal/server/services/backends/__init__.py @@ -129,6 +129,13 @@ except ImportError: pass +try: + from dstack._internal.server.services.backends.configurators.vultr import VultrConfigurator + + _CONFIGURATOR_CLASSES.append(VultrConfigurator) +except ImportError: + pass + _BACKEND_TYPE_TO_CONFIGURATOR_CLASS_MAP = {c.TYPE: c for c in _CONFIGURATOR_CLASSES} diff --git a/src/dstack/_internal/server/services/backends/configurators/vultr.py b/src/dstack/_internal/server/services/backends/configurators/vultr.py new file mode 100644 index 000000000..d01405aaa --- /dev/null +++ b/src/dstack/_internal/server/services/backends/configurators/vultr.py @@ -0,0 +1,91 @@ +import json +from typing import List + +from dstack._internal.core.backends.base import Backend +from dstack._internal.core.backends.vultr import VultrBackend, VultrConfig, api_client +from dstack._internal.core.models.backends import ( + VultrConfigInfoWithCreds, + VultrConfigInfoWithCredsPartial, + VultrConfigValues, +) +from dstack._internal.core.models.backends.base import ( + BackendType, + ConfigElementValue, + ConfigMultiElement, +) +from dstack._internal.core.models.backends.vultr import ( + VultrConfigInfo, + VultrCreds, + VultrStoredConfig, +) +from dstack._internal.server.models import BackendModel, DecryptedString, ProjectModel +from dstack._internal.server.services.backends import Configurator +from dstack._internal.server.services.backends.configurators.base import ( + raise_invalid_credentials_error, +) + +REGIONS = [ + "no-luster-1", + "se-smedjebacken-1", + "gb-london-1", + "se-stockholm-1", + "us-newyork-1", + "us-santaclara-1", +] + +DEFAULT_REGION = "no-luster-1" + + +class VultrConfigurator(Configurator): + TYPE: BackendType = BackendType.VULTR + + def get_config_values(self, config: VultrConfigInfoWithCredsPartial) -> VultrConfigValues: + config_values = VultrConfigValues() + if config.creds is None: + return config_values + self._validate_vultr_api_key(config.creds.api_key) + config_values.regions = self._get_regions_element( + selected=config.regions or [DEFAULT_REGION] + ) + return config_values + + def create_backend( + self, project: ProjectModel, config: VultrConfigInfoWithCreds + ) -> BackendModel: + if config.regions is None: + config.regions = REGIONS + return BackendModel( + project_id=project.id, + type=self.TYPE.value, + config=VultrStoredConfig( + **VultrConfigInfo.__response__.parse_obj(config).dict() + ).json(), + auth=DecryptedString(plaintext=VultrCreds.parse_obj(config.creds).json()), + ) + + def get_config_info(self, model: BackendModel, include_creds: bool) -> VultrConfigInfo: + config = self._get_backend_config(model) + if include_creds: + return VultrConfigInfoWithCreds.__response__.parse_obj(config) + return VultrConfigInfo.__response__.parse_obj(config) + + def get_backend(self, model: BackendModel) -> Backend: + config = self._get_backend_config(model) + return VultrBackend(config=config) + + def _get_regions_element(self, selected: List[str]) -> ConfigMultiElement: + element = ConfigMultiElement(selected=selected) + for r in REGIONS: + element.values.append(ConfigElementValue(value=r, label=r)) + return element + + def _get_backend_config(self, model: BackendModel) -> VultrConfig: + return VultrConfig.__response__( + **json.loads(model.config), + creds=VultrCreds.parse_raw(model.auth.get_plaintext_or_error()), + ) + + def _validate_vultr_api_key(self, api_key: str): + client = api_client.VultrApiClient(api_key=api_key) + if not client.validate_api_key(): + raise_invalid_credentials_error(fields=[["creds", "api_key"]]) diff --git a/src/dstack/_internal/server/services/config.py b/src/dstack/_internal/server/services/config.py index 965854cc8..e5890fdf8 100644 --- a/src/dstack/_internal/server/services/config.py +++ b/src/dstack/_internal/server/services/config.py @@ -23,6 +23,7 @@ from dstack._internal.core.models.backends.runpod import AnyRunpodCreds from dstack._internal.core.models.backends.tensordock import AnyTensorDockCreds from dstack._internal.core.models.backends.vastai import AnyVastAICreds +from dstack._internal.core.models.backends.vultr import AnyVultrCreds from dstack._internal.core.models.common import CoreModel from dstack._internal.server import settings from dstack._internal.server.models import ProjectModel, UserModel @@ -428,6 +429,13 @@ class VastAIConfig(CoreModel): creds: Annotated[AnyVastAICreds, Field(description="The credentials")] +class VultrConfig(CoreModel): + type: Annotated[Literal["vultr"], Field(description="The type of backend")] = "vultr" + regions: Optional[List[str]] = None + project_id: Annotated[str, Field(description="The project ID")] + creds: Annotated[AnyVultrCreds, Field(description="The credentials")] + + class DstackConfig(CoreModel): type: Annotated[Literal["dstack"], Field(description="The type of backend")] = "dstack" @@ -445,6 +453,7 @@ class DstackConfig(CoreModel): RunpodConfig, TensorDockConfig, VastAIConfig, + VultrConfig, DstackConfig, ] @@ -468,6 +477,7 @@ class _BackendConfig(BaseModel): RunpodConfig, TensorDockConfig, VastAIConfig, + VultrConfig, DstackConfig, ]