Skip to content

Commit 0d65823

Browse files
Bihan Rana
authored and committed
Fix review issues in Vultr integration
Update supported_instance with regex match
1 parent 71755c9 commit 0d65823

File tree

8 files changed

+57
-104
lines changed

8 files changed

+57
-104
lines changed

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def get_long_description():
4848
"python-multipart>=0.0.16",
4949
"filelock",
5050
"psutil",
51-
"gpuhunt>=0.0.17,<0.1.0",
51+
"gpuhunt>=0.0.18,<0.1.0",
5252
]
5353

5454
GATEWAY_AND_SERVER_COMMON_DEPS = [
src/dstack/_internal/core/backends/vultr/api_client.py

Lines changed: 12 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import base64
2-
from typing import Any, List
2+
from typing import Any
33

44
import requests
55
from requests import Response
66

7-
from dstack._internal.core.errors import BackendInvalidCredentialsError
7+
from dstack._internal.core.errors import BackendError, BackendInvalidCredentialsError
88

99
API_URL = "https://api.vultr.com/v2"
1010

@@ -28,13 +28,7 @@ def get_instance(self, instance_id: str, plan_type: str):
2828
response = self._make_request("GET", f"/instances/{instance_id}")
2929
return response.json()["instance"]
3030

31-
def launch_instance(
32-
self, region: str, plan: str, label: str, startup_script: str, public_keys: List[str]
33-
):
34-
# Fetch or create startup script ID
35-
script_id: str = self.get_startup_script_id(startup_script)
36-
# Fetch or create SSH key IDs
37-
sshkey_ids: List[str] = self.get_sshkey_id(public_keys)
31+
def launch_instance(self, region: str, plan: str, label: str, user_data: str):
3832
# For Bare-metals
3933
if "vbm" in plan:
4034
# "Docker on Ubuntu 22.04" is required for bare-metals.
@@ -43,8 +37,7 @@ def launch_instance(
4337
"plan": plan,
4438
"label": label,
4539
"image_id": "docker",
46-
"script_id": script_id,
47-
"sshkey_id": sshkey_ids,
40+
"user_data": base64.b64encode(user_data.encode()).decode(),
4841
}
4942
resp = self._make_request("POST", "/bare-metals", data)
5043
return resp.json()["bare_metal"]["id"]
@@ -56,8 +49,7 @@ def launch_instance(
5649
"plan": plan,
5750
"label": label,
5851
"os_id": 1743,
59-
"script_id": script_id,
60-
"sshkey_id": sshkey_ids,
52+
"user_data": base64.b64encode(user_data.encode()).decode(),
6153
}
6254
resp = self._make_request("POST", "/instances", data)
6355
return resp.json()["instance"]["id"]
@@ -67,64 +59,11 @@ def launch_instance(
6759
"plan": plan,
6860
"label": label,
6961
"image_id": "docker",
70-
"script_id": script_id,
71-
"sshkey_id": sshkey_ids,
62+
"user_data": base64.b64encode(user_data.encode()).decode(),
7263
}
7364
resp = self._make_request("POST", "/instances", data)
7465
return resp.json()["instance"]["id"]
7566

76-
def get_startup_script_id(self, startup_script: str) -> str:
77-
script_name = "dstack-shim-script"
78-
encoded_script = base64.b64encode(startup_script.encode()).decode()
79-
80-
# Get the list of startup scripts
81-
response = self._make_request("GET", "/startup-scripts")
82-
scripts = response.json()["startup_scripts"]
83-
84-
# Find the script by name
85-
existing_script = next((s for s in scripts if s["name"] == script_name), None)
86-
87-
if existing_script:
88-
# Update the existing script
89-
startup_id = existing_script["id"]
90-
update_payload = {
91-
"name": script_name,
92-
"script": encoded_script,
93-
}
94-
self._make_request("PATCH", f"/startup-scripts/{startup_id}", update_payload)
95-
else:
96-
# Create a new script
97-
create_payload = {
98-
"name": script_name,
99-
"type": "boot",
100-
"script": encoded_script,
101-
}
102-
create_response = self._make_request("POST", "/startup-scripts", create_payload)
103-
startup_id = create_response.json()["startup_script"]["id"]
104-
105-
return startup_id
106-
107-
def get_sshkey_id(self, ssh_ids: List[str]) -> List[str]:
108-
# Fetch existing SSH keys
109-
response = self._make_request("GET", "/ssh-keys")
110-
ssh_keys = response.json()["ssh_keys"]
111-
112-
ssh_key_ids = []
113-
existing_keys = {key["ssh_key"]: key["id"] for key in ssh_keys}
114-
115-
for ssh_key in ssh_ids:
116-
if ssh_key in existing_keys:
117-
# SSH key already exists, add its id to the list
118-
ssh_key_ids.append(existing_keys[ssh_key])
119-
else:
120-
# Create new SSH key
121-
create_payload = {"name": "dstack-ssh-key", "ssh_key": ssh_key}
122-
create_response = self._make_request("POST", "/ssh-keys", create_payload)
123-
new_ssh_key_id = create_response.json()["ssh_key"]["id"]
124-
ssh_key_ids.append(new_ssh_key_id)
125-
126-
return ssh_key_ids
127-
12867
def terminate_instance(self, instance_id: str, plan_type: str):
12968
if plan_type == "bare-metal":
13069
# Terminate bare-metal instance
@@ -151,4 +90,10 @@ def _make_request(self, method: str, path: str, data: Any = None) -> Response:
15190
requests.codes.unauthorized,
15291
):
15392
raise BackendInvalidCredentialsError(e.response.text)
93+
if e.response is not None and e.response.status_code in (
94+
requests.codes.bad_request,
95+
requests.codes.internal_server_error,
96+
requests.codes.not_found,
97+
):
98+
raise BackendError(e.response.text)
15499
raise

src/dstack/_internal/core/backends/vultr/compute.py

Lines changed: 37 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
import json
2+
import re
23
from typing import List, Optional
34

45
import requests
56

67
from dstack._internal.core.backends.base import Compute
78
from dstack._internal.core.backends.base.compute import (
89
get_instance_name,
9-
get_shim_commands,
10+
get_user_data,
1011
)
1112
from dstack._internal.core.backends.base.offers import get_catalog_offers
1213
from dstack._internal.core.backends.vultr.api_client import VultrApiClient
@@ -16,6 +17,7 @@
1617
from dstack._internal.core.models.instances import (
1718
InstanceAvailability,
1819
InstanceConfiguration,
20+
InstanceOffer,
1921
InstanceOfferWithAvailability,
2022
SSHKey,
2123
)
@@ -37,6 +39,8 @@ def get_offers(
3739
offers = get_catalog_offers(
3840
backend=BackendType.VULTR,
3941
requirements=requirements,
42+
locations=self.config.regions or None,
43+
extra_filter=_supported_instances,
4044
)
4145
offers = [
4246
InstanceOfferWithAvailability(
@@ -66,19 +70,12 @@ def run_job(
6670
def create_instance(
6771
self, instance_offer: InstanceOfferWithAvailability, instance_config: InstanceConfiguration
6872
) -> JobProvisioningData:
69-
public_keys = instance_config.get_public_keys()
70-
commands = get_shim_commands(authorized_keys=public_keys)
71-
shim_commands = "#!/bin/sh\n" + " ".join([" && ".join(commands)])
72-
try:
73-
instance_id = self.api_client.launch_instance(
74-
region=instance_offer.region,
75-
label=instance_config.instance_name,
76-
plan=instance_offer.instance.name,
77-
startup_script=shim_commands,
78-
public_keys=public_keys,
79-
)
80-
except KeyError as e:
81-
raise BackendError(e)
73+
instance_id = self.api_client.launch_instance(
74+
region=instance_offer.region,
75+
label=instance_config.instance_name,
76+
plan=instance_offer.instance.name,
77+
user_data=get_user_data(authorized_keys=instance_config.get_public_keys()),
78+
)
8279

8380
launched_instance = JobProvisioningData(
8481
backend=instance_offer.backend,
@@ -126,3 +123,29 @@ def update_provisioning_data(
126123
provisioning_data.hostname = instance_main_ip
127124
if instance_status == "failed":
128125
raise ProvisioningError("VM entered FAILED state")
126+
127+
128+
def _supported_instances(offer: InstanceOffer) -> bool:
129+
if offer.instance.resources.spot:
130+
return False
131+
for family in [
132+
# Bare Metal - GPU
133+
r"vbm-\d+c-\d+gb-\d+-(a100|h100|l40|mi300x)-gpu",
134+
# Bare Metal - AMD CPU
135+
r"vbm-\d+c-\d+gb-amd",
136+
# Bare Metal - Intel CPU
137+
r"vbm-\d+c-\d+gb(-v\d+)?",
138+
# Cloud GPU
139+
r"vcg-(a16|a40|l40s|a100)-\d+c-\d+g-\d+vram",
140+
# Cloud Compute - Regular Performance
141+
r"vc2-\d+c-\d+gb(-sc1)?",
142+
# Cloud Compute - High Frequency
143+
r"vhf-\d+c-\d+gb(-sc1)?",
144+
# Cloud Compute - High Performance
145+
r"vhp-\d+c-\d+gb-(intel|amd)(-sc1)?",
146+
# Optimized Cloud Compute
147+
r"voc-[cgms]-\d+c-\d+gb-\d+s-amd(-sc1)?",
148+
]:
149+
if re.fullmatch(family, offer.instance.name):
150+
return True
151+
return False

src/dstack/_internal/core/models/backends/vultr.py

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,12 @@
33
from pydantic.fields import Field
44
from typing_extensions import Annotated, Literal
55

6-
from dstack._internal.core.models.backends.base import ConfigElement, ConfigMultiElement
6+
from dstack._internal.core.models.backends.base import ConfigMultiElement
77
from dstack._internal.core.models.common import CoreModel
88

99

1010
class VultrConfigInfo(CoreModel):
1111
type: Literal["vultr"] = "vultr"
12-
project_id: str
1312
regions: Optional[List[str]] = None
1413

1514

@@ -33,11 +32,9 @@ class VultrConfigInfoWithCreds(VultrConfigInfo):
3332
class VultrConfigInfoWithCredsPartial(CoreModel):
3433
type: Literal["vultr"] = "vultr"
3534
creds: Optional[AnyVultrCreds]
36-
project_id: Optional[str]
3735
regions: Optional[List[str]]
3836

3937

4038
class VultrConfigValues(CoreModel):
4139
type: Literal["vultr"] = "vultr"
4240
regions: Optional[ConfigMultiElement]
43-
project_id: Optional[ConfigElement]

src/dstack/_internal/server/background/tasks/process_instances.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,6 @@ def _get_instance_timeout_interval(
911911
return timedelta(seconds=1200)
912912
if backend_type == BackendType.OCI and instance_type_name.startswith("BM."):
913913
return timedelta(seconds=1200)
914-
if backend_type == BackendType.VULTR:
915-
return timedelta(seconds=1800)
914+
if backend_type == BackendType.VULTR and instance_type_name.startswith("vbm"):
915+
return timedelta(seconds=3300)
916916
return timedelta(seconds=600)

src/dstack/_internal/server/background/tasks/process_running_jobs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,6 @@ def _get_runner_timeout_interval(backend_type: BackendType, instance_type_name:
656656
return timedelta(seconds=1200)
657657
if backend_type == BackendType.OCI and instance_type_name.startswith("BM."):
658658
return timedelta(seconds=1200)
659-
if backend_type == BackendType.VULTR:
660-
return timedelta(seconds=1800)
659+
if backend_type == BackendType.VULTR and instance_type_name.startswith("vbm"):
660+
return timedelta(seconds=3300)
661661
return timedelta(seconds=600)

src/dstack/_internal/server/services/backends/configurators/vultr.py

Lines changed: 2 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -24,16 +24,7 @@
2424
raise_invalid_credentials_error,
2525
)
2626

27-
REGIONS = [
28-
"no-luster-1",
29-
"se-smedjebacken-1",
30-
"gb-london-1",
31-
"se-stockholm-1",
32-
"us-newyork-1",
33-
"us-santaclara-1",
34-
]
35-
36-
DEFAULT_REGION = "no-luster-1"
27+
REGIONS = []
3728

3829

3930
class VultrConfigurator(Configurator):
@@ -44,9 +35,7 @@ def get_config_values(self, config: VultrConfigInfoWithCredsPartial) -> VultrCon
4435
if config.creds is None:
4536
return config_values
4637
self._validate_vultr_api_key(config.creds.api_key)
47-
config_values.regions = self._get_regions_element(
48-
selected=config.regions or [DEFAULT_REGION]
49-
)
38+
config_values.regions = self._get_regions_element(selected=config.regions or [])
5039
return config_values
5140

5241
def create_backend(

src/dstack/_internal/server/services/config.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,6 @@ class VultrConfig(CoreModel):
436436
Optional[List[str]],
437437
Field(description="The list of Vultr regions. Omit to use all regions"),
438438
] = None
439-
project_id: Annotated[str, Field(description="The project ID")]
440439
creds: Annotated[AnyVultrCreds, Field(description="The credentials")]
441440

442441

0 commit comments

Comments
 (0)