diff --git a/README.md b/README.md index 093076d68..4f849c393 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ for AI workloads both in the cloud and on-prem, speeding up the development, tra To use `dstack` with your own cloud accounts, create the `~/.dstack/server/config.yml` file and [configure backends](https://dstack.ai/docs/reference/server/config.yml). Alternatively, you can configure backends via the control plane UI after you start the server. -You can skip backends configuration if you intend to run containers only on your on-prem servers. Use [SSH fleets](https://dstack.ai/docs/concepts/fleets#ssh-fleets) for that. +You can skip backends configuration if you intend to run containers only on your on-prem servers. Use [SSH fleets](https://dstack.ai/docs/concepts/fleets#ssh) for that. ### Start the server diff --git a/docker/server/README.md b/docker/server/README.md index a608dbe86..ed96b9a32 100644 --- a/docker/server/README.md +++ b/docker/server/README.md @@ -39,7 +39,7 @@ Configuration is updated at ~/.dstack/config.yml ## Create SSH fleets If you want the `dstack` server to run containers on your on-prem servers, -use [fleets](https://dstack.ai/docs/concepts/fleets#ssh-fleets). +use [fleets](https://dstack.ai/docs/concepts/fleets#ssh). ## More information diff --git a/docs/assets/stylesheets/extra.css b/docs/assets/stylesheets/extra.css index 345c88a56..34b40b9e0 100644 --- a/docs/assets/stylesheets/extra.css +++ b/docs/assets/stylesheets/extra.css @@ -23,6 +23,11 @@ font-size: 0.75rem; } } + +[dir=ltr] .md-typeset :is(.admonition,details) pre, [dir=ltr] .md-typeset :is(.admonition,details) :is(.admonition,details) { + margin-left: 32px; +} + @media screen and (max-width: 76.1875em) { .md-header { background-color: rgb(255, 255, 255); @@ -160,7 +165,7 @@ background-color: var(--md-default-fg-color); } -[dir=ltr] .md-typeset :is(.admonition,details) { +[dir=ltr] .md-typeset :is(details) { border-style: solid; /*border-width: 1px;*/ border-width: 0; @@ -170,6 +175,16 @@ background: -webkit-linear-gradient(45deg, rgba(0, 42, 255, 0.1), rgb(0 114 255 / 1%), rgba(0, 42, 255, 0.05)); } +[dir=ltr] .md-typeset :is(.admonition) { + border-style: solid; + border-color: rgba(0, 0, 0, 0.87); + border-width: 1px; + border-radius: 6px; + box-shadow: none; + padding: .6rem .8rem; + /*background: -webkit-linear-gradient(45deg, rgba(0, 42, 255, 0.1), rgb(0 114 255 / 1%), rgba(0, 42, 255, 0.05));*/ +} + .md-typeset iframe { border-radius: 6px; } @@ -178,7 +193,7 @@ margin-left: 32px; } -[dir=ltr] .md-typeset :is(.admonition,details):not(blockquote) > :is(.highlight,.termy,.md-typeset__scrollwrap,p,h4,h3,.tabbed-set):not(.admonition-title) { +[dir=ltr] .md-typeset :is(.admonition,details):not(blockquote) > :is(.md-typeset__scrollwrap,p,h4,h3,.tabbed-set):not(.admonition-title) { padding-left: 32px; } @@ -445,7 +460,7 @@ color: rgba(0,0,0,0.87); } -.md-typeset :not(pre) :is(h1, h2, h3, h4) > code { +.md-typeset :not(pre) :is(h1, h2, h3, h4, h5, h6) > code { color: inherit; background: inherit; padding: 0; @@ -455,7 +470,7 @@ h4.doc-heading { font-size: inherit; } -.md-typeset :not(pre, h1, h2, h3, h4) > code { +.md-typeset :not(pre, h1, h2, h3, h4, h5, h6) > code { background-color: rgba(163, 68, 215, 0.05); /*border: 1px solid #dce0e6;*/ border-radius: 2px; @@ -467,27 +482,27 @@ h4.doc-heading { margin: 0 4px; } -.md-typeset :is(h1, h2, h3, h4) > code { +.md-typeset :is(h1, h2, h3, h4, h5, h6) > code { background-color: inherit; color: inherit; /*padding: 0; margin: 0;*/ } -.md-typeset 
:is(h1, h2, h3, h4) > a > code { +.md-typeset :is(h1, h2, h3, h4, h5, h6) > a > code { font-size: inherit; color: inherit; } -.md-typeset :is(table) :not(pre, h1, h2, h3, h4) > code { +.md-typeset :is(table) :not(pre, h1, h2, h3, h4, h5, h6) > code { font-size: .85em; } -.md-typeset :not(pre, h1, h2, h3, h4) > code { +.md-typeset :not(pre, h1, h2, h3, h4, h5, h6) > code { font-size: 0.65rem; } -.md-typeset :not(pre, h1, h2, h3, h4) > a code { +.md-typeset :not(pre, h1, h2, h3, h4, h5, h6) > a code { color: #ce00ff; } @@ -639,7 +654,7 @@ code .md-code__nav:hover .md-code__button { /*letter-spacing: 0;*/ } -.md-typeset h1, .md-typeset h2, .md-typeset h3, .md-typeset h4, .md-typeset h5 { +.md-typeset h1, .md-typeset h2, .md-typeset h3, .md-typeset h4, .md-typeset h5, .md-typeset h6 { font-weight: 800; letter-spacing: -1px; color: rgb(0, 0, 0); @@ -654,6 +669,10 @@ code .md-code__nav:hover .md-code__button { font-size: 17px; } +.md-typeset h6 { + font-size: 15px; +} + .md-typeset h3 { font-size: 21.5px; margin-block-end: 0; @@ -1087,7 +1106,7 @@ html .md-footer-meta.md-typeset a:is(:focus,:hover) { background: none; z-index: 1; padding: 5px; - border-radius: 12px; + border-radius: 6px; border: 1px solid black; bottom: -0.7px; top: -0.7px; @@ -1124,7 +1143,7 @@ html .md-footer-meta.md-typeset a:is(:focus,:hover) { height: 100%; background: -webkit-linear-gradient(45deg, rgba(0, 42, 255, 0.025), rgb(0 114 255 / 0.25%), rgba(0, 42, 255, 0.0125)); z-index: 1; - border-radius: 12px; + border-radius: 6px; border: 0.5px solid rgba(0,0,0, 0.5); overflow: unset; } @@ -1198,7 +1217,7 @@ html .md-footer-meta.md-typeset a:is(:focus,:hover) { margin: 1em .8rem; } - .md-typeset .tabbed-block :is(h1, h2, h3, h4, h5) { + .md-typeset .tabbed-block :is(h1, h2, h3, h4, h5, h6) { margin-left: .8rem; } diff --git a/docs/blog/archive/say-goodbye-to-managed-notebooks.md b/docs/blog/archive/say-goodbye-to-managed-notebooks.md index 1ce6338bd..55e00e1c0 100644 --- a/docs/blog/archive/say-goodbye-to-managed-notebooks.md +++ b/docs/blog/archive/say-goodbye-to-managed-notebooks.md @@ -98,4 +98,4 @@ You can securely access the cloud development environment with the desktop IDE o ![](../../assets/images/dstack-vscode-jupyter.png){ width=800 } !!! info "Learn more" - Check out our [guide](../../docs/dev-environments.md) for running dev environments in your cloud. \ No newline at end of file + Check out our [guide](../../docs/concepts/dev-environments.md) for running dev environments in your cloud. \ No newline at end of file diff --git a/docs/blog/posts/amd-mi300x-inference-benchmark.md b/docs/blog/posts/amd-mi300x-inference-benchmark.md index 58b978be4..494a98a8f 100644 --- a/docs/blog/posts/amd-mi300x-inference-benchmark.md +++ b/docs/blog/posts/amd-mi300x-inference-benchmark.md @@ -11,7 +11,7 @@ categories: # Benchmarking Llama 3.1 405B on 8x AMD MI300X GPUs -At `dstack`, we've been adding support for AMD GPUs with [SSH fleets](../../docs/concepts/fleets.md#ssh-fleets), +At `dstack`, we've been adding support for AMD GPUs with [SSH fleets](../../docs/concepts/fleets.md#ssh), so we saw this as a great chance to test our integration by benchmarking AMD GPUs. Our friends at [Hot Aisle :material-arrow-top-right-thin:{ .external }](https://hotaisle.xyz/){:target="_blank"}, who build top-tier bare metal compute for AMD GPUs, kindly provided the hardware for the benchmark. @@ -35,7 +35,7 @@ Here is the spec of the bare metal machine we got: ??? info "Set up an SSH fleet" Hot Aisle provided us with SSH access to the machine. 
To make it accessible via `dstack`,
-    we created an [SSH fleet](../../docs/concepts/fleets.md#ssh-fleets) using the following configuration:
+    we created an [SSH fleet](../../docs/concepts/fleets.md#ssh) using the following configuration:
@@ -216,7 +216,7 @@ If you have questions, feedback, or want to help improve the benchmark, please r
 is the primary sponsor of this benchmark, and we are sincerely grateful for their hardware and support.
 
 If you'd like to use top-tier bare metal compute with AMD GPUs, we recommend going
-with Hot Aisle. Once you gain access to a cluster, it can be easily accessed via `dstack`'s [SSH fleet](../../docs/concepts/fleets.md#ssh-fleets) easily.
+with Hot Aisle. Once you gain access to a cluster, it can be easily accessed via `dstack`'s [SSH fleet](../../docs/concepts/fleets.md#ssh).
 
 ### RunPod
 If you’d like to use on-demand compute with AMD GPUs at affordable prices, you can configure `dstack` to
diff --git a/docs/blog/posts/amd-on-runpod.md b/docs/blog/posts/amd-on-runpod.md
index 8b524778a..5bfbee693 100644
--- a/docs/blog/posts/amd-on-runpod.md
+++ b/docs/blog/posts/amd-on-runpod.md
@@ -39,7 +39,7 @@ you can now specify an AMD GPU under `resources`. Below are a few examples.
 
 ## Configuration
 
 === "Service"
-    Here's an example of a [service](../../docs/services.md) that deploys
+    Here's an example of a [service](../../docs/concepts/services.md) that deploys
     Llama 3.1 70B in FP16 using
     [TGI :material-arrow-top-right-thin:{ .external }](https://huggingface.co/docs/text-generation-inference/en/installation_amd){:target="_blank"}.
@@ -71,7 +71,7 @@ you can now specify an AMD GPU under `resources`. Below are a few examples.
=== "Dev environment" - Here's an example of a [dev environment](../../docs/dev-environments.md) using + Here's an example of a [dev environment](../../docs/concepts/dev-environments.md) using [TGI :material-arrow-top-right-thin:{ .external }](https://huggingface.co/docs/text-generation-inference/en/installation_amd){:target="_blank"}'s Docker image: diff --git a/docs/blog/posts/dstack-sky.md b/docs/blog/posts/dstack-sky.md index 1c21532b5..61e49bbcc 100644 --- a/docs/blog/posts/dstack-sky.md +++ b/docs/blog/posts/dstack-sky.md @@ -73,8 +73,8 @@ Continue? [y/n]: You can use both on-demand and spot instances without needing to manage quotas, as they are automatically handled for you. -With `dstack Sky` you can use all of `dstack`'s features, incl. [dev environments](../../docs/dev-environments.md), -[tasks](../../docs/tasks.md), [services](../../docs/services.md), and +With `dstack Sky` you can use all of `dstack`'s features, incl. [dev environments](../../docs/concepts/dev-environments.md), +[tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md). To publish services, the open-source version requires setting up a gateway with your own domain. diff --git a/docs/blog/posts/tpu-on-gcp.md b/docs/blog/posts/tpu-on-gcp.md index a3ee0afb2..765ccb808 100644 --- a/docs/blog/posts/tpu-on-gcp.md +++ b/docs/blog/posts/tpu-on-gcp.md @@ -211,8 +211,8 @@ Note, `v5litepod` is optimized for fine-tuning transformer-based models. Each co 1. Browse [Optimum TPU :material-arrow-top-right-thin:{ .external }](https://github.com/huggingface/optimum-tpu){:target="_blank"}, [Optimum TPU TGI :material-arrow-top-right-thin:{ .external }](https://github.com/huggingface/optimum-tpu/tree/main/text-generation-inference){:target="_blank"} and [vLLM :material-arrow-top-right-thin:{ .external }](https://docs.vllm.ai/en/latest/getting_started/tpu-installation.html){:target="_blank"}. -2. Check [dev environments](../../docs/dev-environments.md), [tasks](https://dstack.ai/docs/tasks), - [services](../../docs/services.md), and [fleets](../../docs/concepts/fleets.md). +2. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](https://dstack.ai/docs/tasks), + [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md). !!! info "Multi-host TPUs" If you’d like to use `dstack` with more than eight TPU cores, upvote the corresponding diff --git a/docs/blog/posts/volumes-on-runpod.md b/docs/blog/posts/volumes-on-runpod.md index 4350d1052..485b43f34 100644 --- a/docs/blog/posts/volumes-on-runpod.md +++ b/docs/blog/posts/volumes-on-runpod.md @@ -18,7 +18,7 @@ deploying a model on RunPod. -Suppose you want to deploy Llama 3.1 on RunPod as a [service](../../docs/services.md): +Suppose you want to deploy Llama 3.1 on RunPod as a [service](../../docs/concepts/services.md):
@@ -115,7 +115,7 @@ env: commands: - text-generation-launcher port: 80 -# Register the mdoel +# Register the model model: meta-llama/Meta-Llama-3.1-8B-Instruct # Uncomment to leverage spot instances @@ -131,9 +131,9 @@ In this case, `dstack` attaches the specified volume to each new replica. This e once, reducing cold start time in proportion to the model size. A notable feature of RunPod is that volumes can be attached to multiple containers simultaneously. This capability is -particularly useful for autoscalable services or distributed tasks. +particularly useful for auto-scalable services or distributed tasks. Using [volumes](../../docs/concepts/volumes.md) not only optimizes inference cold start times but also enhances the efficiency of data and model checkpoint loading during training and fine-tuning. -Whether you're running [tasks](../../docs/tasks.md) or [dev environments](../../docs/dev-environments.md), leveraging +Whether you're running [tasks](../../docs/concepts/tasks.md) or [dev environments](../../docs/concepts/dev-environments.md), leveraging volumes can significantly streamline your workflow and improve overall performance. \ No newline at end of file diff --git a/docs/docs/concepts/backends.md b/docs/docs/concepts/backends.md new file mode 100644 index 000000000..978a0c665 --- /dev/null +++ b/docs/docs/concepts/backends.md @@ -0,0 +1,852 @@ +# Backends + +To use `dstack` with cloud providers, configure backends +via the `~/.dstack/server/config.yml` file. +The server loads this file on startup. + +Alternatively, you can configure backends on the [project settings page](../guides/administration.md#backends) via UI. + +> For using `dstack` with on-prem servers, no backend configuration is required. +> Use [SSH fleets](../concepts/fleets.md#ssh) instead. + +Below are examples of how to configure backends via `~/.dstack/server/config.yml`. + +## Cloud providers + +### AWS + +There are two ways to configure AWS: using an access key or using the default credentials. + +=== "Default credentials" + + If you have default credentials set up (e.g. in `~/.aws/credentials`), configure the backend like this: + +
+ + ```yaml + projects: + - name: main + backends: + - type: aws + creds: + type: default + ``` + +
=== "Access key"

    Create an access key by following [this guide :material-arrow-top-right-thin:{ .external }](https://docs.aws.amazon.com/cli/latest/userguide/cli-authentication-user.html#cli-authentication-user-get).
    Once you've downloaded the `.csv` file with your IAM user's Access key ID and Secret access key, proceed to
    configure the backend.
+ + ```yaml + projects: + - name: main + backends: + - type: aws + creds: + type: access_key + access_key: KKAAUKLIZ5EHKICAOASV + secret_key: pn158lMqSBJiySwpQ9ubwmI6VUU3/W2fdJdFwfgO + ``` + +
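Optionally, with either credentials type, you can restrict provisioning to specific regions via the `regions` property — a minimal sketch (the region list here is illustrative; use whichever regions your account has quotas in):

```yaml
projects:
- name: main
  backends:
  - type: aws
    creds:
      type: default
    regions: [us-east-1, us-west-2]
```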
??? info "Required permissions"
    The following AWS policy permissions are sufficient for `dstack` to work:

    ```
    {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Action": [
                    "ec2:AttachVolume",
                    "ec2:AuthorizeSecurityGroupEgress",
                    "ec2:AuthorizeSecurityGroupIngress",
                    "ec2:CreatePlacementGroup",
                    "ec2:CancelSpotInstanceRequests",
                    "ec2:CreateSecurityGroup",
                    "ec2:CreateTags",
                    "ec2:CreateVolume",
                    "ec2:DeletePlacementGroup",
                    "ec2:DeleteVolume",
                    "ec2:DescribeAvailabilityZones",
                    "ec2:DescribeCapacityReservations",
                    "ec2:DescribeImages",
                    "ec2:DescribeInstances",
                    "ec2:DescribeInstanceAttribute",
                    "ec2:DescribeInstanceTypes",
                    "ec2:DescribeRouteTables",
                    "ec2:DescribeSecurityGroups",
                    "ec2:DescribeSubnets",
                    "ec2:DescribeVpcs",
                    "ec2:DescribeVolumes",
                    "ec2:DetachVolume",
                    "ec2:RunInstances",
                    "ec2:TerminateInstances"
                ],
                "Resource": "*"
            },
            {
                "Effect": "Allow",
                "Action": [
                    "servicequotas:ListServiceQuotas",
                    "servicequotas:GetServiceQuota"
                ],
                "Resource": "*"
            },
            {
                "Effect": "Allow",
                "Action": [
                    "elasticloadbalancing:CreateLoadBalancer",
                    "elasticloadbalancing:CreateTargetGroup",
                    "elasticloadbalancing:CreateListener",
                    "elasticloadbalancing:RegisterTargets",
                    "elasticloadbalancing:AddTags",
                    "elasticloadbalancing:DeleteLoadBalancer",
                    "elasticloadbalancing:DeleteTargetGroup",
                    "elasticloadbalancing:DeleteListener",
                    "elasticloadbalancing:DeregisterTargets"
                ],
                "Resource": "*"
            },
            {
                "Effect": "Allow",
                "Action": [
                    "acm:DescribeCertificate",
                    "acm:ListCertificates"
                ],
                "Resource": "*"
            }
        ]
    }
    ```

    The `elasticloadbalancing:*` and `acm:*` permissions are only needed for provisioning gateways with ACM (AWS Certificate Manager) certificates.

??? info "VPC"
    By default, `dstack` uses the default VPC. It's possible to customize it:

    === "vpc_name"

        ```yaml
        projects:
        - name: main
          backends:
          - type: aws
            creds:
              type: default

            vpc_name: my-vpc
        ```

    === "vpc_ids"

        ```yaml
        projects:
        - name: main
          backends:
          - type: aws
            creds:
              type: default

            default_vpcs: true
            vpc_ids:
              us-east-1: vpc-0a2b3c4d5e6f7g8h
              us-east-2: vpc-9i8h7g6f5e4d3c2b
              us-west-1: vpc-4d3c2b1a0f9e8d7
        ```

        For the regions without configured `vpc_ids`, enable default VPCs by setting `default_vpcs` to `true`.

??? info "Private subnets"
    By default, `dstack` provisions instances with public IPs and permits inbound SSH traffic.
    If you want `dstack` to use private subnets and provision instances without public IPs, set `public_ips` to `false`.

    ```yaml
    projects:
    - name: main
      backends:
      - type: aws
        creds:
          type: default

        public_ips: false
    ```

    Using private subnets assumes that both the `dstack` server and users can access the configured VPC's private subnets.
    Additionally, private subnets must have outbound internet connectivity provided by a NAT Gateway, Transit Gateway, or another mechanism.

??? info "OS images"
    By default, `dstack` uses its own [AMI :material-arrow-top-right-thin:{ .external }](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html)
    optimized for `dstack`.
    To use your own or other third-party images, set the `os_images` property:

    ```yaml
    projects:
    - name: main
      backends:
      - type: aws
        creds:
          type: default

        os_images:
          cpu:
            name: my-ami-for-cpu-instances
            owner: self
            user: dstack
          nvidia:
            name: 'Some ThirdParty CUDA image'
            owner: 123456789012
            user: ubuntu
    ```

    Here, both the `cpu` and `nvidia` properties are optional, but if a property is not set, you won't be able to use the corresponding instance types.

    The `name` is an AMI name.
    The `owner` is either an AWS account ID (a 12-digit number) or the special value `self`, indicating the current account.
    The `user` specifies an OS user for instance provisioning.

    !!! info "Image requirements"
        * SSH server listening on port 22
        * `user` with passwordless sudo access
        * Docker installed
        * (For NVIDIA instances) NVIDIA/CUDA drivers and the NVIDIA Container Toolkit installed

### Azure

There are two ways to configure Azure: using a client secret or using the default credentials.

=== "Default credentials"

    If you have default credentials set up, configure the backend like this:
+ + ```yaml + projects: + - name: main + backends: + - type: azure + subscription_id: 06c82ce3-28ff-4285-a146-c5e981a9d808 + tenant_id: f84a7584-88e4-4fd2-8e97-623f0a715ee1 + creds: + type: default + ``` + +
    If you don't know your `subscription_id` and `tenant_id`, use [Azure CLI :material-arrow-top-right-thin:{ .external }](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli):

    ```shell
    az account show --query "{subscription_id: id, tenant_id: tenantId}"
    ```

=== "Client secret"

    A client secret can be created using the [Azure CLI :material-arrow-top-right-thin:{ .external }](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli):

    ```shell
    SUBSCRIPTION_ID=...
    # DSTACK_ROLE must be set to the role to assign, e.g. a custom role created from the permissions below
    az ad sp create-for-rbac \
        --name dstack-app \
        --role $DSTACK_ROLE \
        --scopes /subscriptions/$SUBSCRIPTION_ID \
        --query "{ tenant_id: tenant, client_id: appId, client_secret: password }"
    ```

    Once you have `tenant_id`, `client_id`, and `client_secret`, go ahead and configure the backend.
+ + ```yaml + projects: + - name: main + backends: + - type: azure + subscription_id: 06c82ce3-28ff-4285-a146-c5e981a9d808 + tenant_id: f84a7584-88e4-4fd2-8e97-623f0a715ee1 + creds: + type: client + client_id: acf3f73a-597b-46b6-98d9-748d75018ed0 + client_secret: 1Kb8Q~o3Q2hdEvrul9yaj5DJDFkuL3RG7lger2VQ + ``` + +
+ + If you don't know your `subscription_id`, use [Azure CLI :material-arrow-top-right-thin:{ .external }](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli): + + ```shell + az account show --query "{subscription_id: id}" + ``` + +??? info "Required permissions" + The following Azure permissions are sufficient for `dstack` to work: + + ```json + { + "properties": { + "roleName": "dstack-role", + "description": "Minimal required permissions for using Azure with dstack", + "assignableScopes": [ + "/subscriptions/${YOUR_SUBSCRIPTION_ID}" + ], + "permissions": [ + { + "actions": [ + "Microsoft.Authorization/*/read", + "Microsoft.Compute/availabilitySets/*", + "Microsoft.Compute/locations/*", + "Microsoft.Compute/virtualMachines/*", + "Microsoft.Compute/virtualMachineScaleSets/*", + "Microsoft.Compute/cloudServices/*", + "Microsoft.Compute/disks/write", + "Microsoft.Compute/disks/read", + "Microsoft.Compute/disks/delete", + "Microsoft.Network/networkSecurityGroups/*", + "Microsoft.Network/locations/*", + "Microsoft.Network/virtualNetworks/*", + "Microsoft.Network/networkInterfaces/*", + "Microsoft.Network/publicIPAddresses/*", + "Microsoft.Resources/subscriptions/resourceGroups/read", + "Microsoft.Resources/subscriptions/resourceGroups/write", + "Microsoft.Resources/subscriptions/read" + ], + "notActions": [], + "dataActions": [], + "notDataActions": [] + } + ] + } + } + ``` + +??? info "VPC" + By default, `dstack` creates new Azure networks and subnets for every configured region. + It's possible to use custom networks by specifying `vpc_ids`: + + ```yaml + projects: + - name: main + backends: + - type: azure + creds: + type: default + regions: [westeurope] + vpc_ids: + westeurope: myNetworkResourceGroup/myNetworkName + ``` + + +??? info "Private subnets" + By default, `dstack` provisions instances with public IPs and permits inbound SSH traffic. + If you want `dstack` to use private subnets and provision instances without public IPs, + specify custom networks using `vpc_ids` and set `public_ips` to `false`. + + ```yaml + projects: + - name: main + backends: + - type: azure + creds: + type: default + regions: [westeurope] + vpc_ids: + westeurope: myNetworkResourceGroup/myNetworkName + public_ips: false + ``` + + Using private subnets assumes that both the `dstack` server and users can access the configured VPC's private subnets. + Additionally, private subnets must have outbound internet connectivity provided by [NAT Gateway or other mechanism](https://learn.microsoft.com/en-us/azure/nat-gateway/nat-overview). + +### GCP + +There are two ways to configure GCP: using a service account or using the default credentials. + +=== "Default credentials" + + Enable GCP application default credentials: + + ```shell + gcloud auth application-default login + ``` + + Then configure the backend like this: + +
+ + ```yaml + projects: + - name: main + backends: + - type: gcp + project_id: gcp-project-id + creds: + type: default + ``` + +
=== "Service account"

    To create a service account, follow [this guide :material-arrow-top-right-thin:{ .external }](https://cloud.google.com/iam/docs/service-accounts-create). After setting up the service account, [create a key :material-arrow-top-right-thin:{ .external }](https://cloud.google.com/iam/docs/keys-create-delete) for it and download the corresponding JSON file.

    Then go ahead and configure the backend by specifying the downloaded file path.
+ + ```yaml + projects: + - name: main + backends: + - type: gcp + project_id: gcp-project-id + creds: + type: service_account + filename: ~/.dstack/server/gcp-024ed630eab5.json + ``` + +
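    If the service account still needs the permissions listed under "Required permissions" below, one hedged way to grant them — assuming you've created a custom role from that list (here named `dstackRole`, and the service account email is likewise illustrative) — is:

    ```shell
    PROJECT_ID=...
    # Bind the custom role to the service account
    gcloud projects add-iam-policy-binding $PROJECT_ID \
        --member="serviceAccount:dstack-sa@$PROJECT_ID.iam.gserviceaccount.com" \
        --role="projects/$PROJECT_ID/roles/dstackRole"
    ```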
+ +If you don't know your GCP project ID, use [Google Cloud CLI :material-arrow-top-right-thin:{ .external }](https://cloud.google.com/sdk/docs/install-sdk): + +```shell +gcloud projects list --format="json(projectId)" +``` + +??? info "Required permissions" + The following GCP permissions are sufficient for `dstack` to work: + + ``` + compute.disks.create + compute.disks.delete + compute.disks.get + compute.disks.list + compute.disks.setLabels + compute.disks.use + compute.firewalls.create + compute.images.useReadOnly + compute.instances.attachDisk + compute.instances.create + compute.instances.delete + compute.instances.detachDisk + compute.instances.get + compute.instances.setLabels + compute.instances.setMetadata + compute.instances.setServiceAccount + compute.instances.setTags + compute.networks.get + compute.networks.updatePolicy + compute.regions.get + compute.regions.list + compute.routers.list + compute.subnetworks.list + compute.subnetworks.use + compute.subnetworks.useExternalIp + compute.zoneOperations.get + ``` + + If you plan to use TPUs, additional permissions are required: + + ``` + tpu.nodes.create + tpu.nodes.get + tpu.nodes.update + tpu.nodes.delete + tpu.operations.get + tpu.operations.list + ``` + + Also, the use of TPUs requires the `serviceAccountUser` role. + For TPU VMs, dstack will use the default service account. + +??? info "Required APIs" + First, ensure the required APIs are enabled in your GCP `project_id`. + + ```shell + PROJECT_ID=... + gcloud config set project $PROJECT_ID + gcloud services enable cloudapis.googleapis.com + gcloud services enable compute.googleapis.com + ``` + +??? info "VPC" + + === "VPC" + +
+ + ```yaml + projects: + - name: main + backends: + - type: gcp + project_id: gcp-project-id + creds: + type: default + + vpc_name: my-custom-vpc + ``` + +
+ + === "Shared VPC" + +
+ + ```yaml + projects: + - name: main + backends: + - type: gcp + project_id: gcp-project-id + creds: + type: default + + vpc_name: my-custom-vpc + vpc_project_id: another-project-id + ``` + +
+ + When using a Shared VPC, ensure there is a firewall rule allowing `INGRESS` traffic on port `22`. + You can limit this rule to `dstack` instances using the `dstack-runner-instance` target tag. + + When using GCP gateways with a Shared VPC, also ensure there is a firewall rule allowing `INGRESS` traffic on ports `22`, `80`, `443`. + You can limit this rule to `dstack` gateway instances using the `dstack-gateway-instance` target tag. + + To use TPUs with a Shared VPC, you need to grant the TPU Service Account in your service project permissions + to manage resources in the host project by granting the "TPU Shared VPC Agent" (roles/tpu.xpnAgent) role + ([more in the GCP docs](https://cloud.google.com/tpu/docs/shared-vpc-networks#vpc-shared-vpc)). + +??? info "Private subnets" + By default, `dstack` provisions instances with public IPs and permits inbound SSH traffic. + If you want `dstack` to use private subnets and provision instances without public IPs, set `public_ips` to `false`. + + ```yaml + projects: + - name: main + backends: + - type: gcp + creds: + type: default + + public_ips: false + ``` + + Using private subnets assumes that both the `dstack` server and users can access the configured VPC's private subnets. + Additionally, [Cloud NAT](https://cloud.google.com/nat/docs/overview) must be configured to provide access to external resources for provisioned instances. + +### Lambda + +Log into your [Lambda Cloud :material-arrow-top-right-thin:{ .external }](https://lambdalabs.com/service/gpu-cloud) account, click API keys in the sidebar, and then click the `Generate API key` +button to create a new API key. + +Then, go ahead and configure the backend: + +
+ +```yaml +projects: +- name: main + backends: + - type: lambda + creds: + type: api_key + api_key: eersct_yrpiey-naaeedst-tk-_cb6ba38e1128464aea9bcc619e4ba2a5.iijPMi07obgt6TZ87v5qAEj61RVxhd0p +``` + +
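Note that the server reads `~/.dstack/server/config.yml` only on startup (as noted at the top of this page), so after adding or editing a backend like the one above, stop the running server and start it again:

```shell
dstack server
```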
### RunPod

Log into your [RunPod :material-arrow-top-right-thin:{ .external }](https://www.runpod.io/console/) console, click Settings in the sidebar, expand the `API Keys` section, and click
the button to create a Read & Write key.

Then proceed to configure the backend:
+ +```yaml +projects: + - name: main + backends: + - type: runpod + creds: + type: api_key + api_key: US9XTPDIV8AR42MMINY8TCKRB8S4E7LNRQ6CAUQ9 +``` + +
+ +### Vast.ai + +Log into your [Vast.ai :material-arrow-top-right-thin:{ .external }](https://cloud.vast.ai/) account, click Account in the sidebar, and copy your +API Key. + +Then, go ahead and configure the backend: + +
+ +```yaml +projects: +- name: main + backends: + - type: vastai + creds: + type: api_key + api_key: d75789f22f1908e0527c78a283b523dd73051c8c7d05456516fc91e9d4efd8c5 +``` + +
Note that the `vastai` backend supports on-demand instances only. Spot instance support is coming soon.

### TensorDock

Log into your [TensorDock :material-arrow-top-right-thin:{ .external }](https://dashboard.tensordock.com/) account, click Developers in the sidebar, and use the `Create an Authorization` section to create a new authorization key.

Then, go ahead and configure the backend:
+ +```yaml +projects: + - name: main + backends: + - type: tensordock + creds: + type: api_key + api_key: 248e621d-9317-7494-dc1557fa5825b-98b + api_token: FyBI3YbnFEYXdth2xqYRnQI7hiusssBC +``` + +
The `tensordock` backend supports on-demand instances only. Spot instance support is coming soon.

### CUDO

Log into your [CUDO Compute :material-arrow-top-right-thin:{ .external }](https://compute.cudo.org/) account, click API keys in the sidebar, and click the `Create an API key` button.

Ensure you've created a project with CUDO Compute, then proceed to configure the backend:
+ +```yaml +projects: + - name: main + backends: + - type: cudo + project_id: my-cudo-project + creds: + type: api_key + api_key: 7487240a466624b48de22865589 +``` + +
+ +### OCI + +There are two ways to configure OCI: using client credentials or using the default credentials. + +=== "Default credentials" + If you have default credentials set up in `~/.oci/config`, configure the backend like this: + +
+ + ```yaml + projects: + - name: main + backends: + - type: oci + creds: + type: default + ``` + +
+ +=== "Client credentials" + + Log into the [OCI Console :material-arrow-top-right-thin:{ .external }](https://cloud.oracle.com), go to `My profile`, + select `API keys`, and click `Add API key`. + + Once you add a key, you'll see the configuration file. Copy its values to configure the backend as follows: + +
+ + ```yaml + projects: + - name: main + backends: + - type: oci + creds: + type: client + user: ocid1.user.oc1..g5vlaeqfu47akmaafq665xsgmyaqjktyfxtacfxc4ftjxuca7aohnd2ev66m + tenancy: ocid1.tenancy.oc1..ajqsftvk4qarcfaak3ha4ycdsaahxmaita5frdwg3tqo2bcokpd3n7oizwai + region: eu-frankfurt-1 + fingerprint: 77:32:77:00:49:7c:cb:56:84:75:8e:77:96:7d:53:17 + key_file: ~/.oci/private_key.pem + ``` + +
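    Alternatively, the private key can be embedded directly instead of referencing a file — a sketch using `key_content`, with the key body deliberately elided:

    ```yaml
    projects:
    - name: main
      backends:
      - type: oci
        creds:
          type: client
          user: ocid1.user.oc1..g5vlaeqfu47akmaafq665xsgmyaqjktyfxtacfxc4ftjxuca7aohnd2ev66m
          tenancy: ocid1.tenancy.oc1..ajqsftvk4qarcfaak3ha4ycdsaahxmaita5frdwg3tqo2bcokpd3n7oizwai
          region: eu-frankfurt-1
          fingerprint: 77:32:77:00:49:7c:cb:56:84:75:8e:77:96:7d:53:17
          key_content: |
            -----BEGIN PRIVATE KEY-----
            ...
            -----END PRIVATE KEY-----
    ```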
+ + Make sure to include either the path to your private key via `key_file` or the contents of the key via `key_content`. + +??? info "Required permissions" + + This is an example of a restrictive policy for a group of `dstack` users: + + ``` + Allow group to read compartments in tenancy where target.compartment.name = '' + Allow group to read marketplace-community-listings in compartment + Allow group to manage app-catalog-listing in compartment + Allow group to manage instances in compartment + Allow group to manage compute-capacity-reports in compartment + Allow group to manage volumes in compartment + Allow group to manage volume-attachments in compartment + Allow group to manage virtual-network-family in compartment + ``` + + To use this policy, create a compartment for `dstack` and specify it in `~/.dstack/server/config.yml`. + + ```yaml + projects: + - name: main + backends: + - type: oci + creds: + type: default + compartment_id: ocid1.compartment.oc1..aaaaaaaa + ``` + +### DataCrunch + +Log into your [DataCrunch :material-arrow-top-right-thin:{ .external }](https://cloud.datacrunch.io/) account, click Keys in the sidebar, find `REST API Credentials` area and then click the `Generate Credentials` button. + +Then, go ahead and configure the backend: + +
+ +```yaml +projects: + - name: main + backends: + - type: datacrunch + creds: + type: api_key + client_id: xfaHBqYEsArqhKWX-e52x3HH7w8T + client_secret: B5ZU5Qx9Nt8oGMlmMhNI3iglK8bjMhagTbylZy4WzncZe39995f7Vxh8 +``` + +
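Backends are defined as a list under each project, so several providers can be combined in a single `~/.dstack/server/config.yml` — a sketch assuming the AWS and GCP credentials described above are already set up:

```yaml
projects:
- name: main
  backends:
  - type: aws
    creds:
      type: default
  - type: gcp
    project_id: gcp-project-id
    creds:
      type: default
```

When a run is submitted, `dstack` considers offers from all configured backends, so adding more of them generally improves availability and pricing options.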
+ +## On-prem servers + +### SSH fleets + +> For using `dstack` with on-prem servers, no backend configuration is required. +> See [SSH fleets](fleets.md#ssh) for more details. + +### Kubernetes + +To configure a Kubernetes backend, specify the path to the kubeconfig file, +and the port that `dstack` can use for proxying SSH traffic. +In case of a self-managed cluster, also specify the IP address of any node in the cluster. + +[//]: # (TODO: Mention that the Kind context has to be selected via `current-context` ) + +=== "Self-managed" + + Here's how to configure the backend to use a self-managed cluster. + +
+ + ```yaml + projects: + - name: main + backends: + - type: kubernetes + kubeconfig: + filename: ~/.kube/config + networking: + ssh_host: localhost # The external IP address of any node + ssh_port: 32000 # Any port accessible outside of the cluster + ``` + +
    The port specified via `ssh_port` must be accessible outside of the cluster.

    ??? info "Kind"
        If you are using [Kind](https://kind.sigs.k8s.io/), make sure
        to set up `ssh_port` via `extraPortMappings` for proxying SSH traffic:

        ```yaml
        kind: Cluster
        apiVersion: kind.x-k8s.io/v1alpha4
        nodes:
        - role: control-plane
          extraPortMappings:
          - containerPort: 32000 # Must be same as `ssh_port`
            hostPort: 32000 # Must be same as `ssh_port`
        ```

        Go ahead and create the cluster like this:

        ```shell
        kind create cluster --config examples/misc/kubernetes/kind-config.yml
        ```

[//]: # (TODO: Elaborate on the Kind's IP address on Linux)

=== "Managed"
    Here's how to configure the backend to use a managed cluster (AWS, GCP, Azure).
+ + ```yaml + projects: + - name: main + backends: + - type: kubernetes + kubeconfig: + filename: ~/.kube/config + networking: + ssh_port: 32000 # Any port accessible outside of the cluster + ``` + +
    The port specified via `ssh_port` must be accessible outside of the cluster.

    ??? info "EKS"
        For example, if you are using EKS, make sure to add it via an ingress rule
        of the corresponding security group:

        ```shell
        aws ec2 authorize-security-group-ingress --group-id --protocol tcp --port 32000 --cidr 0.0.0.0/0
        ```

[//]: # (TODO: Elaborate on gateways, and what backends allow configuring them)

[//]: # (TODO: Should we automatically detect ~/.kube/config)

??? info "NVIDIA GPU Operator"
    To use GPUs with Kubernetes, the cluster must have the
    [NVIDIA GPU Operator :material-arrow-top-right-thin:{ .external }](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/index.html) installed.

    [//]: # (TODO: Provide short yet clear instructions. Elaborate on whether it works with Kind.)

## dstack Sky

If you're using [dstack Sky :material-arrow-top-right-thin:{ .external }](https://sky.dstack.ai){:target="_blank"},
backends are pre-configured to use compute from `dstack`'s marketplace.

You can reconfigure backends via the UI to use your own cloud accounts instead.

[//]: # (TODO: Add link to the server config reference page)
\ No newline at end of file
diff --git a/docs/docs/concepts/dev-environments.md b/docs/docs/concepts/dev-environments.md
new file mode 100644
index 000000000..0b20d9c60
--- /dev/null
+++ b/docs/docs/concepts/dev-environments.md
@@ -0,0 +1,261 @@
# Dev environments

A dev environment lets you provision an instance and access it with your desktop IDE.

## Define a configuration

First, define a dev environment configuration as a YAML file in your project folder.
The filename must end with `.dstack.yml` (e.g. `.dstack.yml` or `dev.dstack.yml` are both acceptable).
+ +```yaml +type: dev-environment +# The name is optional, if not specified, generated randomly +name: vscode + +python: "3.11" +# Uncomment to use a custom Docker image +#image: dstackai/base:py3.13-0.6-cuda-12.1 +ide: vscode + +# Uncomment to leverage spot instances +#spot_policy: auto + +resources: + gpu: 24GB +``` + +
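Since the only naming requirement is the `.dstack.yml` suffix, you can keep several configurations side by side and pass the one you need to `dstack apply` — e.g., assuming the file above is saved as `dev.dstack.yml` (an illustrative name):

```shell
dstack apply -f dev.dstack.yml
```

Running configurations is covered in more detail in [Run a configuration](#run-a-configuration) below.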
+ +### Resources + +When you specify a resource value like `cpu` or `memory`, +you can either use an exact value (e.g. `24GB`) or a +range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`). + +
+ +```yaml +type: dev-environment +# The name is optional, if not specified, generated randomly +name: vscode + +ide: vscode + +resources: + # 200GB or more RAM + memory: 200GB.. + # 4 GPUs from 40GB to 80GB + gpu: 40GB..80GB:4 + # Shared memory (required by multi-gpu) + shm_size: 16GB + # Disk size + disk: 500GB +``` + +
+ +The `gpu` property allows specifying not only memory size but also GPU vendor, names +and their quantity. Examples: `nvidia` (one NVIDIA GPU), `A100` (one A100), `A10G,A100` (either A10G or A100), +`A100:80GB` (one A100 of 80GB), `A100:2` (two A100), `24GB..40GB:2` (two GPUs between 24GB and 40GB), +`A100:40GB:2` (two A100 GPUs of 40GB). + +??? info "Google Cloud TPU" + To use TPUs, specify its architecture via the `gpu` property. + + ```yaml + type: dev-environment + # The name is optional, if not specified, generated randomly + name: vscode + + ide: vscode + + resources: + gpu: v2-8 + ``` + + Currently, only 8 TPU cores can be specified, supporting single TPU device workloads. Multi-TPU support is coming soon. + +??? info "Shared memory" + If you are using parallel communicating processes (e.g., dataloaders in PyTorch), you may need to configure + `shm_size`, e.g. set it to `16GB`. + +### Python version + +If you don't specify `image`, `dstack` uses its base Docker image pre-configured with +`python`, `pip`, `conda` (Miniforge), and essential CUDA drivers. +The `python` property determines which default Docker image is used. + +??? info "nvcc" + By default, the base Docker image doesn’t include `nvcc`, which is required for building custom CUDA kernels. + If you need `nvcc`, set the [`nvcc`](../reference/dstack.yml/dev-environment.md#nvcc) property to true. + +### Docker + +If you want, you can specify your own Docker image via `image`. + +
+ +```yaml +type: dev-environment +# The name is optional, if not specified, generated randomly +name: vscode + +# Any custom Docker image +image: ghcr.io/huggingface/text-generation-inference:latest + +ide: vscode +``` + +
+ +??? info "Private registry" + + Use the `registry_auth` property to provide credentials for a private Docker registry. + + ```yaml + type: dev-environment + # The name is optional, if not specified, generated randomly + name: vscode + + # Any private Docker image + image: ghcr.io/huggingface/text-generation-inference:latest + # Credentials of the private Docker registry + registry_auth: + username: peterschmidt85 + password: ghp_e49HcZ9oYwBzUbcSk2080gXZOU2hiT9AeSR5 + + ide: vscode + ``` + +??? info "Privileged mode" + All backends except `runpod`, `vastai`, and `kubernetes` support running containers in privileged mode. + This mode enables features like using [Docker and Docker Compose](../guides/protips.md#docker-and-docker-compose) + inside `dstack` runs. + +### Environment variables + +
+ +```yaml +type: dev-environment +# The name is optional, if not specified, generated randomly +name: vscode + +# Environment variables +env: + - HF_TOKEN + - HF_HUB_ENABLE_HF_TRANSFER=1 + +ide: vscode +``` + +
+ +If you don't assign a value to an environment variable (see `HF_TOKEN` above), +`dstack` will require the value to be passed via the CLI or set in the current process. + +??? info "System environment variables" + The following environment variables are available in any run by default: + + | Name | Description | + |-------------------------|-----------------------------------------| + | `DSTACK_RUN_NAME` | The name of the run | + | `DSTACK_REPO_ID` | The ID of the repo | + | `DSTACK_GPUS_NUM` | The total number of GPUs in the run | + +### Spot policy + +By default, `dstack` uses on-demand instances. However, you can change that +via the [`spot_policy`](../reference/dstack.yml/dev-environment.md#spot_policy) property. It accepts `spot`, `on-demand`, and `auto`. + +!!! info "Reference" + Dev environments support many more configuration options, + incl. [`backends`](../reference/dstack.yml/dev-environment.md#backends), + [`regions`](../reference/dstack.yml/dev-environment.md#regions), + [`max_price`](../reference/dstack.yml/dev-environment.md#max_price), and + [`max_duration`](../reference/dstack.yml/dev-environment.md#max_duration), + among [others](../reference/dstack.yml/dev-environment.md). + +## Run a configuration + +To run a dev environment, pass the configuration to [`dstack apply`](../reference/cli/dstack/apply.md): + +
+ +```shell +$ dstack apply -f examples/.dstack.yml + + # BACKEND REGION RESOURCES SPOT PRICE + 1 runpod CA-MTL-1 9xCPU, 48GB, A5000:24GB yes $0.11 + 2 runpod EU-SE-1 9xCPU, 43GB, A5000:24GB yes $0.11 + 3 gcp us-west4 4xCPU, 16GB, L4:24GB yes $0.214516 + +Submit the run vscode? [y/n]: y + +Launching `vscode`... +---> 100% + +To open in VS Code Desktop, use this link: + vscode://vscode-remote/ssh-remote+vscode/workflow +``` + +
+ +`dstack apply` automatically provisions an instance, uploads the contents of the repo (incl. your local uncommitted changes), +and sets up an IDE on the instance. + +!!! info "Windows" + On Windows, `dstack` works both natively and inside WSL. But, for dev environments, + it's recommended _not to use_ `dstack apply` _inside WSL_ due to a [VS Code issue :material-arrow-top-right-thin:{ .external }](https://github.com/microsoft/vscode-remote-release/issues/937){:target="_blank"}. + +To open the dev environment in your desktop IDE, use the link from the output +(such as `vscode://vscode-remote/ssh-remote+fast-moth-1/workflow`). + +![](../../assets/images/dstack-vscode-jupyter.png){ width=800 } + +??? info "SSH" + + Alternatively, while the CLI is attached to the run, you can connect to the dev environment via SSH: + +
+ + ```shell + $ ssh fast-moth-1 + ``` + +
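    Because this is plain SSH, one-off commands work as well — for example, to check GPU visibility on the instance (assuming an NVIDIA GPU, as in the examples above):

    ```shell
    $ ssh fast-moth-1 nvidia-smi
    ```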
+ +### Retry policy + +By default, if `dstack` can't find capacity or the instance is interrupted, the run will fail. + +If you'd like `dstack` to automatically retry, configure the +[retry](../reference/dstack.yml/dev-environment.md#retry) property accordingly: + +
+ +```yaml +type: dev-environment +# The name is optional, if not specified, generated randomly +name: vscode + +ide: vscode + +retry: + # Retry on specific events + on_events: [no-capacity, error, interruption] + # Retry for up to 1 hour + duration: 1h +``` + +
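Retries pair naturally with spot instances. Below is a hedged sketch combining the `spot_policy` described earlier with `retry` — assuming `auto` falls back to on-demand when spot capacity is unavailable:

```yaml
type: dev-environment
# The name is optional, if not specified, generated randomly
name: vscode

ide: vscode

# Use spot instances when available, otherwise on-demand
spot_policy: auto

retry:
  # Re-provision if a spot instance is interrupted
  on_events: [interruption]
  duration: 1h
```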
+ +--8<-- "docs/concepts/snippets/manage-fleets.ext" + +--8<-- "docs/concepts/snippets/manage-runs.ext" + +!!! info "What's next?" + 1. Read about [tasks](tasks.md), [services](services.md), and [repos](repos.md) + 2. Learn how to manage [fleets](fleets.md) \ No newline at end of file diff --git a/docs/docs/concepts/fleets.md b/docs/docs/concepts/fleets.md index 36d6f1d7c..6b6e45cf4 100644 --- a/docs/docs/concepts/fleets.md +++ b/docs/docs/concepts/fleets.md @@ -1,105 +1,223 @@ # Fleets -Fleets are groups of cloud instances or SSH machines that you use to run dev environments, tasks, and services. +Fleets are groups of instances used to run dev environments, tasks, and services. +Depending on the fleet configuration, instances can be interconnected clusters or standalone instances. -By default, when you run `dstack apply` to start a new dev environment, task, or service, -`dstack` reuses `idle` instances from an existing fleet. -If no `idle` instances match the requirements, `dstack` automatically creates a new fleet -using configured backends. +`dstack` supports two kinds of fleets: -If you need more control over instance configuration and lifecycle, or if you want to use on-prem servers, -`dstack` also offers you a way to create and manage fleets directly. +* [Cloud fleets](#cloud) – dynamically provisioned through configured backends +* [SSH fleets](#ssh) – created using on-prem servers -## Define a configuration +## Cloud fleets { #cloud } -To create a fleet, define its configuration as a YAML file in your project folder. -The filename must end with `.dstack.yml` (e.g. `.dstack.yml` or `fleet.dstack.yml` are both acceptable). +When you call `dstack apply` to run a dev environment, task, or service, `dstack` reuses `idle` instances +from an existing fleet. If none match the requirements, `dstack` creates a new cloud fleet. -=== "Cloud fleets" +For greater control over cloud fleet provisioning, create fleets explicitly using configuration files. - !!! info "What is a cloud fleet?" - By default, when running dev environments, tasks, and services, `dstack` - reuses `idle` instances from existing fleets or creates a new cloud fleet on the fly. - - If you want more control over the lifecycle of cloud instances, you can create a cloud fleet manually. - This allows you to reuse a fleet over a longer period and across multiple runs. You can also delete the fleet only when needed. +### Define a configuration - To create a cloud fleet, specify `resources`, `nodes`, - and other optional parameters. - -
+Define a fleet configuration as a YAML file in your project directory. The file must have a +`.dstack.yml` extension (e.g. `.dstack.yml` or `fleet.dstack.yml`). + +
```yaml type: fleet # The name is optional, if not specified, generated randomly - name: fleet-distrib + name: my-fleet - # Number of instances + # Specify the number of instances nodes: 2 - # Ensure instances are inter-connected - placement: cluster + # Uncomment to ensure instances are inter-connected + #placement: cluster - # Terminate if idle for 3 days - idle_duration: 3d + resources: + gpu: 24GB + ``` + +
#### Placement

To ensure instances are interconnected (e.g., for
[distributed tasks](tasks.md#distributed-tasks)), set `placement` to `cluster`.
This ensures all instances are provisioned in the same backend and region with optimal inter-node connectivity.

??? info "AWS"
    `dstack` automatically enables [Elastic Fabric Adapter :material-arrow-top-right-thin:{ .external }](https://aws.amazon.com/hpc/efa/){:target="_blank"}
    for the instance types that support it:
    `p5.48xlarge`, `p4d.24xlarge`, `g4dn.12xlarge`, `g4dn.16xlarge`, `g4dn.8xlarge`, `g4dn.metal`,
    `g5.12xlarge`, `g5.16xlarge`, `g5.24xlarge`, `g5.48xlarge`, `g5.8xlarge`, `g6.12xlarge`,
    `g6.16xlarge`, `g6.24xlarge`, `g6.48xlarge`, `g6.8xlarge`, and `gr6.8xlarge`.

    Currently, only one EFA interface is enabled per instance, regardless of its maximum capacity.
    This will change once [this issue :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/issues/1804){:target="_blank"} is resolved.

> The `cluster` placement is supported only for `aws`, `azure`, `gcp`, and `oci`
> backends.

#### Resources

When you specify a resource value like `cpu` or `memory`,
you can either use an exact value (e.g. `24GB`) or a
range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`).
+```yaml +type: fleet +# The name is optional, if not specified, generated randomly +name: my-fleet + +nodes: 2 + +resources: + # 200GB or more RAM + memory: 200GB.. + # 4 GPUs from 40GB to 80GB + gpu: 40GB..80GB:4 + # Disk size + disk: 500GB +``` + +
The `gpu` property allows specifying not only memory size but also GPU vendor, names
and their quantity. Examples: `nvidia` (one NVIDIA GPU), `A100` (one A100), `A10G,A100` (either A10G or A100),
`A100:80GB` (one A100 of 80GB), `A100:2` (two A100), `24GB..40GB:2` (two GPUs between 24GB and 40GB),
`A100:40GB:2` (two A100 GPUs of 40GB).

??? info "Google Cloud TPU"
    To use TPUs, specify the architecture via the `gpu` property.

    ```yaml
    type: fleet
    # The name is optional, if not specified, generated randomly
    name: my-fleet

    nodes: 1

    resources:
      gpu: v2-8
    ```

    Currently, only 8 TPU cores can be specified, supporting single TPU device workloads. Multi-TPU support is coming soon.

#### Idle duration

By default, fleet instances remain active until the fleet is explicitly deleted via `dstack fleet delete`.

To automatically terminate `idle` instances after a certain period, configure `idle_duration`.
-
+ ```yaml + type: fleet + # The name is optional, if not specified, generated randomly + name: my-fleet + + nodes: 2 + + # Terminate instances idle for more than 1 hour + idle_duration: 1h + + resources: + gpu: 24GB + ``` - When you apply this configuration, `dstack` will create cloud instances using the configured backends according - to the specified parameters. +
+ +#### Spot policy + +By default, `dstack` uses on-demand instances. However, you can change that +via the [`spot_policy`](../reference/dstack.yml/dev-environment.md#spot_policy) property. It accepts `spot`, `on-demand`, and `auto`. + +#### Retry policy - !!! info "Cluster placement" - To ensure the nodes of the fleet are interconnected (e.g., if you'd like to use them for - [multi-node tasks](../reference/dstack.yml/task.md#distributed-tasks)), - set `placement` to `cluster`. - - In this case, `dstack` will provision all nodes in the same backend and region and configure optimal - inter-node connectivity. +By default, if `dstack` fails to provision an instance or an instance is interrupted, no retry is attempted. - !!! info "Backends" - The `cluster` value of the `placement` property is supported only by the `aws`, `azure`, `gcp`, and `oci` - backends. +If you'd like `dstack` to do it, configure the +[retry](../reference/dstack.yml/dev-environment.md#retry) property accordingly: - ??? info "AWS" - `dstack` automatically enables [Elastic Fabric Adapter :material-arrow-top-right-thin:{ .external }](https://aws.amazon.com/hpc/efa/){:target="_blank"} - for instance types that support it. The following instance types with EFA are supported: - `p5.48xlarge`, `p4d.24xlarge`, `g4dn.12xlarge`, `g4dn.16xlarge`, `g4dn.8xlarge`, `g4dn.metal`, - `g5.12xlarge`, `g5.16xlarge`, `g5.24xlarge`, `g5.48xlarge`, `g5.8xlarge`, `g6.12xlarge`, - `g6.16xlarge`, `g6.24xlarge`, `g6.48xlarge`, `g6.8xlarge`, `gr6.8xlarge` +
- Currently, only one EFA interface is enabled regardless of the maximum number of interfaces supported by the instance type. - This limitation will be lifted once [this issue :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/issues/1804){:target="_blank"} is fixed. +```yaml +type: fleet +# The name is optional, if not specified, generated randomly +name: my-fleet - !!! info "Backends" - Cloud fleets are supported for all backends except `kubernetes`, `vastai`, and `runpod`. +nodes: 1 -=== "SSH fleets" +resources: + gpu: 24GB - !!! info "What is an SSH fleet?" - If you’d like to run dev environments, tasks, and services on arbitrary on-prem servers via `dstack`, you can - create an SSH fleet. +retry: + # Retry on specific events + on_events: [no-capacity, interruption] + # Retry for up to 1 hour + duration: 1h +``` + +
- To create an SSH fleet, specify `ssh_config` to allow the `dstack` server to connect to these servers - via SSH. +> Cloud fleets are supported by all backends except `kubernetes`, `vastai`, and `runpod`. -
+!!! info "Reference" + Cloud fleets support many more configuration options, + incl. [`backends`](../reference/dstack.yml/fleet.md#backends), + [`regions`](../reference/dstack.yml/fleet.md#regions), + [`max_price`](../reference/dstack.yml/fleet.md#max_price), and + among [others](../reference/dstack.yml/fleet.md). + +### Create or update a fleet + +To create or update the fleet, pass the fleet configuration to [`dstack apply`](../reference/cli/dstack/apply.md): + +
+ +```shell +$ dstack apply -f examples/misc/fleets/.dstack.yml +``` + +
+ +To ensure the fleet is created, you can use the `dstack fleet` command: + +
+ +```shell +$ dstack fleet + + FLEET INSTANCE BACKEND GPU PRICE STATUS CREATED + my-fleet 0 gcp (europe-west-1) L4:24GB (spot) $0.1624 idle 3 mins ago + 1 gcp (europe-west-1) L4:24GB (spot) $0.1624 idle 3 mins ago +``` + +
+ +Once the status of instances changes to `idle`, they can be used by dev environments, tasks, and services. + +## SSH fleets { #ssh } + +If you have a group of on-prem servers accessible via SSH, you can create an SSH fleet. + +### Define a configuration + +Define a fleet configuration as a YAML file in your project directory. The file must have a +`.dstack.yml` extension (e.g. `.dstack.yml` or `fleet.dstack.yml`). + +
```yaml type: fleet # The name is optional, if not specified, generated randomly - name: fleet-distrib-ssh + name: my-fleet - # Ensure instances are inter-connected - placement: cluster + # Uncomment if instances are interconnected + #placement: cluster - # The user, private SSH key, and hostnames of the on-prem servers + # SSH credentials for the on-prem servers ssh_config: user: ubuntu identity_file: ~/.ssh/id_rsa @@ -108,115 +226,101 @@ The filename must end with `.dstack.yml` (e.g. `.dstack.yml` or `fleet.dstack.ym - 3.255.177.52 ``` -
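Before applying the configuration, it can help to verify that each host is reachable with the exact credentials from `ssh_config` and satisfies the requirements listed below — for example:

```shell
ssh -i ~/.ssh/id_rsa ubuntu@3.255.177.51 docker --version
```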
- - When you apply this configuration, `dstack` will connect to the specified hosts using the provided SSH credentials, - install the dependencies, and configure these servers as a fleet. - - !!! info "Requirements" - Hosts should be pre-installed with Docker. - - === "NVIDIA" - Systems with NVIDIA GPUs should also be pre-installed with CUDA 12.1 and - [NVIDIA Container Toolkit :material-arrow-top-right-thin:{ .external }](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html). - - === "AMD" - Systems with AMD GPUs should also be pre-installed with AMDGPU-DKMS kernel driver (e.g. via - [native package manager :material-arrow-top-right-thin:{ .external }](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/native-install/index.html) - or [AMDGPU installer :material-arrow-top-right-thin:{ .external }](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/amdgpu-install.html).) - - The user should have passwordless `sudo` access. - - ??? info "Environment variables" - For SSH fleets, it's possible to pre-configure environment variables. - These variables will be used when installing the `dstack-shim` service on hosts - and running containers. - - For example, these variables can be used to configure a proxy: - - ```yaml - type: fleet - name: my-fleet - - placement: cluster - - env: - - HTTP_PROXY=http://proxy.example.com:80 - - HTTPS_PROXY=http://proxy.example.com:80 - - NO_PROXY=localhost,127.0.0.1 - - ssh_config: - user: ubuntu - identity_file: ~/.ssh/id_rsa - hosts: - - 3.255.177.51 - - 3.255.177.52 - ``` - - !!! info "Cluster placement" - Set `placement` to `cluster` if the hosts are interconnected - (e.g. if you'd like to use them for [multi-node tasks](../reference/dstack.yml/task.md#distributed-tasks)). - - !!! info "Network" - By default, `dstack` automatically detects the private network for the specified hosts. - However, it's possible to configure it explicitelly via - the [`network`](../reference/dstack.yml/fleet.md#network) property. - - !!! info "Backends" - To use SSH fleets, you don't need to configure any backends at all. +
+ +??? info "Requirements" + 1. Hosts should be pre-installed with Docker. + + === "NVIDIA" + 2. Hosts with NVIDIA GPUs should also be pre-installed with CUDA 12.1 and + [NVIDIA Container Toolkit :material-arrow-top-right-thin:{ .external }](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html). + + === "AMD" + 2. Hosts with AMD GPUs should also be pre-installed with AMDGPU-DKMS kernel driver (e.g. via + [native package manager :material-arrow-top-right-thin:{ .external }](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/native-install/index.html) + or [AMDGPU installer :material-arrow-top-right-thin:{ .external }](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/amdgpu-install.html).) + + 3. The user specified should have passwordless `sudo` access. + +#### Placement + +If the hosts are interconnected (i.e. share the same network), set `placement` to `cluster`. +This is required if you'd like to use the fleet for [distributed tasks](tasks.md#distributed-tasks). + +##### Network + +By default, `dstack` automatically detects the network shared by the hosts. +However, it's possible to configure it explicitly via +the [`network`](../reference/dstack.yml/fleet.md#network) property. + +[//]: # (TODO: Provide an example and more detail) + +#### Environment variables + +If needed, you can specify environment variables that will be used by `dstack-shim` and passed to containers. + +[//]: # (TODO: Explain what dstack-shim is) + +For example, these variables can be used to configure a proxy: + +```yaml +type: fleet +name: my-fleet + +env: + - HTTP_PROXY=http://proxy.example.com:80 + - HTTPS_PROXY=http://proxy.example.com:80 + - NO_PROXY=localhost,127.0.0.1 + +ssh_config: + user: ubuntu + identity_file: ~/.ssh/id_rsa + hosts: + - 3.255.177.51 + - 3.255.177.52 +``` !!! info "Reference" - See [`.dstack.yml`](../reference/dstack.yml/fleet.md) for all the options supported by - the fleet configuration. + For all SSH fleet configuration options, refer to the [reference](../reference/dstack.yml/fleet.md). -## Create or update a fleet +### Create or update a fleet To create or update the fleet, pass the fleet configuration to [`dstack apply`](../reference/cli/dstack/apply.md):
```shell -$ dstack apply -f examples/misc/fleets/distrib.dstack.yml +$ dstack apply -f examples/misc/fleets/.dstack.yml ```
-### Ensure the fleet is created - -To ensure the fleet is created, use the `dstack fleet` command: +To ensure the fleet is created, you can use the `dstack fleet` command:
```shell $ dstack fleet - FLEET INSTANCE BACKEND GPU PRICE STATUS CREATED - my-fleet 0 gcp (europe-west-1) L4:24GB (spot) $0.1624 idle 3 mins ago - 1 gcp (europe-west-1) L4:24GB (spot) $0.1624 idle 3 mins ago + FLEET INSTANCE GPU PRICE STATUS CREATED + my-fleet 0 L4:24GB (spot) $0 idle 3 mins ago + 1 L4:24GB (spot) $0 idle 3 mins ago ```
Once the status of instances changes to `idle`, they can be used by dev environments, tasks, and services. -!!! info "Idle duration" - If you want a fleet to be automatically deleted after a certain idle time, - you can set the [`idle_duration`](../reference/dstack.yml/fleet.md#idle_duration) property. - By default, it's set to `3d`. - -[//]: # (Add Idle time example to the reference page) - -### Troubleshooting SSH fleets +#### Troubleshooting !!! info "Resources" - If you're creating an SSH fleet, ensure that the GPU, memory, and disk size are detected properly. - If GPU isn't detected, ensure that the hosts meet the requirements (see above). + Once the fleet is created, double-check that the GPU, memory, and disk are detected correctly. -If the status doesn't change to `idle` after a few minutes, ensure that -the hosts meet the requirements (see above). +If the status does not change to `idle` after a few minutes or the resources are not displayed correctly, ensure that +all host requirements are satisfied. -If the requirements are met but the fleet still fails to be created, check `/root/.dstack/shim.log` for logs -on the hosts specified in `ssh_config`. +If the requirements are met but the fleet still fails to be created correctly, check the logs at +`/root/.dstack/shim.log` on the hosts for error details. ## Manage fleets @@ -253,12 +357,6 @@ Fleet my-gcp-fleet deleted Alternatively, you can delete a fleet by passing the fleet name to `dstack fleet delete`. To terminate and delete specific instances from a fleet, pass `-i INSTANCE_NUM`. -## What's next? - -1. Read about [dev environments](../dev-environments.md), [tasks](../tasks.md), and - [services](../services.md) -2. Join the community via [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd) - -!!! info "Reference" - See [.dstack.yml](../reference/dstack.yml/fleet.md) for all the options supported by - fleets, along with multiple examples. +!!! info "What's next?" + 1. Read about [dev environments](dev-environments.md), [tasks](tasks.md), and + [services](services.md) \ No newline at end of file diff --git a/docs/docs/concepts/gateways.md b/docs/docs/concepts/gateways.md index d3234765c..b531a3a80 100644 --- a/docs/docs/concepts/gateways.md +++ b/docs/docs/concepts/gateways.md @@ -1,8 +1,8 @@ # Gateways -Gateways manage the ingress traffic of running [services](../services.md) -and provide them with an HTTPS endpoint mapped to your domain, -handling authentication, load distribution, and auto-scaling. +Gateways manage the ingress traffic of running [services](services.md), +provide an HTTPS endpoint mapped to your domain, +and handle auto-scaling. > If you're using [dstack Sky :material-arrow-top-right-thin:{ .external }](https://sky.dstack.ai){:target="_blank"}, > the gateway is already set up for you. @@ -32,8 +32,7 @@ domain: example.com A domain name is required to create a gateway. !!! info "Reference" - See [.dstack.yml](../reference/dstack.yml/gateway.md) for all the options supported by - gateways, along with multiple examples. + For all gateway configuration options, refer to the [reference](../reference/dstack.yml/gateway.md). ## Create or update a gateway @@ -83,10 +82,5 @@ Alternatively, you can delete a gateway by passing the gateway name to `dstack [//]: # (TODO: ## Accessing endpoints) -## What's next? - -1. See [services](../services.md) on how to run services - -!!! 
info "Reference"
-    See [.dstack.yml](../reference/dstack.yml/gateway.md) for all the options supported by
-    gateways, along with multiple examples.
+!!! info "What's next?"
+    1. See [services](services.md) on how to run services
\ No newline at end of file
diff --git a/docs/docs/concepts/repos.md b/docs/docs/concepts/repos.md
index ff5291ccb..1dd2ed3d9 100644
--- a/docs/docs/concepts/repos.md
+++ b/docs/docs/concepts/repos.md
@@ -83,4 +83,4 @@ $ dstack apply -f .dstack.yml --no-repo
 
 ## What's next?
 
-1. Read about [dev environments](../dev-environments.md), [tasks](../tasks.md), [services](../services.md)
\ No newline at end of file
+1. Read about [dev environments](dev-environments.md), [tasks](tasks.md), [services](services.md)
\ No newline at end of file
diff --git a/docs/docs/concepts/services.md b/docs/docs/concepts/services.md
new file mode 100644
index 000000000..6fdb7170b
--- /dev/null
+++ b/docs/docs/concepts/services.md
@@ -0,0 +1,406 @@
+# Services
+
+Services allow you to deploy models or web apps as secure and scalable endpoints.
+
+## Define a configuration
+
+First, define a service configuration as a YAML file in your project folder.
+The filename must end with `.dstack.yml` (e.g. `.dstack.yml` or `service.dstack.yml` are both acceptable).
+
+ +```yaml +type: service +name: llama31 + +# If `image` is not specified, dstack uses its default image +python: "3.11" +env: + - HF_TOKEN + - MODEL_ID=meta-llama/Meta-Llama-3.1-8B-Instruct + - MAX_MODEL_LEN=4096 +commands: + - pip install vllm + - vllm serve $MODEL_ID + --max-model-len $MAX_MODEL_LEN + --tensor-parallel-size $DSTACK_GPUS_NUM +port: 8000 +# (Optional) Register the model +model: meta-llama/Meta-Llama-3.1-8B-Instruct + +# Uncomment to leverage spot instances +#spot_policy: auto + +resources: + gpu: 24GB +``` + +
+ +### Replicas and scaling + +By default, `dstack` runs a single replica of the service. +You can configure the number of replicas as well as the auto-scaling rules. + +
+
+```yaml
+type: service
+# The name is optional, if not specified, generated randomly
+name: llama31-service
+
+python: "3.10"
+
+# Required environment variables
+env:
+  - HF_TOKEN
+commands:
+  - pip install vllm
+  - vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct --max-model-len 4096
+# Expose the port of the service
+port: 8000
+
+resources:
+  # Change to what is required
+  gpu: 24GB
+
+# Minimum and maximum number of replicas
+replicas: 1..4
+scaling:
+  # Requests per second
+  metric: rps
+  # Target metric value
+  target: 10
+```
+
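+
+The range can also start at zero. A minimal sketch showing just the scaling-related properties
+(the rest of the service configuration stays the same as above):
+
+```yaml
+# Zero to four replicas
+replicas: 0..4
+scaling:
+  # Scale based on requests per second
+  metric: rps
+  target: 10
+```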
+
+The [`replicas`](../reference/dstack.yml/service.md#replicas) property can be a number or a range.
+
+The [`metric`](../reference/dstack.yml/service.md#metric) property of [`scaling`](../reference/dstack.yml/service.md#scaling) only supports the `rps` metric (requests per second). In this
+case, `dstack` automatically adjusts the number of replicas (scaling up or down) based on the load.
+
+Setting the minimum number of replicas to `0` allows the service to scale down to zero when there are no requests.
+
+!!! info "Gateways"
+    The `scaling` property currently requires creating a [gateway](gateways.md).
+    This requirement is expected to be removed soon.
+
+### Authorization
+
+By default, the service enables authorization, meaning the service endpoint requires a `dstack` user token.
+This can be disabled by setting `auth` to `false`.
+
+ +```yaml +type: service +# The name is optional, if not specified, generated randomly +name: http-server-service + +# Disable authorization +auth: false + +python: "3.10" + +# Commands of the service +commands: + - python3 -m http.server +# The port of the service +port: 8000 +``` + +
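+
+With `auth` disabled, the endpoint can be called without a `dstack` token. A quick sketch, assuming
+the service above runs in the `main` project of a server available at `localhost:3000`:
+
+```shell
+$ curl http://localhost:3000/proxy/services/main/http-server-service/
+```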
+
+### Model
+
+If the service is running a chat model with an OpenAI-compatible interface,
+set the [`model`](#model) property to make the model accessible via `dstack`'s
+global OpenAI-compatible endpoint, as well as via the `dstack` UI.
+
+### Resources
+
+When specifying memory size, you can use either an exact size (e.g. `24GB`) or a
+range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`).
+
+ +```yaml +type: service +# The name is optional, if not specified, generated randomly +name: http-server-service + +python: "3.10" + +# Commands of the service +commands: + - pip install vllm + - python -m vllm.entrypoints.openai.api_server + --model mistralai/Mixtral-8X7B-Instruct-v0.1 + --host 0.0.0.0 + --tensor-parallel-size $DSTACK_GPUS_NUM +# Expose the port of the service +port: 8000 + +resources: + # 2 GPUs of 80GB + gpu: 80GB:2 + + # Minimum disk size + disk: 200GB +``` + +
+
+The `gpu` property allows specifying not only the memory size but also the GPU vendor, name,
+and quantity. Examples: `nvidia` (one NVIDIA GPU), `A100` (one A100), `A10G,A100` (either A10G or A100),
+`A100:80GB` (one A100 of 80GB), `A100:2` (two A100), `24GB..40GB:2` (two GPUs between 24GB and 40GB),
+`A100:40GB:2` (two A100 GPUs of 40GB).
+
+??? info "Shared memory"
+    If you are using parallel communicating processes (e.g., dataloaders in PyTorch), you may need to configure
+    `shm_size`, e.g. set it to `16GB`.
+
+### Python version
+
+If you don't specify `image`, `dstack` uses its base Docker image pre-configured with
+`python`, `pip`, `conda` (Miniforge), and essential CUDA drivers.
+The `python` property determines which default Docker image is used.
+
+ +```yaml +type: service +# The name is optional, if not specified, generated randomly +name: http-server-service + +# If `image` is not specified, dstack uses its base image +python: "3.10" + +# Commands of the service +commands: + - python3 -m http.server +# The port of the service +port: 8000 +``` + +
+
+??? info "nvcc"
+    By default, the base Docker image doesn’t include `nvcc`, which is required for building custom CUDA kernels.
+    If you need `nvcc`, set the `nvcc` property to `true`.
+
+ + ```yaml + type: service + # The name is optional, if not specified, generated randomly + name: http-server-service + + # If `image` is not specified, dstack uses its base image + python: "3.10" + # Ensure nvcc is installed (req. for Flash Attention) + nvcc: true + + # Commands of the service + commands: + - python3 -m http.server + # The port of the service + port: 8000 + ``` + +
+ +### Docker + +If you want, you can specify your own Docker image via `image`. + +
+
+```yaml
+type: service
+# The name is optional, if not specified, generated randomly
+name: http-server-service
+
+# Any custom Docker image
+image: dstackai/base:py3.13-0.6-cuda-12.1
+
+# Commands of the service
+commands:
+  - python3 -m http.server
+# The port of the service
+port: 8000
+```
+
+
+??? info "Private registry"
+
+    Use the `registry_auth` property to provide credentials for a private Docker registry.
+
+    ```yaml
+    type: service
+    # The name is optional, if not specified, generated randomly
+    name: http-server-service
+
+    # Any private Docker image
+    image: dstackai/base:py3.13-0.6-cuda-12.1
+    # Credentials of the private registry
+    registry_auth:
+      username: peterschmidt85
+      password: ghp_e49HcZ9oYwBzUbcSk2080gXZOU2hiT9AeSR5
+
+    # Commands of the service
+    commands:
+      - python3 -m http.server
+    # The port of the service
+    port: 8000
+    ```
+
+??? info "Privileged mode"
+    All backends except `runpod`, `vastai`, and `kubernetes` support running containers in privileged mode.
+    This mode enables features like using [Docker and Docker Compose](../guides/protips.md#docker-and-docker-compose)
+    inside `dstack` runs.
+
+### Environment variables
+
+ +```yaml +type: service +# The name is optional, if not specified, generated randomly +name: llama-2-7b-service + +python: "3.10" + +# Environment variables +env: + - HF_TOKEN + - MODEL=NousResearch/Llama-2-7b-chat-hf +# Commands of the service +commands: + - pip install vllm + - python -m vllm.entrypoints.openai.api_server --model $MODEL --port 8000 +# The port of the service +port: 8000 + +resources: + # Required GPU vRAM + gpu: 24GB +``` + +
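+
+Since `HF_TOKEN` above is declared without a value, you'd supply it when applying the configuration.
+A quick sketch (the file name is an assumption):
+
+```shell
+$ HF_TOKEN=... dstack apply -f service.dstack.yml
+```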
+
+> If you don't assign a value to an environment variable (see `HF_TOKEN` above),
+`dstack` will require the value to be passed via the CLI or set in the current process.
+
+??? info "System environment variables"
+    The following environment variables are available in any run by default:
+
+    | Name                    | Description                              |
+    |-------------------------|------------------------------------------|
+    | `DSTACK_RUN_NAME`       | The name of the run                      |
+    | `DSTACK_REPO_ID`        | The ID of the repo                       |
+    | `DSTACK_GPUS_NUM`       | The total number of GPUs in the run      |
+
+### Spot policy
+
+By default, `dstack` uses on-demand instances. However, you can change that
+via the [`spot_policy`](../reference/dstack.yml/service.md#spot_policy) property. It accepts `spot`, `on-demand`, and `auto`.
+
+!!! info "Reference"
+    Services support many more configuration options,
+    incl. [`backends`](../reference/dstack.yml/service.md#backends),
+    [`regions`](../reference/dstack.yml/service.md#regions), and
+    [`max_price`](../reference/dstack.yml/service.md#max_price),
+    among [others](../reference/dstack.yml/service.md).
+
+## (Optional) Set up a gateway
+
+Running services doesn't require [gateways](gateways.md) unless you need to enable auto-scaling or want the endpoint to
+use HTTPS and map it to your domain.
+
+!!! info "Websockets and base path"
+    A [gateway](gateways.md) may also be required if the service needs Websockets or cannot be used with
+    a base path.
+
+> If you're using [dstack Sky :material-arrow-top-right-thin:{ .external }](https://sky.dstack.ai){:target="_blank"},
+> a gateway is already pre-configured for you.
+
+## Run a configuration
+
+To run a service, pass the configuration to [`dstack apply`](../reference/cli/dstack/apply.md):
+
+ +```shell +$ HF_TOKEN=... +$ dstack apply -f service.dstack.yml + + # BACKEND REGION RESOURCES SPOT PRICE + 1 runpod CA-MTL-1 18xCPU, 100GB, A5000:24GB:2 yes $0.22 + 2 runpod EU-SE-1 18xCPU, 100GB, A5000:24GB:2 yes $0.22 + 3 gcp us-west4 27xCPU, 150GB, A5000:24GB:3 yes $0.33 + +Submit the run llama31? [y/n]: y + +Provisioning... +---> 100% + +Service is published at: + http://localhost:3000/proxy/services/main/llama31/ +Model meta-llama/Meta-Llama-3.1-8B-Instruct is published at: + http://localhost:3000/proxy/models/main/ +``` + +
+
+`dstack apply` automatically provisions instances, uploads the contents of the repo (incl. your local uncommitted changes),
+and runs the service.
+
+### Service endpoint
+
+If a [gateway](gateways.md) is not configured, the service’s endpoint will be accessible at
+`<dstack server URL>/proxy/services/<project name>/<run name>/`.
+
+ +```shell +$ curl http://localhost:3000/proxy/services/main/llama31/v1/chat/completions \ + -H 'Content-Type: application/json' \ + -H 'Authorization: Bearer <dstack token>' \ + -d '{ + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "messages": [ + { + "role": "user", + "content": "Compose a poem that explains the concept of recursion in programming." + } + ] + }' +``` + +
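+
+Since the example serves vLLM, other OpenAI-style routes work the same way. For instance, to list the
+available models (the `v1/models` route comes from vLLM itself, not from `dstack`):
+
+```shell
+$ curl http://localhost:3000/proxy/services/main/llama31/v1/models \
+    -H 'Authorization: Bearer <dstack token>'
+```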
+
+If the service defines the `model` property, the model can be accessed with
+the global OpenAI-compatible endpoint at `<dstack server URL>/proxy/models/<project name>/`,
+or via the `dstack` UI.
+
+??? info "Gateway"
+    If a [gateway](gateways.md) is configured, the service endpoint will be accessible at
+    `https://<run name>.<gateway domain>/`.
+
+    If the service defines the `model` property, the model will be available via the global OpenAI-compatible endpoint
+    at `https://gateway.<gateway domain>/`.
+
+[//]: # (By default, the service endpoint requires the `Authorization` header with `Bearer <token>`.)
+[//]: # (Authorization can be disabled by setting [`auth`](../reference/dstack.yml/service.md#authorization) to `false` in the)
+[//]: # (service configuration file.)
+
+!!! info "What's next?"
+    1. Read about [dev environments](dev-environments.md), [tasks](tasks.md), and [repos](repos.md)
+    2. Learn how to manage [fleets](fleets.md)
+    3. See how to set up [gateways](gateways.md)
+    4. Check the [TGI :material-arrow-top-right-thin:{ .external }](../../examples/deployment/tgi/index.md){:target="_blank"},
+       [vLLM :material-arrow-top-right-thin:{ .external }](../../examples/deployment/vllm/index.md){:target="_blank"}, and
+       [NIM :material-arrow-top-right-thin:{ .external }](../../examples/deployment/nim/index.md){:target="_blank"} examples
\ No newline at end of file
diff --git a/docs/docs/concepts/snippets/manage-fleets.ext b/docs/docs/concepts/snippets/manage-fleets.ext
new file mode 100644
index 000000000..d9e76c159
--- /dev/null
+++ b/docs/docs/concepts/snippets/manage-fleets.ext
@@ -0,0 +1,30 @@
+### Creation policy
+
+By default, when you run `dstack apply` with a dev environment, task, or service,
+if no `idle` instances from the available fleets meet the requirements, `dstack` creates a new fleet
+using configured backends.
+
+To ensure `dstack apply` doesn't create a new fleet but reuses an existing one,
+pass `-R` (or `--reuse`) to `dstack apply`.
+
+ +```shell +$ dstack apply -R -f examples/.dstack.yml +``` + +
+
+Or, set [`creation_policy`](../reference/dstack.yml/dev-environment.md#creation_policy) to `reuse` in the run configuration.
+
+### Idle duration
+
+If a fleet is created automatically, it stays `idle` for 5 minutes by default and can be reused within that time.
+If the fleet is not reused within this period, it is automatically terminated.
+To change the default idle duration, set
+[`idle_duration`](../reference/dstack.yml/fleet.md#idle_duration) in the run configuration (e.g., `0s`, `1m`, or `off` for
+unlimited).
+
+!!! info "Fleets"
+    For greater control over fleet provisioning, it is recommended to create
+    [fleets](fleets.md) explicitly.
\ No newline at end of file
diff --git a/docs/docs/concepts/snippets/manage-runs.ext b/docs/docs/concepts/snippets/manage-runs.ext
new file mode 100644
index 000000000..13afd8403
--- /dev/null
+++ b/docs/docs/concepts/snippets/manage-runs.ext
@@ -0,0 +1,23 @@
+## Manage runs
+
+### List runs
+
+The [`dstack ps`](../reference/cli/dstack/ps.md) command lists all running jobs and their statuses.
+Use `--watch` (or `-w`) to monitor the live status of runs.
+
+### Stop a run
+
+A run continues until you stop it or until its lifetime exceeds [`max_duration`](../reference/dstack.yml/dev-environment.md#max_duration).
+To gracefully stop a run, use [`dstack stop`](../reference/cli/dstack/stop.md).
+Pass `--abort` or `-x` to stop without waiting for a graceful shutdown.
+
+### Attach to a run
+
+By default, `dstack apply` runs in attached mode – it establishes the SSH tunnel to the run, forwards ports, and shows real-time logs.
+If you detached from a run, you can reattach to it using [`dstack attach`](../reference/cli/dstack/attach.md).
+
+### See run logs
+
+To see the logs of a run without attaching, use [`dstack logs`](../reference/cli/dstack/logs.md).
+Pass `--diagnose`/`-d` to `dstack logs` to see the diagnostics logs, which may be useful if a run fails.
+For more information on debugging failed runs, see the [troubleshooting](../guides/troubleshooting.md) guide.
\ No newline at end of file
diff --git a/docs/docs/concepts/tasks.md b/docs/docs/concepts/tasks.md
new file mode 100644
index 000000000..aaad78dce
--- /dev/null
+++ b/docs/docs/concepts/tasks.md
@@ -0,0 +1,392 @@
+# Tasks
+
+A task allows you to run arbitrary commands on one or more nodes.
+Tasks are best suited for jobs like training or batch processing.
+
+## Define a configuration
+
+First, define a task configuration as a YAML file in your project folder.
+The filename must end with `.dstack.yml` (e.g. `.dstack.yml` or `task.dstack.yml` are both acceptable).
+
+[//]: # (TODO: Make tabs - single machine & distributed tasks & web app)
+
+
+```yaml
+type: task
+# The name is optional, if not specified, generated randomly
+name: axolotl-train
+
+# Using the official Axolotl Docker image
+image: winglian/axolotl-cloud:main-20240429-py3.11-cu121-2.2.1
+
+# Required environment variables
+env:
+  - HF_TOKEN
+  - WANDB_API_KEY
+# Commands of the task
+commands:
+  - accelerate launch -m axolotl.cli.train examples/fine-tuning/axolotl/config.yaml
+
+resources:
+  gpu:
+    # 24GB or more vRAM
+    memory: 24GB..
+    # Two or more GPUs
+    count: 2..
+```
+
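+
+Since `HF_TOKEN` and `WANDB_API_KEY` are declared without values, you'd pass them at apply time,
+as shown under [Run a configuration](#run-a-configuration) below. A quick sketch (the file name is
+an assumption):
+
+```shell
+$ HF_TOKEN=... WANDB_API_KEY=... dstack apply -f task.dstack.yml
+```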
+
+### Ports
+
+A task can configure ports. If the task runs an application on one of these ports, `dstack apply`
+securely lets you access the port from your local machine through port forwarding.
+
+ +```yaml +type: task +# The name is optional, if not specified, generated randomly +name: streamlit-hello + +python: "3.10" + +# Commands of the task +commands: + - pip3 install streamlit + - streamlit hello +# Expose the port to access the web app +ports: + - 8501 +``` + +
+
+When running it, `dstack apply` forwards the remote port `8501` to `localhost:8501`, enabling secure access to the running
+application.
+
+### Distributed tasks
+
+By default, a task runs on a single node.
+However, you can run it on a cluster of nodes by specifying `nodes`.
+
+ +```yaml +type: task +# The name is optional, if not specified, generated randomly +name: train-distrib + +# The size of the cluster +nodes: 2 + +python: "3.10" + +# Commands of the task +commands: + - pip install -r requirements.txt + - torchrun + --nproc_per_node=$DSTACK_GPUS_PER_NODE + --node_rank=$DSTACK_NODE_RANK + --nnodes=$DSTACK_NODES_NUM + --master_addr=$DSTACK_MASTER_NODE_IP + --master_port=8008 resnet_ddp.py + --num_epochs 20 + +resources: + gpu: 24GB +``` + +
+
+All you need to do is pass the corresponding environment variables such as
+`DSTACK_GPUS_PER_NODE`, `DSTACK_NODE_RANK`, `DSTACK_NODES_NUM`,
+`DSTACK_MASTER_NODE_IP`, and `DSTACK_GPUS_NUM` (see [System environment variables](#system-environment-variables)).
+
+!!! info "Fleets"
+    To ensure all nodes are provisioned into a cluster placement group and to enable the highest level of inter-node
+    connectivity (incl. support for [EFA :material-arrow-top-right-thin:{ .external }](https://aws.amazon.com/hpc/efa/){:target="_blank"}),
+    create a [fleet](fleets.md) via a configuration before running a distributed task.
+
+`dstack` is easy to use with `accelerate`, `torchrun`, Ray, Spark, and any other distributed framework.
+
+### Resources
+
+When you specify a resource value like `cpu` or `memory`,
+you can either use an exact value (e.g. `24GB`) or a
+range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`).
+
+ +```yaml +type: task +# The name is optional, if not specified, generated randomly +name: train + +# Commands of the task +commands: + - pip install -r fine-tuning/qlora/requirements.txt + - python fine-tuning/qlora/train.py + +resources: + # 200GB or more RAM + memory: 200GB.. + # 4 GPUs from 40GB to 80GB + gpu: 40GB..80GB:4 + # Shared memory (required by multi-gpu) + shm_size: 16GB + # Disk size + disk: 500GB +``` + +
+
+The `gpu` property allows specifying not only the memory size but also the GPU vendor, name,
+and quantity. Examples: `nvidia` (one NVIDIA GPU), `A100` (one A100), `A10G,A100` (either A10G or A100),
+`A100:80GB` (one A100 of 80GB), `A100:2` (two A100), `24GB..40GB:2` (two GPUs between 24GB and 40GB),
+`A100:40GB:2` (two A100 GPUs of 40GB).
+
+??? info "Google Cloud TPU"
+    To use TPUs, specify the TPU architecture via the `gpu` property.
+
+    ```yaml
+    type: task
+    # The name is optional, if not specified, generated randomly
+    name: train
+
+    python: "3.10"
+
+    # Commands of the task
+    commands:
+      - pip install -r fine-tuning/qlora/requirements.txt
+      - python fine-tuning/qlora/train.py
+
+    resources:
+      gpu: v2-8
+    ```
+
+    Currently, only 8 TPU cores can be specified, supporting single TPU device workloads. Multi-TPU support is coming soon.
+
+??? info "Shared memory"
+    If you are using parallel communicating processes (e.g., dataloaders in PyTorch), you may need to configure
+    `shm_size`, e.g. set it to `16GB`.
+
+### Python version
+
+If you don't specify `image`, `dstack` uses its base Docker image pre-configured with
+`python`, `pip`, `conda` (Miniforge), and essential CUDA drivers.
+The `python` property determines which default Docker image is used.
+
+ +```yaml +type: task +# The name is optional, if not specified, generated randomly +name: train + +# If `image` is not specified, dstack uses its base image +python: "3.10" + +# Commands of the task +commands: + - pip install -r fine-tuning/qlora/requirements.txt + - python fine-tuning/qlora/train.py +``` + +
+
+??? info "nvcc"
+    By default, the base Docker image doesn’t include `nvcc`, which is required for building custom CUDA kernels.
+    If you need `nvcc`, set the `nvcc` property to `true`.
+
+
+    ```yaml
+    type: task
+    # The name is optional, if not specified, generated randomly
+    name: train
+
+    # If `image` is not specified, dstack uses its base image
+    python: "3.10"
+    # Ensure nvcc is installed (req. for Flash Attention)
+    nvcc: true
+
+    commands:
+      - pip install -r fine-tuning/qlora/requirements.txt
+      - python fine-tuning/qlora/train.py
+    ```
+
+### Docker
+
+If you want, you can specify your own Docker image via `image`.
+
+ +```yaml +type: task +# The name is optional, if not specified, generated randomly +name: train + +# Any custom Docker image +image: dstackai/base:py3.13-0.6-cuda-12.1 + +# Commands of the task +commands: + - pip install -r fine-tuning/qlora/requirements.txt + - python fine-tuning/qlora/train.py +``` + +
+
+??? info "Private registry"
+    Use the `registry_auth` property to provide credentials for a private Docker registry.
+
+    ```yaml
+    type: task
+    # The name is optional, if not specified, generated randomly
+    name: train
+
+    # Any private Docker image
+    image: dstackai/base:py3.13-0.6-cuda-12.1
+    # Credentials of the private Docker registry
+    registry_auth:
+      username: peterschmidt85
+      password: ghp_e49HcZ9oYwBzUbcSk2080gXZOU2hiT9AeSR5
+
+    # Commands of the task
+    commands:
+      - pip install -r fine-tuning/qlora/requirements.txt
+      - python fine-tuning/qlora/train.py
+    ```
+
+??? info "Privileged mode"
+    All backends except `runpod`, `vastai`, and `kubernetes` support running containers in privileged mode.
+    This mode enables features like using [Docker and Docker Compose](../guides/protips.md#docker-and-docker-compose)
+    inside `dstack` runs.
+
+### Environment variables
+
+ +```yaml +type: task +# The name is optional, if not specified, generated randomly +name: train + +python: "3.10" + +# Environment variables +env: + - HF_TOKEN + - HF_HUB_ENABLE_HF_TRANSFER=1 + +# Commands of the task +commands: + - pip install -r fine-tuning/qlora/requirements.txt + - python fine-tuning/qlora/train.py +``` + +
+
+If you don't assign a value to an environment variable (see `HF_TOKEN` above),
+`dstack` will require the value to be passed via the CLI or set in the current process.
+
+
+??? info "System environment variables"
+    The following environment variables are available in any run by default:
+
+    | Name                    | Description                                                        |
+    |-------------------------|--------------------------------------------------------------------|
+    | `DSTACK_RUN_NAME`       | The name of the run                                                |
+    | `DSTACK_REPO_ID`        | The ID of the repo                                                 |
+    | `DSTACK_GPUS_NUM`       | The total number of GPUs in the run                                |
+    | `DSTACK_NODES_NUM`      | The number of nodes in the run                                     |
+    | `DSTACK_GPUS_PER_NODE`  | The number of GPUs per node                                        |
+    | `DSTACK_NODE_RANK`      | The rank of the node                                               |
+    | `DSTACK_MASTER_NODE_IP` | The internal IP address of the master node                         |
+    | `DSTACK_NODES_IPS`      | The list of internal IP addresses of all nodes delimited by "\n"   |
+
+### Spot policy
+
+By default, `dstack` uses on-demand instances. However, you can change that
+via the [`spot_policy`](../reference/dstack.yml/task.md#spot_policy) property. It accepts `spot`, `on-demand`, and `auto`.
+
+!!! info "Reference"
+    Tasks support many more configuration options,
+    incl. [`backends`](../reference/dstack.yml/task.md#backends),
+    [`regions`](../reference/dstack.yml/task.md#regions),
+    [`max_price`](../reference/dstack.yml/task.md#max_price), and
+    [`max_duration`](../reference/dstack.yml/task.md#max_duration),
+    among [others](../reference/dstack.yml/task.md).
+
+## Run a configuration
+
+To run a task, pass the configuration to [`dstack apply`](../reference/cli/dstack/apply.md):
+
+ +```shell +$ HF_TOKEN=... +$ WANDB_API_KEY=... +$ dstack apply -f examples/.dstack.yml + + # BACKEND REGION RESOURCES SPOT PRICE + 1 runpod CA-MTL-1 18xCPU, 100GB, A5000:24GB:2 yes $0.22 + 2 runpod EU-SE-1 18xCPU, 100GB, A5000:24GB:2 yes $0.22 + 3 gcp us-west4 27xCPU, 150GB, A5000:24GB:3 yes $0.33 + +Submit the run axolotl-train? [y/n]: y + +Launching `axolotl-train`... +---> 100% + +{'loss': 1.4967, 'grad_norm': 1.2734375, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.0} + 0% 1/24680 [00:13<95:34:17, 13.94s/it] + 6% 73/1300 [00:48<13:57, 1.47it/s] +``` + +
+ +`dstack apply` automatically provisions instances, uploads the contents of the repo (incl. your local uncommitted changes), +and runs the commands. + +### Retry policy + +By default, if `dstack` can't find capacity, the task exits with an error, or the instance is interrupted, +the run will fail. + +If you'd like `dstack` to automatically retry, configure the +[retry](../reference/dstack.yml/task.md#retry) property accordingly: + +
+ +```yaml +type: task +# The name is optional, if not specified, generated randomly +name: train + +python: "3.10" + +# Commands of the task +commands: + - pip install -r fine-tuning/qlora/requirements.txt + - python fine-tuning/qlora/train.py + +retry: + # Retry on specific events + on_events: [no-capacity, error, interruption] + # Retry for up to 1 hour + duration: 1h +``` + +
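+
+Relatedly, you can cap how long a task may run via the
+[`max_duration`](../reference/dstack.yml/task.md#max_duration) property mentioned above. A minimal
+sketch (the `1d` value is just an example):
+
+```yaml
+type: task
+name: train
+
+commands:
+  - python fine-tuning/qlora/train.py
+
+# Stop the run automatically after one day
+max_duration: 1d
+```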
+
+--8<-- "docs/concepts/snippets/manage-fleets.ext"
+
+--8<-- "docs/concepts/snippets/manage-runs.ext"
+
+!!! info "What's next?"
+
+    1. Read about [dev environments](dev-environments.md), [services](services.md), and [repos](repos.md)
+    2. Learn how to manage [fleets](fleets.md)
+    3. Check the [Axolotl](/examples/fine-tuning/axolotl) example
diff --git a/docs/docs/concepts/volumes.md b/docs/docs/concepts/volumes.md
index 1dfb2fb17..ab3746167 100644
--- a/docs/docs/concepts/volumes.md
+++ b/docs/docs/concepts/volumes.md
@@ -1,27 +1,29 @@
 # Volumes
 
-Volumes allow you to persist data between runs. `dstack` supports two kinds of volumes: [network volumes](#network-volumes)
-and [instance volumes](#instance-volumes).
+Volumes enable data persistence between runs of dev environments, tasks, and services.
 
-## Network volumes
+`dstack` supports two kinds of volumes:
+
+* [Network volumes](#network-volumes) — provisioned via backends and mounted to specific container directories.
+  Ideal for persistent storage.
+* [Instance volumes](#instance-volumes) — bind directories on the host instance to container directories.
+  Useful as a cache for cloud fleets or for persistent storage with SSH fleets.
 
-`dstack` allows to create and attach network volumes to dev environments, tasks, and services.
+## Network volumes
 
-!!! info "Backends"
-    Network volumes are currently supported for the `aws`, `gcp`, and `runpod` backends.
-    Support for other backends is on the roadmap.
+Network volumes are currently supported for the `aws`, `gcp`, and `runpod` backends.
 
 ### Define a configuration
 
 First, define a volume configuration as a YAML file in your project folder.
-The filename must end with `.dstack.yml` (e.g. `.dstack.yml` or `vol.dstack.yml` are both acceptable).
+The filename must end with `.dstack.yml` (e.g. `.dstack.yml` or `volume.dstack.yml` are both acceptable).
 
-
+
```yaml type: volume # A name of the volume -name: my-new-volume +name: my-volume # Volumes are bound to a specific backend and region backend: aws @@ -35,16 +37,32 @@ size: 100GB If you use this configuration, `dstack` will create a new volume based on the specified options. -!!! info "Registering existing volumes" +??? info "Register existing volumes" If you prefer not to create a new volume but to reuse an existing one (e.g., created manually), you can [specify its ID via `volume_id`](../reference/dstack.yml/volume.md#existing-volume). In this case, `dstack` will register the specified volume so that you can use it with dev environments, tasks, and services. +
+ + ```yaml + type: volume + # The name of the volume + name: my-volume + + # Volumes are bound to a specific backend and region + backend: aws + region: eu-central-1 + + # The ID of the volume in AWS + volume_id: vol1235 + ``` + +
+ !!! info "Filesystem" If you register an existing volume, you must ensure the volume already has a filesystem. !!! info "Reference" - See [.dstack.yml](../reference/dstack.yml/volume.md) for all the options supported by - volumes, along with multiple examples. + For all volume configuration options, refer to the [reference](../reference/dstack.yml/volume.md). ### Create, register, or update a volume @@ -54,10 +72,10 @@ To create or register the volume, pass the volume configuration to `dstack apply ```shell $ dstack apply -f volume.dstack.yml -Volume my-new-volume does not exist yet. Create the volume? [y/n]: y +Volume my-volume does not exist yet. Create the volume? [y/n]: y - NAME BACKEND REGION STATUS CREATED - my-new-volume aws eu-central-1 submitted now + NAME BACKEND REGION STATUS CREATED + my-volume aws eu-central-1 submitted now ``` @@ -66,8 +84,7 @@ Volume my-new-volume does not exist yet. Create the volume? [y/n]: y Once created, the volume can be attached to dev environments, tasks, and services. -!!! info "Filesystem" - When creating a network volume, `dstack` automatically creates an `ext4` filesystem on it. +> When creating a network volume, `dstack` automatically creates an `ext4` filesystem on it. ### Attach a volume { #attach-network-volume } @@ -86,12 +103,12 @@ ide: vscode # Map the name of the volume to any path volumes: - - name: my-new-volume + - name: my-volume path: /volume_data # You can also use the short syntax in the `name:path` form # volumes: -# - my-new-volume:/volume_data +# - my-volume:/volume_data ```
@@ -99,7 +116,7 @@ volumes: Once you run this configuration, the contents of the volume will be attached to `/volume_data` inside the dev environment, and its contents will persist across runs. -!!! info "Attaching volumes across regions and backends" +!!! info "Attach volumes across regions and backends" If you're unsure in advance which region or backend you'd like to use (or which is available), you can specify multiple volumes for the same path. @@ -115,7 +132,7 @@ and its contents will persist across runs. `dstack` will attach one of the volumes based on the region and backend of the run. -??? info "Limitations" +??? info "Container path" When you're running a dev environment, task, or service with `dstack`, it automatically mounts the project folder contents to `/workflow` (and sets that as the current working directory). Right now, `dstack` doesn't allow you to attach volumes to `/workflow` or any of its subdirectories. @@ -126,49 +143,62 @@ and its contents will persist across runs. The [`dstack volume list`](../reference/cli/dstack/volume.md#dstack-volume-list) command lists created and registered volumes: -``` +
+ +```shell $ dstack volume list -NAME BACKEND REGION STATUS CREATED - my-new-volume aws eu-central-1 active 3 weeks ago +NAME BACKEND REGION STATUS CREATED + my-volume aws eu-central-1 active 3 weeks ago ``` +
+ #### Delete volumes When the volume isn't attached to any active dev environment, task, or service, you can delete it by passing the volume configuration to `dstack delete`: +
+ ```shell $ dstack delete -f vol.dstack.yaml ``` +
+ Alternatively, you can delete a volume by passing the volume name to `dstack volume delete`. If the volume was created using `dstack`, it will be physically destroyed along with the data. If you've registered an existing volume, it will be de-registered with `dstack` but will keep the data. +### FAQs -## Instance volumes +??? info "Can I use network volumes across backends?" + + Since volumes are backed up by cloud network disks, you can only use them within the same cloud. If you need to access + data across different backends, you should either use object storage or replicate the data across multiple volumes. -Unlike [network volumes](#network-volumes), which are persistent external resources mounted over network, -instance volumes are part of the instance storage. Basically, the instance volume is a filesystem path -(a directory or a file) mounted inside the run container. +??? info "Can I use network volumes across regions?" -As a consequence, the contents of the instance volume are specific to the instance -where the run is executed, and data persistence, integrity, and even existence are guaranteed only if the subsequent run -is executed on the same exact instance, and there is no other runs in between. + Typically, network volumes are associated with specific regions, so you can't use them in other regions. Often, + volumes are also linked to availability zones, but some providers support volumes that can be used across different + availability zones within the same region. + + If you don't want to limit a run to one particular region, you can create different volumes for different regions + and specify them for the same mount point as [documented above](#attach-network-volume). -!!! info "Backends" - Instance volumes are currently supported for all backends except `runpod`, `vastai` and `kubernetes`. +??? info "Can I attach network volumes to multiple runs or instances?" + You can mount a volume in multiple runs. This feature is currently supported only by the `runpod` backend. -### Manage volumes { #manage-instance-volumes } +## Instance volumes -You don't need to create or delete instance volumes, and they are not displayed in the -[`dstack volume list`](../reference/cli/dstack/volume.md#dstack-volume-list) command output. +Instance volumes allow mapping any directory on the instance where the run is executed to any path inside the container. +This means that the data in instance volumes is persisted only if the run is executed on the same instance. -### Attach a volume { #attach-instance-volume } +### Attach a volume -Dev environments, tasks, and services let you attach any number of instance volumes. -To attach an instance volume, specify the `instance_path` and `path` in the `volumes` property: +A run can configure any number of instance volumes. To attach an instance volume, +specify the `instance_path` and `path` in the `volumes` property:
@@ -191,59 +221,44 @@ volumes:
-### Use cases { #instance-volumes-use-cases } - -Despite the limitations, instance volumes can still be useful in some cases: - -=== "Cache" - - For example, if runs regularly install packages with `pip install`, include the instance volume in the run configuration - to reuse pip cache between runs: +Since persistence isn't guaranteed (instances may be interrupted or runs may occur on different instances), use instance +volumes only for caching or with directories manually mounted to network storage. -
- - ```yaml - type: task - - volumes: - - /dstack-cache/pip:/root/.cache/pip - ``` +> Instance volumes are currently supported for all backends except `runpod`, `vastai` and `kubernetes`, +> and can also be used with [SSH fleets](fleets.md#ssh). -
- -=== "Network storage with SSH fleet" - - If you manage your own instances, you can mount network storages (e.g., NFS or SMB) to the hosts and access them in the runs. - Imagine you mounted the same network storage to all the fleet instances using the same path `/mnt/nfs-storage`, - then you can treat the instance volume as a shared persistent storage: +### Use instance volumes for caching -
- - ```yaml - type: task +For example, if a run regularly installs packages with `pip install`, +you can mount the `/root/.cache/pip` folder inside the container to a folder on the instance for +reuse. - volumes: - - /mnt/nfs-storage:/storage - ``` +
-
+```yaml +type: task -## FAQ +volumes: + - /dstack-cache/pip:/root/.cache/pip +``` -##### Can I use network volumes across backends? +
-Since volumes are backed up by cloud network disks, you can only use them within the same cloud. If you need to access -data across different backends, you should either use object storage or replicate the data across multiple volumes. +### Use instance volumes with SSH fleets + +If you control the instances (e.g. they are on-prem servers configured via [SSH fleets](fleets.md#ssh)), +you can mount network storage (e.g., NFS or SMB) and use the mount points as instance volumes. -##### Can I use network volumes across regions? +For example, if you mount a network storage to `/mnt/nfs-storage` on all hosts of your SSH fleet, +you can map this directory via instance volumes and be sure the data is persisted. -Typically, network volumes are associated with specific regions, so you can't use them in other regions. Often, -volumes are also linked to availability zones, but some providers support volumes that can be used across different -availability zones within the same region. +
-If you don't want to limit a run to one particular region, you can create different volumes for different regions -and specify them for the same mount point as [documented above](#attach-network-volume). +```yaml +type: task -##### Can I attach network volumes to multiple runs or instances? +volumes: + - /mnt/nfs-storage:/storage +``` -You can mount a volume in multiple runs. This feature is currently supported only by the `runpod` backend. +
\ No newline at end of file diff --git a/docs/docs/dev-environments.md b/docs/docs/dev-environments.md deleted file mode 100644 index 7ee94ec6b..000000000 --- a/docs/docs/dev-environments.md +++ /dev/null @@ -1,158 +0,0 @@ -# Dev environments - -A dev environment lets you provision an instance and access it with your desktop IDE. - -## Define a configuration - -First, define a dev environment configuration as a YAML file in your project folder. -The filename must end with `.dstack.yml` (e.g. `.dstack.yml` or `dev.dstack.yml` are both acceptable). - -
- -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -python: "3.11" -# Uncomment to use a custom Docker image -#image: dstackai/base:py3.13-0.6-cuda-12.1 -ide: vscode - -# Uncomment to leverage spot instances -#spot_policy: auto - -resources: - gpu: 24GB -``` - -
- -!!! info "Docker image" - If you don't specify your Docker image, `dstack` uses the [base](https://hub.docker.com/r/dstackai/base/tags) image - pre-configured with Python, Conda, and essential CUDA drivers. - -!!! info "Reference" - See [.dstack.yml](reference/dstack.yml/dev-environment.md) for all the options supported by - dev environments, along with multiple examples. - -## Run a configuration - -To run a dev environment, pass the configuration to [`dstack apply`](reference/cli/dstack/apply.md): - -
- -```shell -$ dstack apply -f examples/.dstack.yml - - # BACKEND REGION RESOURCES SPOT PRICE - 1 runpod CA-MTL-1 9xCPU, 48GB, A5000:24GB yes $0.11 - 2 runpod EU-SE-1 9xCPU, 43GB, A5000:24GB yes $0.11 - 3 gcp us-west4 4xCPU, 16GB, L4:24GB yes $0.214516 - -Submit the run vscode? [y/n]: y - -Launching `vscode`... ----> 100% - -To open in VS Code Desktop, use this link: - vscode://vscode-remote/ssh-remote+vscode/workflow -``` - -
- -`dstack apply` automatically provisions an instance, uploads the contents of the repo (incl. your local uncommitted changes), -and sets up an IDE on the instance. - -!!! info "Windows" - On Windows, `dstack` works both natively and inside WSL. But, for dev environments, - it's recommended _not to use_ `dstack apply` _inside WSL_ due to a [VS Code issue :material-arrow-top-right-thin:{ .external }](https://github.com/microsoft/vscode-remote-release/issues/937){:target="_blank"}. - -### VS Code - -To open the dev environment in your desktop IDE, use the link from the output -(such as `vscode://vscode-remote/ssh-remote+fast-moth-1/workflow`). - -![](../assets/images/dstack-vscode-jupyter.png){ width=800 } - -### SSH - -Alternatively, while the CLI is attached to the run, you can connect to the dev environment via SSH: - -
- -```shell -$ ssh fast-moth-1 -``` - -
- -## Manage runs - -### List runs - -The [`dstack ps`](reference/cli/dstack/ps.md) command lists all running jobs and their statuses. -Use `--watch` (or `-w`) to monitor the live status of runs. - -### Stop a run - -A dev environment runs until you stop it or its lifetime exceeds [`max_duration`](reference/dstack.yml/dev-environment.md#max_duration). -To gracefully stop a dev environment, use [`dstack stop`](reference/cli/dstack/stop.md). -Pass `--abort` or `-x` to stop without waiting for a graceful shutdown. - -### Attach to a run - -By default, `dstack apply` runs in attached mode – it establishes the SSH tunnel to the run, forwards ports, and shows real-time logs. -If you detached from a run, you can reattach to it using [`dstack attach`](reference/cli/dstack/attach.md). - -### See run logs - -To see the logs of a run without attaching, use [`dstack logs`](reference/cli/dstack/logs.md). -Pass `--diagnose`/`-d` to `dstack logs` to see the diagnostics logs. It may be useful if a run fails. -For more information on debugging failed runs, see the [troubleshooting](guides/troubleshooting.md) guide. - -## Manage fleets - -Fleets are groups of cloud instances or SSH machines that you use to run dev environments, tasks, and services. -You can let `dstack apply` provision fleets or [create and manage them directly](concepts/fleets.md). - -### Creation policy - -By default, when you run `dstack apply` with a dev environment, task, or service, -`dstack` reuses `idle` instances from an existing [fleet](concepts/fleets.md). -If no `idle` instances match the requirements, `dstack` automatically creates a new fleet -using configured backends. - -To ensure `dstack apply` doesn't create a new fleet but reuses an existing one, -pass `-R` (or `--reuse`) to `dstack apply`. - -
- -```shell -$ dstack apply -R -f examples/.dstack.yml -``` - -
- -Alternatively, set [`creation_policy`](reference/dstack.yml/dev-environment.md#creation_policy) to `reuse` in the run configuration. - -### Idle duration - -If a fleet is created automatically, it stays `idle` for 5 minutes by default and can be reused within that time. -If the fleet is not reused within this period, it is automatically terminated. -To change the default idle duration, set -[`idle_duration`](reference/dstack.yml/fleet.md#idle_duration) in the run configuration (e.g., `0s`, `1m`, or `off` for -unlimited). - -!!! info "Fleets" - For greater control over fleet provisioning, configuration, and lifecycle management, it is recommended to use - [fleets](concepts/fleets.md) directly. - -## What's next? - -1. Read about [tasks](tasks.md), [services](services.md), and [repos](concepts/repos.md) -2. Learn how to manage [fleets](concepts/fleets.md) - -!!! info "Reference" - See [.dstack.yml](reference/dstack.yml/dev-environment.md) for all the options supported by - dev environments, along with multiple examples. diff --git a/docs/docs/concepts/projects.md b/docs/docs/guides/administration.md similarity index 93% rename from docs/docs/concepts/projects.md rename to docs/docs/guides/administration.md index 86cef65f0..c2f2ce433 100644 --- a/docs/docs/concepts/projects.md +++ b/docs/docs/guides/administration.md @@ -1,12 +1,12 @@ -# Projects +# Administration Projects enable the isolation of different teams and their resources. Each project can configure its own backends and control which users have access to it. -> While project backends can be configured via [`~/.dstack/server/config.yml`](../reference/server/config.yml.md), use the control plane UI to fully manage -> projects, users, and user permissions. +> While project backends can be configured via [`~/.dstack/server/config.yml`](../reference/server/config.yml.md), +> use the UI to fully manage projects, users, and user permissions. -## Project backends +## Project backends { #backends } In addition to [`~/.dstack/server/config.yml`](../reference/server/config.yml.md), a global admin or a project admin can configure backends on the project settings page. diff --git a/docs/docs/guides/protips.md b/docs/docs/guides/protips.md index 8171ef2a1..88ed21d0f 100644 --- a/docs/docs/guides/protips.md +++ b/docs/docs/guides/protips.md @@ -41,7 +41,7 @@ To persist data across runs, it is recommended to use volumes. (for persisting data even if the instance is interrupted) and [instance](../concepts/volumes.md#instance-volumes) (useful for persisting cached data across runs while the instance remains active). -> If you use [SSH fleets](../concepts/fleets.md#ssh-fleets), you can mount network storage (e.g., NFS or SMB) to the hosts and access it in runs via instance volumes. +> If you use [SSH fleets](../concepts/fleets.md#ssh), you can mount network storage (e.g., NFS or SMB) to the hosts and access it in runs via instance volumes. ## Dev environments @@ -124,8 +124,8 @@ This allows you to access the remote `8501` port on `localhost:8501` while the C This will forward the remote `8501` port to `localhost:3000`. !!! info "Tasks vs. services" - [Services](../services.md) provide external access, `https`, replicas with autoscaling, OpenAI-compatible endpoint - and other service features. If you don't need them, you can use [tasks](../tasks.md) for running apps. + [Services](../concepts/services.md) provide external access, `https`, replicas with autoscaling, OpenAI-compatible endpoint + and other service features. 
If you don't need them, you can use [tasks](../concepts/tasks.md) for running apps. ## Docker and Docker Compose diff --git a/docs/docs/guides/server-deployment.md b/docs/docs/guides/server-deployment.md index 032f40b83..9472ee688 100644 --- a/docs/docs/guides/server-deployment.md +++ b/docs/docs/guides/server-deployment.md @@ -38,8 +38,7 @@ You can run the server either through `pip` or using Docker.
-=== "AWS CloudFormation" - +??? info "AWS CloudFormation" If you'd like to deploy the server to a private AWS VPC, you can use our CloudFormation [template :material-arrow-top-right-thin:{ .external }](https://console.aws.amazon.com/cloudformation/home#/stacks/quickcreate?templateURL=https://get-dstack.s3.eu-west-1.amazonaws.com/cloudformation/template.yaml){:target="_blank"}. @@ -57,14 +56,14 @@ You can run the server either through `pip` or using Docker. ## Backend configuration -To use `dstack` with your own cloud accounts, create the `~/.dstack/server/config.yml` file and -[configure backends](../reference/server/config.yml.md). +To use `dstack` with cloud providers, configure [backends](../concepts/backends.md) +via the `~/.dstack/server/config.yml` file. The server loads this file on startup. -Alternatively, you can configure backends on the [project settings page](../concepts/projects/#project-backends) via the control plane's UI. +Alternatively, you can configure backends on the [project settings page](../guides/administration.md#backends) via UI. > For using `dstack` with on-prem servers, no backend configuration is required. -> See [SSH fleets](../concepts/fleets.md#ssh-fleets) for more details. +> Use [SSH fleets](../concepts/fleets.md#ssh) instead. ## State persistence @@ -183,12 +182,12 @@ If you want backend credentials and user tokens to be encrypted, set up encrypti === "Client" The client backward compatibility is maintained across patch releases. A new minor release indicates that the release breaks client backward compatibility. This means you don't need to update the server when you update the client to a new patch release. Still, upgrading a client to a new minor version requires upgrading the server too. -## FAQ +## FAQs -##### Can I run multiple replicas of the dstack server? +??? info "Can I run multiple replicas of dstack server?" -Yes, you can if you configure `dstack` to use [PostgreSQL](#postgresql) and [AWS CloudWatch](#aws-cloudwatch). + Yes, you can if you configure `dstack` to use [PostgreSQL](#postgresql) and [AWS CloudWatch](#aws-cloudwatch). -##### Does the dstack server support blue-green or rolling deployments? +??? info "Does dstack server support blue-green or rolling deployments?" -Yes, it does if you configure `dstack` to use [PostgreSQL](#postgresql) and [AWS CloudWatch](#aws-cloudwatch). + Yes, it does if you configure `dstack` to use [PostgreSQL](#postgresql) and [AWS CloudWatch](#aws-cloudwatch). diff --git a/docs/docs/guides/troubleshooting.md b/docs/docs/guides/troubleshooting.md index 68e3799fd..28f463ade 100644 --- a/docs/docs/guides/troubleshooting.md +++ b/docs/docs/guides/troubleshooting.md @@ -94,7 +94,7 @@ pointing to the gateway's hostname is configured. #### Cause 1: Bad Authorization -If the service endpoint returns a 403 error, it is likely because the [`Authorization`](../services.md#access-the-endpoint) +If the service endpoint returns a 403 error, it is likely because the [`Authorization`](../concepts/services.md#service-endpoint) header with the correct `dstack` token was not provided. [//]: # (#### Other) diff --git a/docs/docs/index.md b/docs/docs/index.md index 7e25baa6b..ccbe19a88 100644 --- a/docs/docs/index.md +++ b/docs/docs/index.md @@ -21,9 +21,9 @@ for AI workloads both in the cloud and on-prem, speeding up the development, tra `dstack` supports the following configurations: -* [Dev environments](dev-environments.md) — for interactive development using a desktop IDE -* [Tasks](tasks.md) — for scheduling jobs, incl. 
distributed ones (or running web apps) -* [Services](services.md) — for deploying models (or web apps) +* [Dev environments](concepts/dev-environments.md) — for interactive development using a desktop IDE +* [Tasks](concepts/tasks.md) — for scheduling jobs, incl. distributed ones (or running web apps) +* [Services](concepts/services.md) — for deploying models (or web apps) * [Fleets](concepts/fleets.md) — for managing cloud and on-prem clusters * [Volumes](concepts/volumes.md) — for managing network volumes (to persist data) * [Gateways](concepts/gateways.md) — for publishing services with a custom domain and HTTPS @@ -48,28 +48,16 @@ cloud platforms or on-premise servers. ## How does it compare to other tools? -??? info "Kubernetes" - #### How does dstack compare to Kubernetes? - - `dstack` and Kubernetes are both container orchestrators for cloud and on-premises environments. - - However, `dstack` is more lightweight, and is designed specifically for AI, enabling AI engineers to handle development, training, and +??? info "How does dstack compare to Kubernetes?" + `dstack` is more lightweight, and is designed specifically for AI, enabling AI engineers to handle development, training, and deployment without needing extra tools or Ops support. With `dstack`, you don't need Kubeflow or other ML platforms on top—everything is available out of the box. - Additionally, `dstack` is much easier to use for on-premises servers—just provide hostnames and SSH credentials, + Additionally, `dstack` is much easier to use with on-prem servers—just provide hostnames and SSH credentials, and `dstack` will automatically create a fleet ready for use with development environments, tasks, and services. - #### How does dstack compare to KubeFlow? - `dstack` can be used entirely instead of Kubeflow. It covers everything that Kubeflow does, and much more on top, - including development environments, services, and additional features. - - `dstack` is easier to set up with on-premises servers, doesn't require Kubernetes, and works with multiple cloud - providers out of the box. - - #### Can dstack and Kubernetes be used together? - +??? info "Can dstack and Kubernetes be used together?" For AI development, it’s more efficient to use `dstack` directly with your cloud accounts or on-prem servers—without Kubernetes. However, if you prefer, you can set up the `dstack` server with a Kubernetes backend to provision through Kubernetes. @@ -77,17 +65,22 @@ cloud platforms or on-premise servers. Does your Ops team insist on using Kubernetes for production-grade deployment? You can use `dstack` and Kubernetes side by side; `dstack` for development and Kubernetes for production-grade deployment. -??? info "Slurm" - #### How does dstack compare to Slurm? +??? info "How does dstack compare to KubeFlow?" + `dstack` can be used entirely instead of Kubeflow. It covers everything that Kubeflow does, and much more on top, + including development environments, services, and additional features. + + `dstack` is easier to set up with on-premises servers, doesn't require Kubernetes, and works with multiple cloud + providers out of the box. + +??? info "How does dstack compare to Slurm?" `dstack` can be used entirely instead of Slurm. It covers everything that Slurm does, and a lot more on top, including dev environments, services, out-of-the-box cloud support, easier setup with on-premises servers, and much more. [//]: # (??? info "Cloud platforms") [//]: # ( TBA) -## Where do I start? - -1. 
Proceed to [installation](installation/index.md) -2. See [quickstart](quickstart.md) -3. Browse [examples](/examples) -4. Join [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd){:target="_blank"} \ No newline at end of file +!!! info "Where do I start?" + 1. Proceed to [installation](installation/index.md) + 2. See [quickstart](quickstart.md) + 3. Browse [examples](/examples) + 4. Join [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd){:target="_blank"} \ No newline at end of file diff --git a/docs/docs/installation/index.md b/docs/docs/installation/index.md index f60b5d341..dac5d6951 100644 --- a/docs/docs/installation/index.md +++ b/docs/docs/installation/index.md @@ -11,10 +11,9 @@ To use the open-source version of `dstack` with your own cloud accounts or on-pr ### (Optional) Configure backends -To use `dstack` with your own cloud accounts, create the `~/.dstack/server/config.yml` file and -[configure backends](../reference/server/config.yml.md). Alternatively, you can configure backends via the control plane UI after you start the server. +To use `dstack` with cloud providers, configure [backends](../concepts/backends.md). -You can skip backends configuration if you intend to run containers only on your on-prem servers. Use [SSH fleets](../concepts/fleets.md#ssh-fleets) for that. +> To use `dstack` with on-prem servers, create [SSH fleets](../concepts/fleets.md#ssh) instead. ## Start the server @@ -95,10 +94,9 @@ Configuration is updated at ~/.dstack/config.yml This configuration is stored in `~/.dstack/config.yml`. -## What's next? - -1. Check the [server/config.yml reference](../reference/server/config.yml.md) on how to configure backends -2. Check [SSH fleets](../concepts/fleets.md#ssh-fleets) to learn about running on your on-prem servers -3. Follow [quickstart](../quickstart.md) -4. Browse [examples](/examples) -5. Join the community via [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd) \ No newline at end of file +!!! info "What's next?" + 1. Check the [server/config.yml reference](../reference/server/config.yml.md) on how to configure backends + 2. Check [SSH fleets](../concepts/fleets.md#ssh) to learn about running on your on-prem servers + 3. Follow [quickstart](../quickstart.md) + 4. Browse [examples](/examples) + 5. Join the community via [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd) \ No newline at end of file diff --git a/docs/docs/quickstart.md b/docs/docs/quickstart.md index b8434a6f4..b4fb106ef 100644 --- a/docs/docs/quickstart.md +++ b/docs/docs/quickstart.md @@ -21,8 +21,6 @@ $ dstack init A dev environment lets you provision an instance and access it with your desktop IDE. - #### Define a configuration - Create the following configuration file inside the repo:
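The configuration itself is elided from this hunk's context. For orientation, a minimal dev-environment configuration of the kind the quickstart describes might look like the sketch below (illustrative only; the `name` and `python` values are placeholders taken from the reference examples elsewhere in this change):

```yaml
type: dev-environment
# The name is optional, if not specified, generated randomly
name: vscode

# If `image` is not specified, dstack uses its base image
python: "3.10"

ide: vscode
```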
@@ -44,8 +42,6 @@ $ dstack init
- #### Run the configuration - Run the configuration via [`dstack apply`](reference/cli/dstack/apply.md):
@@ -77,8 +73,6 @@ $ dstack init A task allows you to schedule a job or run a web app. Tasks can be distributed and can forward ports. - #### Define a configuration - Create the following configuration file inside the repo:
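As with the dev environment above, the task configuration is cut from the hunk's context. A minimal task file along these lines might look like the following sketch (illustrative; the requirements file and training script are assumed names):

```yaml
type: task
# The name is optional, if not specified, generated randomly
name: train

python: "3.10"

# Commands of the task
commands:
  - pip install -r requirements.txt
  - python train.py
```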
@@ -110,8 +104,6 @@ $ dstack init [`nodes`](reference/dstack.yml/task.md#distributed-tasks), and `dstack` will run it on a cluster. - #### Run the configuration - Run the configuration via [`dstack apply`](reference/cli/dstack/apply.md):
@@ -142,8 +134,6 @@ $ dstack init A service allows you to deploy a model or any web app as an endpoint. - #### Define a configuration - Create the following configuration file inside the repo:
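Again, the service configuration is elided from the context lines. A minimal service file in the style of the reference examples removed below might look like this sketch (illustrative; the command and port simply expose a basic HTTP server):

```yaml
type: service
# The name is optional, if not specified, generated randomly
name: http-server-service

python: "3.10"

# Commands of the service
commands:
  - python3 -m http.server
# The port of the service
port: 8000
```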
@@ -175,8 +165,6 @@ $ dstack init
- #### Run the configuration - Run the configuration via [`dstack apply`](reference/cli/dstack/apply.md):
@@ -216,9 +204,7 @@ and runs the configuration. Something not working? See the [troubleshooting](guides/troubleshooting.md) guide. -## What's next? - -1. Read about [dev environments](dev-environments.md), [tasks](tasks.md), [services](services.md), - and [repos](concepts/repos.md) -2. Join [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd) -3. Browse [examples](https://dstack.ai/examples) +!!! info "What's next?" + 1. Read about [backends](concepts/backends.md), [dev environments](concepts/dev-environments.md), [tasks](concepts/tasks.md), and [services](concepts/services.md) + 2. Join [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd) + 3. Browse [examples](https://dstack.ai/examples) diff --git a/docs/docs/reference/dstack.yml/dev-environment.md b/docs/docs/reference/dstack.yml/dev-environment.md index 00c8a6bc4..2bcd9794c 100644 --- a/docs/docs/reference/dstack.yml/dev-environment.md +++ b/docs/docs/reference/dstack.yml/dev-environment.md @@ -1,272 +1,6 @@ -# dev-environment +# `dev-environment` -The `dev-environment` configuration type allows running [dev environments](../../dev-environments.md). - -> Configuration files must be inside the project repo, and their names must end with `.dstack.yml` -> (e.g. `.dstack.yml` or `dev.dstack.yml` are both acceptable). -> Any configuration can be run via [`dstack apply`](../cli/dstack/apply.md). - -## Examples - -### Python version - -If you don't specify `image`, `dstack` uses its base Docker image pre-configured with -`python`, `pip`, `conda` (Miniforge), and essential CUDA drivers. -The `python` property determines which default Docker image is used. - -
- -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -# If `image` is not specified, dstack uses its base image -python: "3.10" - -ide: vscode -``` - -
- -??? info "nvcc" - By default, the base Docker image doesn’t include `nvcc`, which is required for building custom CUDA kernels. - If you need `nvcc`, set the corresponding property to true. - - ```yaml - type: dev-environment - # The name is optional, if not specified, generated randomly - name: vscode - - # If `image` is not specified, dstack uses its base image - python: "3.10" - # Ensure nvcc is installed (req. for Flash Attention) - nvcc: true - - ide: vscode - ``` - -### Docker - -If you want, you can specify your own Docker image via `image`. - -
- -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -# Any custom Docker image -image: ghcr.io/huggingface/text-generation-inference:latest - -ide: vscode -``` - -
- -??? info "Private registry" - - Use the `registry_auth` property to provide credentials for a private Docker registry. - - ```yaml - type: dev-environment - # The name is optional, if not specified, generated randomly - name: vscode - - # Any private Docker image - image: ghcr.io/huggingface/text-generation-inference:latest - # Credentials of the private Docker registry - registry_auth: - username: peterschmidt85 - password: ghp_e49HcZ9oYwBzUbcSk2080gXZOU2hiT9AeSR5 - - ide: vscode - ``` - -!!! info "Docker and Docker Compose" - All backends except `runpod`, `vastai`, and `kubernetes` also allow using [Docker and Docker Compose](../../guides/protips.md#docker-and-docker-compose) inside `dstack` runs. - -### Resources { #_resources } - -When you specify a resource value like `cpu` or `memory`, -you can either use an exact value (e.g. `24GB`) or a -range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`). - -
- -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -ide: vscode - -resources: - # 200GB or more RAM - memory: 200GB.. - # 4 GPUs from 40GB to 80GB - gpu: 40GB..80GB:4 - # Shared memory (required by multi-gpu) - shm_size: 16GB - # Disk size - disk: 500GB -``` - -
- -The `gpu` property allows specifying not only memory size but also GPU vendor, names -and their quantity. Examples: `nvidia` (one NVIDIA GPU), `A100` (one A100), `A10G,A100` (either A10G or A100), -`A100:80GB` (one A100 of 80GB), `A100:2` (two A100), `24GB..40GB:2` (two GPUs between 24GB and 40GB), -`A100:40GB:2` (two A100 GPUs of 40GB). - -??? info "Google Cloud TPU" - To use TPUs, specify its architecture via the `gpu` property. - - ```yaml - type: dev-environment - # The name is optional, if not specified, generated randomly - name: vscode - - ide: vscode - - resources: - gpu: v2-8 - ``` - - Currently, only 8 TPU cores can be specified, supporting single TPU device workloads. Multi-TPU support is coming soon. - -??? info "Shared memory" - If you are using parallel communicating processes (e.g., dataloaders in PyTorch), you may need to configure - `shm_size`, e.g. set it to `16GB`. - -### Environment variables - -
- -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -# Environment variables -env: - - HF_TOKEN - - HF_HUB_ENABLE_HF_TRANSFER=1 - -ide: vscode -``` - -
- -If you don't assign a value to an environment variable (see `HF_TOKEN` above), -`dstack` will require the value to be passed via the CLI or set in the current process. -For instance, you can define environment variables in a `.envrc` file and utilize tools like `direnv`. - -#### System environment variables - -The following environment variables are available in any run by default: - -| Name | Description | -|-------------------------|-----------------------------------------| -| `DSTACK_RUN_NAME` | The name of the run | -| `DSTACK_REPO_ID` | The ID of the repo | -| `DSTACK_GPUS_NUM` | The total number of GPUs in the run | - -### Spot policy - -You can choose whether to use spot instances, on-demand instances, or any available type. - -
- -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -ide: vscode - -# Uncomment to leverage spot instances -#spot_policy: auto -``` - -
- -The `spot_policy` accepts `spot`, `on-demand`, and `auto`. The default for dev environments is `on-demand`. - -### Backends - -By default, `dstack` provisions instances in all configured backends. However, you can specify the list of backends: - -
- -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -ide: vscode - -# Use only listed backends -backends: [aws, gcp] -``` - -
- -### Regions - -By default, `dstack` uses all configured regions. However, you can specify the list of regions: - -
- -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -ide: vscode - -# Use only listed regions -regions: [eu-west-1, eu-west-2] -``` - -
- -### Volumes - -Volumes allow you to persist data between runs. -To attach a volume, simply specify its name using the `volumes` property and specify where to mount its contents: - -
- -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -ide: vscode - -# Map the name of the volume to any path -volumes: - - name: my-new-volume - path: /volume_data -``` - -
- -Once you run this configuration, the contents of the volume will be attached to `/volume_data` inside the dev -environment, and its contents will persist across runs. - -??? Info "Instance volumes" - If data persistence is not a strict requirement, use can also use - ephemeral [instance volumes](../../concepts/volumes.md#instance-volumes). - -!!! info "Limitations" - When you're running a dev environment, task, or service with `dstack`, it automatically mounts the project folder contents - to `/workflow` (and sets that as the current working directory). Right now, `dstack` doesn't allow you to - attach volumes to `/workflow` or any of its subdirectories. - -The `dev-environment` configuration type supports many other options. See below. +The `dev-environment` configuration type allows running [dev environments](../../concepts/dev-environments.md). ## Root reference @@ -276,7 +10,7 @@ The `dev-environment` configuration type supports many other options. See below. type: required: true -## `retry` +### `retry` #SCHEMA# dstack._internal.core.models.profiles.ProfileRetry overrides: @@ -284,7 +18,7 @@ The `dev-environment` configuration type supports many other options. See below. type: required: true -## `resources` +### `resources` #SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema overrides: @@ -293,7 +27,7 @@ The `dev-environment` configuration type supports many other options. See below. required: true item_id_prefix: resources- -## `resources.gpu` { #resources-gpu data-toc-label="resources.gpu" } +#### `resources.gpu` { #resources-gpu data-toc-label="gpu" } #SCHEMA# dstack._internal.core.models.resources.GPUSpecSchema overrides: @@ -301,7 +35,7 @@ The `dev-environment` configuration type supports many other options. See below. type: required: true -## `resources.disk` { #resources-disk data-toc-label="resources.disk" } +#### `resources.disk` { #resources-disk data-toc-label="disk" } #SCHEMA# dstack._internal.core.models.resources.DiskSpecSchema overrides: @@ -309,7 +43,7 @@ The `dev-environment` configuration type supports many other options. See below. type: required: true -## `registry_auth` +### `registry_auth` #SCHEMA# dstack._internal.core.models.configurations.RegistryAuth overrides: @@ -317,7 +51,7 @@ The `dev-environment` configuration type supports many other options. See below. type: required: true -## `volumes[n]` { #_volumes data-toc-label="volumes" } +### `volumes[n]` { #_volumes data-toc-label="volumes" } === "Network volumes" @@ -340,4 +74,4 @@ The `dev-environment` configuration type supports many other options. See below. The short syntax for volumes is a colon-separated string in the form of `source:destination` * `volume-name:/container/path` for network volumes - * `/instance/path:/container/path` for instance volumes + * `/instance/path:/container/path` for instance volumes diff --git a/docs/docs/reference/dstack.yml/fleet.md b/docs/docs/reference/dstack.yml/fleet.md index ffcaad705..537ddb109 100644 --- a/docs/docs/reference/dstack.yml/fleet.md +++ b/docs/docs/reference/dstack.yml/fleet.md @@ -1,67 +1,7 @@ -# fleet +# `fleet` The `fleet` configuration type allows creating and updating fleets. -> Configuration files must be inside the project repo, and their names must end with `.dstack.yml` -> (e.g. `.dstack.yml` or `fleet.dstack.yml` are both acceptable). -> Any configuration can be run via [`dstack apply`](../cli/dstack/apply.md). - -## Examples - -### Cloud fleet - -
- -```yaml -type: fleet -# The name is optional, if not specified, generated randomly -name: my-fleet - -# The number of instances -nodes: 4 -# Ensure the instances are interconnected -placement: cluster - -# Uncomment to leverage spot instances -#spot_policy: auto - -resources: - gpu: - # 24GB or more vRAM - memory: 24GB.. - # One or more GPU - count: 1.. -``` - -
- -### SSH fleet - -
-```yaml -type: fleet -# The name is optional, if not specified, generated randomly -name: my-ssh-fleet - -# Ensure instances are interconnected -placement: cluster - -# The user, private SSH key, and hostnames of the on-prem servers -ssh_config: - user: ubuntu - identity_file: ~/.ssh/id_rsa - hosts: - - 3.255.177.51 - - 3.255.177.52 -``` - -[//]: # (TODO: a cluster, individual user and identity file, etc) - -[//]: # (TODO: other examples, for all properties like in dev-environment/task/service) - ## Root reference #SCHEMA# dstack._internal.core.models.fleets.FleetConfiguration @@ -70,19 +10,20 @@ ssh_config: type: required: true -## `ssh_config` +### `ssh_config` { data-toc-label="ssh_config" } #SCHEMA# dstack._internal.core.models.fleets.SSHParams overrides: show_root_heading: false + item_id_prefix: ssh_config- -## `ssh_config.hosts[n]` +#### `ssh_config.hosts[n]` { #ssh_config-hosts data-toc-label="hosts" } #SCHEMA# dstack._internal.core.models.fleets.SSHHostParams overrides: show_root_heading: false -## `resources` +### `resources` #SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema overrides: @@ -91,7 +32,7 @@ ssh_config: required: true item_id_prefix: resources- -## `resouces.gpu` { #resources-gpu data-toc-label="resources.gpu" } +#### `resources.gpu` { #resources-gpu data-toc-label="gpu" } #SCHEMA# dstack._internal.core.models.resources.GPUSpecSchema overrides: @@ -99,7 +40,7 @@ ssh_config: type: required: true -## `resouces.disk` { #resources-disk data-toc-label="resources.disk" } +#### `resources.disk` { #resources-disk data-toc-label="disk" } #SCHEMA# dstack._internal.core.models.resources.DiskSpecSchema overrides: @@ -107,7 +48,7 @@ ssh_config: type: required: true -## `retry` +### `retry` #SCHEMA# dstack._internal.core.models.profiles.ProfileRetry overrides: diff --git a/docs/docs/reference/dstack.yml/gateway.md b/docs/docs/reference/dstack.yml/gateway.md index 73bb06d7f..4d81d5d50 100644 --- a/docs/docs/reference/dstack.yml/gateway.md +++ b/docs/docs/reference/dstack.yml/gateway.md @@ -1,34 +1,7 @@ -# gateway +# `gateway` The `gateway` configuration type allows creating and updating [gateways](../../concepts/gateways.md). -> Configuration files must be inside the project repo, and their names must end with `.dstack.yml` -> (e.g. `.dstack.yml` or `gateway.dstack.yml` are both acceptable). -> Any configuration can be run via [`dstack apply`](../cli/dstack/apply.md). - -## Examples - -### Creating a new gateway { #new-gateway } -
-```yaml -type: gateway -# A name of the gateway -name: example-gateway - -# Gateways are bound to a specific backend and region -backend: aws -region: eu-west-1 - -# This domain will be used to access the endpoint -domain: example.com -``` - -[//]: # (TODO: other examples, e.g. private gateways) - ## Root reference #SCHEMA# dstack._internal.core.models.gateways.GatewayConfiguration @@ -37,18 +10,20 @@ domain: example.com type: required: true -## `certificate[type=lets-encrypt]` +### `certificate` -#SCHEMA# dstack._internal.core.models.gateways.LetsEncryptGatewayCertificate - overrides: - show_root_heading: false - type: - required: true +=== "Let's Encrypt" -## `certificate[type=acm]` + #SCHEMA# dstack._internal.core.models.gateways.LetsEncryptGatewayCertificate + overrides: + show_root_heading: false + type: + required: true -#SCHEMA# dstack._internal.core.models.gateways.ACMGatewayCertificate - overrides: - show_root_heading: false - type: - required: true +=== "ACM" + + #SCHEMA# dstack._internal.core.models.gateways.ACMGatewayCertificate + overrides: + show_root_heading: false + type: + required: true diff --git a/docs/docs/reference/dstack.yml/service.md b/docs/docs/reference/dstack.yml/service.md index 1ad737b11..8d661743c 100644 --- a/docs/docs/reference/dstack.yml/service.md +++ b/docs/docs/reference/dstack.yml/service.md @@ -1,428 +1,6 @@ -# service +# `service` -The `service` configuration type allows running [services](../../services.md). - -> Configuration files must be inside the project repo, and their names must end with `.dstack.yml` -> (e.g. `.dstack.yml` or `serve.dstack.yml` are both acceptable). -> Any configuration can be run via [`dstack apply`](../cli/dstack/apply.md). - -## Examples - -### Python version - -If you don't specify `image`, `dstack` uses its base Docker image pre-configured with -`python`, `pip`, `conda` (Miniforge), and essential CUDA drivers. -The `python` property determines which default Docker image is used. -
- -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: http-server-service - -# If `image` is not specified, dstack uses its base image -python: "3.10" - -# Commands of the service -commands: - - python3 -m http.server -# The port of the service -port: 8000 -``` - -
- -??? info "nvcc" - By default, the base Docker image doesn’t include `nvcc`, which is required for building custom CUDA kernels. - If you need `nvcc`, set the corresponding property to true. - -
- - ```yaml - type: service - # The name is optional, if not specified, generated randomly - name: http-server-service - - # If `image` is not specified, dstack uses its base image - python: "3.10" - # Ensure nvcc is installed (req. for Flash Attention) - nvcc: true - - # Commands of the service - commands: - - python3 -m http.server - # The port of the service - port: 8000 - ``` - -
- -### Docker - -If you want, you can specify your own Docker image via `image`. - -
- - ```yaml - type: service - # The name is optional, if not specified, generated randomly - name: http-server-service - - # Any custom Docker image - image: dstackai/base:py3.13-0.6-cuda-12.1 - - # Commands of the service - commands: - - python3 -m http.server - # The port of the service - port: 8000 - ``` - -
- -??? info "Private Docker registry" - - Use the `registry_auth` property to provide credentials for a private Docker registry. - - ```yaml - type: service - # The name is optional, if not specified, generated randomly - name: http-server-service - - # Any private Docker iamge - image: dstackai/base:py3.13-0.6-cuda-12.1 - # Credentials of the private registry - registry_auth: - username: peterschmidt85 - password: ghp_e49HcZ9oYwBzUbcSk2080gXZOU2hiT9AeSR5 - - # Commands of the service - commands: - - python3 -m http.server - # The port of the service - port: 8000 - ``` - -!!! info "Docker and Docker Compose" - All backends except `runpod`, `vastai`, and `kubernetes` also allow using [Docker and Docker Compose](../../guides/protips.md#docker-and-docker-compose) inside `dstack` runs. - -### Models { #model-mapping } - -If you are running a chat model with an OpenAI-compatible interface, -set the [`model`](#model) property to make the model accessible via -the OpenAI-compatible endpoint provided by `dstack`. - -
- -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: llama31-service - -python: "3.10" - -# Required environment variables -env: - - HF_TOKEN -commands: - - pip install vllm - - vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct --max-model-len 4096 -# Expose the port of the service -port: 8000 - -resources: - # Change to what is required - gpu: 24GB - -# Register the model -model: meta-llama/Meta-Llama-3.1-8B-Instruct - -# Alternatively, use this syntax to set more model settings: -# model: -# type: chat -# name: meta-llama/Meta-Llama-3.1-8B-Instruct -# format: openai -# prefix: /v1 -``` - -
- -Once the service is up, the model will be available via the OpenAI-compatible endpoint -at `/proxy/models/` -or at `https://gateway.` if your project has a gateway. - -### Auto-scaling - -By default, `dstack` runs a single replica of the service. -You can configure the number of replicas as well as the auto-scaling rules. - -
- -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: llama31-service - -python: "3.10" - -# Required environment variables -env: - - HF_TOKEN -commands: - - pip install vllm - - vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct --max-model-len 4096 -# Expose the port of the service -port: 8000 - -resources: - # Change to what is required - gpu: 24GB - -# Minimum and maximum number of replicas -replicas: 1..4 -scaling: - # Requests per seconds - metric: rps - # Target metric value - target: 10 -``` - -
- -The [`replicas`](#replicas) property can be a number or a range. - -> The [`metric`](#metric) property of [`scaling`](#scaling) only supports the `rps` metric (requests per second). In this -> case `dstack` adjusts the number of replicas (scales up or down) automatically based on the load. - -Setting the minimum number of replicas to `0` allows the service to scale down to zero when there are no requests. - -!!! info "Gateway" - Services with a fixed number of replicas are supported both with and without a - [gateway](../../concepts/gateways.md). - Auto-scaling is currently only supported for services running with a gateway. - -### Resources { #_resources } - -If you specify memory size, you can either specify an explicit size (e.g. `24GB`) or a -range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`). - -
- -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: http-server-service - -python: "3.10" - -# Commands of the service -commands: - - pip install vllm - - python -m vllm.entrypoints.openai.api_server - --model mistralai/Mixtral-8X7B-Instruct-v0.1 - --host 0.0.0.0 - --tensor-parallel-size $DSTACK_GPUS_NUM -# Expose the port of the service -port: 8000 - -resources: - # 2 GPUs of 80GB - gpu: 80GB:2 - - # Minimum disk size - disk: 200GB -``` - -
- -The `gpu` property allows specifying not only memory size but also GPU vendor, names -and their quantity. Examples: `nvidia` (one NVIDIA GPU), `A100` (one A100), `A10G,A100` (either A10G or A100), -`A100:80GB` (one A100 of 80GB), `A100:2` (two A100), `24GB..40GB:2` (two GPUs between 24GB and 40GB), -`A100:40GB:2` (two A100 GPUs of 40GB). - -??? info "Shared memory" - If you are using parallel communicating processes (e.g., dataloaders in PyTorch), you may need to configure - `shm_size`, e.g. set it to `16GB`. - -### Authorization - -By default, the service endpoint requires the `Authorization` header with `"Bearer "`. -Authorization can be disabled by setting `auth` to `false`. - -
- -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: http-server-service - -# Disable authorization -auth: false - -python: "3.10" - -# Commands of the service -commands: - - python3 -m http.server -# The port of the service -port: 8000 -``` - -
- -### Environment variables - -
- -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: llama-2-7b-service - -python: "3.10" - -# Environment variables -env: - - HF_TOKEN - - MODEL=NousResearch/Llama-2-7b-chat-hf -# Commands of the service -commands: - - pip install vllm - - python -m vllm.entrypoints.openai.api_server --model $MODEL --port 8000 -# The port of the service -port: 8000 - -resources: - # Required GPU vRAM - gpu: 24GB -``` - -
- -> If you don't assign a value to an environment variable (see `HF_TOKEN` above), -`dstack` will require the value to be passed via the CLI or set in the current process. -For instance, you can define environment variables in a `.envrc` file and utilize tools like `direnv`. - -#### System environment variables - -The following environment variables are available in any run by default: - -| Name | Description | -|-------------------------|-----------------------------------------| -| `DSTACK_RUN_NAME` | The name of the run | -| `DSTACK_REPO_ID` | The ID of the repo | -| `DSTACK_GPUS_NUM` | The total number of GPUs in the run | - -### Spot policy - -You can choose whether to use spot instances, on-demand instances, or any available type. - -
- -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: http-server-service - -commands: - - python3 -m http.server -# The port of the service -port: 8000 - -# Uncomment to leverage spot instances -#spot_policy: auto -``` - -
- -The `spot_policy` accepts `spot`, `on-demand`, and `auto`. The default for services is `on-demand`. - -### Backends - -By default, `dstack` provisions instances in all configured backends. However, you can specify the list of backends: - -
- -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: http-server-service - -# Commands of the service -commands: - - python3 -m http.server -# The port of the service -port: 8000 - -# Use only listed backends -backends: [aws, gcp] -``` - -
- -### Regions - -By default, `dstack` uses all configured regions. However, you can specify the list of regions: - -
- -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: http-server-service - -# Commands of the service -commands: - - python3 -m http.server -# The port of the service -port: 8000 - -# Use only listed regions -regions: [eu-west-1, eu-west-2] -``` - -
- -### Volumes - -Volumes allow you to persist data between runs. -To attach a volume, simply specify its name using the `volumes` property and specify where to mount its contents: - -
- -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: http-server-service - -# Commands of the service -commands: - - python3 -m http.server -# The port of the service -port: 8000 - -# Map the name of the volume to any path -volumes: - - name: my-new-volume - path: /volume_data -``` - -
- -Once you run this configuration, the contents of the volume will be attached to `/volume_data` inside the service, -and its contents will persist across runs. - -??? Info "Instance volumes" - If data persistence is not a strict requirement, use can also use - ephemeral [instance volumes](../../concepts/volumes.md#instance-volumes). - -!!! info "Limitations" - When you're running a dev environment, task, or service with `dstack`, it automatically mounts the project folder contents - to `/workflow` (and sets that as the current working directory). Right now, `dstack` doesn't allow you to - attach volumes to `/workflow` or any of its subdirectories. - -The `service` configuration type supports many other options. See below. +The `service` configuration type allows running [services](../../concepts/services.md). ## Root reference @@ -432,62 +10,64 @@ The `service` configuration type supports many other options. See below. type: required: true -## `model[format=openai]` - -#SCHEMA# dstack._internal.core.models.gateways.OpenAIChatModel - overrides: - show_root_heading: false - type: - required: true - -## `model[format=tgi]` - -> TGI provides an OpenAI-compatible API starting with version 1.4.0, -so models served by TGI can be defined with `format: openai` too. - -#SCHEMA# dstack._internal.core.models.gateways.TGIChatModel - overrides: - show_root_heading: false - type: - required: true - -??? info "Chat template" - - By default, `dstack` loads the [chat template](https://huggingface.co/docs/transformers/main/en/chat_templating) - from the model's repository. If it is not present there, manual configuration is required. - - ```yaml - type: service - - image: ghcr.io/huggingface/text-generation-inference:latest - env: - - MODEL_ID=TheBloke/Llama-2-13B-chat-GPTQ - commands: - - text-generation-launcher --port 8000 --trust-remote-code --quantize gptq - port: 8000 - - resources: - gpu: 80GB +### `model` { data-toc-label="model" } - # Enable the OpenAI-compatible endpoint - model: - type: chat - name: TheBloke/Llama-2-13B-chat-GPTQ - format: tgi - chat_template: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' }}{% endif %}{% endfor %}" - eos_token: "" - ``` +=== "OpenAI" - ##### Limitations + #SCHEMA# dstack._internal.core.models.gateways.OpenAIChatModel + overrides: + show_root_heading: false + type: + required: true - Please note that model mapping is an experimental feature with the following limitations: +=== "TGI" - 1. Doesn't work if your `chat_template` uses `bos_token`. As a workaround, replace `bos_token` inside `chat_template` with the token content itself. - 2. Doesn't work if `eos_token` is defined in the model repository as a dictionary. As a workaround, set `eos_token` manually, as shown in the example above (see Chat template). 
+ > TGI provides an OpenAI-compatible API starting with version 1.4.0, + so models served by TGI can be defined with `format: openai` too. + + #SCHEMA# dstack._internal.core.models.gateways.TGIChatModel + overrides: + show_root_heading: false + type: + required: true - If you encounter any other issues, please make sure to file a [GitHub issue](https://github.com/dstackai/dstack/issues/new/choose). + ??? info "Chat template" + + By default, `dstack` loads the [chat template](https://huggingface.co/docs/transformers/main/en/chat_templating) + from the model's repository. If it is not present there, manual configuration is required. + + ```yaml + type: service + + image: ghcr.io/huggingface/text-generation-inference:latest + env: + - MODEL_ID=TheBloke/Llama-2-13B-chat-GPTQ + commands: + - text-generation-launcher --port 8000 --trust-remote-code --quantize gptq + port: 8000 + + resources: + gpu: 80GB + + # Enable the OpenAI-compatible endpoint + model: + type: chat + name: TheBloke/Llama-2-13B-chat-GPTQ + format: tgi + chat_template: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' }}{% endif %}{% endfor %}" + eos_token: "" + ``` + + ##### Limitations + + Please note that model mapping is an experimental feature with the following limitations: + + 1. Doesn't work if your `chat_template` uses `bos_token`. As a workaround, replace `bos_token` inside `chat_template` with the token content itself. + 2. Doesn't work if `eos_token` is defined in the model repository as a dictionary. As a workaround, set `eos_token` manually, as shown in the example above (see Chat template). + + If you encounter any other issues, please make sure to file a [GitHub issue](https://github.com/dstackai/dstack/issues/new/choose). -## `scaling` +### `scaling` #SCHEMA# dstack._internal.core.models.configurations.ScalingSpec overrides: @@ -495,13 +75,13 @@ so models served by TGI can be defined with `format: openai` too. type: required: true -## `retry` +### `retry` #SCHEMA# dstack._internal.core.models.profiles.ProfileRetry overrides: show_root_heading: false -## `resources` +### `resources` #SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema overrides: @@ -510,7 +90,7 @@ so models served by TGI can be defined with `format: openai` too. required: true item_id_prefix: resources- -## `resouces.gpu` { #resources-gpu data-toc-label="resources.gpu" } +#### `resources.gpu` { #resources-gpu data-toc-label="gpu" } #SCHEMA# dstack._internal.core.models.resources.GPUSpecSchema overrides: @@ -518,7 +98,7 @@ so models served by TGI can be defined with `format: openai` too.
type: required: true -## `resouces.disk` { #resources-disk data-toc-label="resources.disk" } +#### `resources.disk` { #resources-disk data-toc-label="disk" } #SCHEMA# dstack._internal.core.models.resources.DiskSpecSchema overrides: @@ -526,7 +106,7 @@ so models served by TGI can be defined with `format: openai` too. type: required: true -## `registry_auth` +### `registry_auth` #SCHEMA# dstack._internal.core.models.configurations.RegistryAuth overrides: @@ -534,7 +114,7 @@ so models served by TGI can be defined with `format: openai` too. type: required: true -## `volumes[n]` { #_volumes data-toc-label="volumes" } +### `volumes[n]` { #_volumes data-toc-label="volumes" } === "Network volumes" diff --git a/docs/docs/reference/dstack.yml/task.md b/docs/docs/reference/dstack.yml/task.md index 3c99eb8a2..f08cf06cb 100644 --- a/docs/docs/reference/dstack.yml/task.md +++ b/docs/docs/reference/dstack.yml/task.md @@ -1,448 +1,6 @@ -# task +# `task` -The `task` configuration type allows running [tasks](../../tasks.md). - -> Configuration files must be inside the project repo, and their names must end with `.dstack.yml` -> (e.g. `.dstack.yml` or `train.dstack.yml` are both acceptable). -> Any configuration can be run via [`dstack apply`](../cli/dstack/apply.md). - -## Examples - -### Python version - -If you don't specify `image`, `dstack` uses its base Docker image pre-configured with -`python`, `pip`, `conda` (Miniforge), and essential CUDA drivers. -The `python` property determines which default Docker image is used. -
- -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train - -# If `image` is not specified, dstack uses its base image -python: "3.10" - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py -``` - -
- -??? info "nvcc" - By default, the base Docker image doesn’t include `nvcc`, which is required for building custom CUDA kernels. - If you need `nvcc`, set the corresponding property to true. - - - ```yaml - type: task - # The name is optional, if not specified, generated randomly - name: train - - # If `image` is not specified, dstack uses its base image - python: "3.10" - # Ensure nvcc is installed (req. for Flash Attention) - nvcc: true - - commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py - ``` - -### Ports { #_ports } - -A task can configure ports. In this case, if the task is running an application on a port, `dstack run` -will securely allow you to access this port from your local machine through port forwarding. - -
- -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train - -python: "3.10" - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - tensorboard --logdir results/runs & - - python fine-tuning/qlora/train.py -# Expose the port to access TensorBoard -ports: - - 6000 -``` - -
- -When running it, `dstack run` forwards `6000` port to `localhost:6000`, enabling secure access. - -[//]: # (See [tasks](../../tasks.md#configure-ports) for more detail.) - -### Docker - -If you want, you can specify your own Docker image via `image`. - -
- -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: train - -# Any custom Docker image -image: dstackai/base:py3.13-0.6-cuda-12.1 - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py -``` - -
- -??? info "Private registry" - Use the `registry_auth` property to provide credentials for a private Docker registry. - - ```yaml - type: dev-environment - # The name is optional, if not specified, generated randomly - name: train - - # Any private Docker image - image: dstackai/base:py3.13-0.6-cuda-12.1 - # Credentials of the private Docker registry - registry_auth: - username: peterschmidt85 - password: ghp_e49HcZ9oYwBzUbcSk2080gXZOU2hiT9AeSR5 - - # Commands of the task - commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py - ``` - -!!! info "Docker and Docker Compose" - All backends except `runpod`, `vastai`, and `kubernetes` also allow using [Docker and Docker Compose](../../guides/protips.md#docker-and-docker-compose) inside `dstack` runs. - -### Resources { #_resources } - -If you specify memory size, you can either specify an explicit size (e.g. `24GB`) or a -range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`). - -
- -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py - -resources: - # 200GB or more RAM - memory: 200GB.. - # 4 GPUs from 40GB to 80GB - gpu: 40GB..80GB:4 - # Shared memory (required by multi-gpu) - shm_size: 16GB - # Disk size - disk: 500GB -``` - -
- -The `gpu` property allows specifying not only memory size but also GPU vendor, names -and their quantity. Examples: `nvidia` (one NVIDIA GPU), `A100` (one A100), `A10G,A100` (either A10G or A100), -`A100:80GB` (one A100 of 80GB), `A100:2` (two A100), `24GB..40GB:2` (two GPUs between 24GB and 40GB), -`A100:40GB:2` (two A100 GPUs of 40GB). - -??? info "Google Cloud TPU" - To use TPUs, specify its architecture via the `gpu` property. - - ```yaml - type: task - # The name is optional, if not specified, generated randomly - name: train - - python: "3.10" - - # Commands of the task - commands: - - pip install torch~=2.3.0 torch_xla[tpu]~=2.3.0 torchvision -f https://storage.googleapis.com/libtpu-releases/index.html - - git clone --recursive https://github.com/pytorch/xla.git - - python3 xla/test/test_train_mp_imagenet.py --fake_data --model=resnet50 --num_epochs=1 - - resources: - gpu: v2-8 - ``` - - Currently, only 8 TPU cores can be specified, supporting single host workloads. Multi-host support is coming soon. - -??? info "Shared memory" - If you are using parallel communicating processes (e.g., dataloaders in PyTorch), you may need to configure - `shm_size`, e.g. set it to `16GB`. - -### Environment variables - -
- -```yaml -type: task - -python: "3.10" - -# Environment variables -env: - - HF_TOKEN - - HF_HUB_ENABLE_HF_TRANSFER=1 - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py -``` - -
- -If you don't assign a value to an environment variable (see `HF_TOKEN` above), -`dstack` will require the value to be passed via the CLI or set in the current process. -For instance, you can define environment variables in a `.envrc` file and utilize tools like `direnv`. - -##### System environment variables - -The following environment variables are available in any run by default: - -| Name | Description | -|-------------------------|------------------------------------------------------------------| -| `DSTACK_RUN_NAME` | The name of the run | -| `DSTACK_REPO_ID` | The ID of the repo | -| `DSTACK_GPUS_NUM` | The total number of GPUs in the run | -| `DSTACK_NODES_NUM` | The number of nodes in the run | -| `DSTACK_GPUS_PER_NODE` | The number of GPUs per node | -| `DSTACK_NODE_RANK` | The rank of the node | -| `DSTACK_MASTER_NODE_IP` | The internal IP address the master node | -| `DSTACK_NODES_IPS` | The list of internal IP addresses of all nodes delimited by "\n" | - -### Distributed tasks - -By default, a task runs on a single node. However, you can run it on a cluster of nodes by specifying `nodes`: - -
- -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train-distrib - -# The size of the cluster -nodes: 2 - -python: "3.10" - -# Commands of the task -commands: - - pip install -r requirements.txt - - torchrun - --nproc_per_node=$DSTACK_GPUS_PER_NODE - --node_rank=$DSTACK_NODE_RANK - --nnodes=$DSTACK_NODES_NUM - --master_addr=$DSTACK_MASTER_NODE_IP - --master_port=8008 resnet_ddp.py - --num_epochs 20 - -resources: - gpu: 24GB -``` - -
- -If you run the task, `dstack` first provisions the master node and then runs the other nodes of the cluster. - -??? info "Network" - To ensure all nodes are provisioned into a cluster placement group and to enable the highest level of inter-node - connectivity, it is recommended to manually create a [fleet](../../concepts/fleets.md) before running a task. - This won’t be needed once [this issue :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/issues/1805){:target="_blank"} - is fixed. - -> `dstack` is easy to use with `accelerate`, `torchrun`, and other distributed frameworks. All you need to do -is pass the corresponding environment variables such as `DSTACK_GPUS_PER_NODE`, `DSTACK_NODE_RANK`, `DSTACK_NODES_NUM`, -`DSTACK_MASTER_NODE_IP`, and `DSTACK_GPUS_NUM` (see [System environment variables](#system-environment-variables)). - -??? info "Backends" - Running on multiple nodes is supported only with the `aws`, `gcp`, `azure`, `oci` backends, or - [SSH fleets](../../concepts/fleets.md#ssh-fleets). - - Additionally, the `aws` backend supports [Elastic Fabric Adapter :material-arrow-top-right-thin:{ .external }](https://aws.amazon.com/hpc/efa/){:target="_blank"}. - For a list of instance types with EFA support see [Fleets](../../concepts/fleets.md#cloud-fleets). - -### Web applications - -Here's an example of using `ports` to run web apps with `tasks`. - -
- -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: streamlit-hello - -python: "3.10" - -# Commands of the task -commands: - - pip3 install streamlit - - streamlit hello -# Expose the port to access the web app -ports: - - 8501 - -``` - -
- -### Spot policy - -You can choose whether to use spot instances, on-demand instances, or any available type. - -
- -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py - -# Uncomment to leverage spot instances -#spot_policy: auto -``` - -
- -The `spot_policy` accepts `spot`, `on-demand`, and `auto`. The default for tasks is `on-demand`. - -### Queueing tasks { #queueing-tasks } - -By default, if `dstack apply` cannot find capacity, the task fails. - -To queue the task and wait for capacity, specify the [`retry`](#retry) -property: - -
- -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py - -retry: - # Retry on no-capacity errors - on_events: [no-capacity] - # Retry within 1 day - duration: 1d -``` - -
- -### Backends - -By default, `dstack` provisions instances in all configured backends. However, you can specify the list of backends: - -
- -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py - -# Use only listed backends -backends: [aws, gcp] -``` - -
- -### Regions - -By default, `dstack` uses all configured regions. However, you can specify the list of regions: - -
- -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py - -# Use only listed regions -regions: [eu-west-1, eu-west-2] -``` - -
- -### Volumes - -Volumes allow you to persist data between runs. -To attach a volume, simply specify its name using the `volumes` property and specify where to mount its contents: - -
-```yaml -type: task -# The name is optional, if not specified, generated randomly -name: vscode - -python: "3.10" - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py - -# Map the name of the volume to any path -volumes: - - name: my-new-volume - path: /volume_data -``` - -Once you run this configuration, the contents of the volume will be attached to `/volume_data` inside the task, -and its contents will persist across runs. - -??? Info "Instance volumes" - If data persistence is not a strict requirement, use can also use - ephemeral [instance volumes](../../concepts/volumes.md#instance-volumes). - -!!! info "Limitations" - When you're running a dev environment, task, or service with `dstack`, it automatically mounts the project folder contents - to `/workflow` (and sets that as the current working directory). Right now, `dstack` doesn't allow you to - attach volumes to `/workflow` or any of its subdirectories. - -The `task` configuration type supports many other options. See below. +The `task` configuration type allows running [tasks](../../concepts/tasks.md). ## Root reference #SCHEMA# dstack._internal.core.models.configurations.TaskConfiguration @@ -452,7 +10,7 @@ The `task` configuration type supports many other options. See below. type: required: true -## `retry` +### `retry` #SCHEMA# dstack._internal.core.models.profiles.ProfileRetry overrides: @@ -460,7 +18,7 @@ The `task` configuration type supports many other options. See below. type: required: true -## `resources` +### `resources` #SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema overrides: @@ -469,7 +27,7 @@ The `task` configuration type supports many other options. See below. required: true item_id_prefix: resources- -## `resouces.gpu` { #resources-gpu data-toc-label="resources.gpu" } +#### `resources.gpu` { #resources-gpu data-toc-label="gpu" } #SCHEMA# dstack._internal.core.models.resources.GPUSpecSchema overrides: @@ -477,7 +35,7 @@ The `task` configuration type supports many other options. See below. type: required: true -## `resouces.disk` { #resources-disk data-toc-label="resources.disk" } +#### `resources.disk` { #resources-disk data-toc-label="disk" } #SCHEMA# dstack._internal.core.models.resources.DiskSpecSchema overrides: @@ -485,7 +43,7 @@ The `task` configuration type supports many other options. See below. type: required: true -## `registry_auth` +### `registry_auth` #SCHEMA# dstack._internal.core.models.configurations.RegistryAuth overrides: @@ -493,7 +51,7 @@ The `task` configuration type supports many other options. See below. type: required: true -## `volumes[n]` { #_volumes data-toc-label="volumes" } +### `volumes[n]` { #_volumes data-toc-label="volumes" } === "Network volumes" diff --git a/docs/docs/reference/dstack.yml/volume.md b/docs/docs/reference/dstack.yml/volume.md index 246270ab2..af34a166a 100644 --- a/docs/docs/reference/dstack.yml/volume.md +++ b/docs/docs/reference/dstack.yml/volume.md @@ -1,52 +1,7 @@ -# volume +# `volume` The `volume` configuration type allows creating, registering, and updating [volumes](../../concepts/volumes.md). -> Configuration files must be inside the project repo, and their names must end with `.dstack.yml` -> (e.g. `.dstack.yml` or `fleet.dstack.yml` are both acceptable). -> Any configuration can be run via [`dstack apply`](../cli/dstack/apply.md). - -## Examples - -### Creating a new volume { #new-volume } -
- -```yaml -type: volume -# The name of the volume -name: my-new-volume - -# Volumes are bound to a specific backend and region -backend: aws -region: eu-central-1 - -# The size of the volume -size: 100GB -``` - -
- -### Registering an existing volume { #existing-volume } - -
-```yaml -type: volume -# The name of the volume -name: my-existing-volume - -# Volumes are bound to a specific backend and region -backend: aws -region: eu-central-1 - -# The ID of the volume in AWS -volume_id: vol1235 -``` - - ## Root reference #SCHEMA# dstack._internal.core.models.volumes.VolumeConfiguration diff --git a/docs/docs/reference/misc/environment-variables.md b/docs/docs/reference/misc/environment-variables.md index 43b600a46..ee9024277 100644 --- a/docs/docs/reference/misc/environment-variables.md +++ b/docs/docs/reference/misc/environment-variables.md @@ -5,7 +5,7 @@ The following read-only environment variables are automatically propagated to configurations for dev environments, tasks, and services: -##### DSTACK_RUN_NAME { #DSTACK_RUN_NAME } +###### DSTACK_RUN_NAME { #DSTACK_RUN_NAME } The name of the run. @@ -21,11 +21,11 @@ commands: If `name` is not set in the configuration, it is assigned a random name (e.g. `wet-mangust-1`). -##### DSTACK_REPO_ID { #DSTACK_REPO_ID } +###### DSTACK_REPO_ID { #DSTACK_REPO_ID } The ID of the repo -##### DSTACK_GPUS_NUM { #DSTACK_GPUS_NUM } +###### DSTACK_GPUS_NUM { #DSTACK_GPUS_NUM } The total number of GPUs in the run @@ -49,19 +49,19 @@ resources: gpu: 24GB ``` -##### DSTACK_NODES_NUM { #DSTACK_NODES_NUM } +###### DSTACK_NODES_NUM { #DSTACK_NODES_NUM } The number of nodes in the run -##### DSTACK_GPUS_PER_NODE { #DSTACK_GPUS_PER_NODE } +###### DSTACK_GPUS_PER_NODE { #DSTACK_GPUS_PER_NODE } The number of GPUs per node -##### DSTACK_NODE_RANK { #DSTACK_NODE_RANK } +###### DSTACK_NODE_RANK { #DSTACK_NODE_RANK } The rank of the node -##### DSTACK_NODE_RANK { #DSTACK_NODE_RANK } +###### DSTACK_MASTER_NODE_IP { #DSTACK_MASTER_NODE_IP } The internal IP address of the master node. @@ -90,7 +90,7 @@ resources: gpu: 24GB ``` -##### DSTACK_NODES_IPS { #DSTACK_NODES_IPS } +###### DSTACK_NODES_IPS { #DSTACK_NODES_IPS } The list of internal IP addresses of all nodes delimited by `"\n"` @@ -102,7 +102,7 @@ via `dstack server` or deployed using Docker. For more details on the options below, refer to the [server deployment](../../guides/server-deployment.md) guide. -##### DSTACK_SERVER_LOG_LEVEL { #DSTACK_SERVER_LOG_LEVEL } +###### DSTACK_SERVER_LOG_LEVEL { #DSTACK_SERVER_LOG_LEVEL } Has the same effect as `--log-level`. Defaults to `INFO`. @@ -117,43 +117,43 @@ $ DSTACK_SERVER_LOG_LEVEL=debug dstack server
-##### DSTACK_SERVER_LOG_FORMAT { #DSTACK_SERVER_LOG_FORMAT } +###### DSTACK_SERVER_LOG_FORMAT { #DSTACK_SERVER_LOG_FORMAT } Sets format of log output. Can be `rich`, `standard`, `json`. Defaults to `rich`. -##### DSTACK_SERVER_HOST { #DSTACK_SERVER_HOST } +###### DSTACK_SERVER_HOST { #DSTACK_SERVER_HOST } Has the same effect as `--host`. Defaults to `127.0.0.1`. -##### DSTACK_SERVER_PORT { #DSTACK_SERVER_PORT } +###### DSTACK_SERVER_PORT { #DSTACK_SERVER_PORT } Has the same effect as `--port`. Defaults to `3000`. -##### DSTACK_SERVER_ADMIN_TOKEN { #DSTACK_SERVER_ADMIN_TOKEN } +###### DSTACK_SERVER_ADMIN_TOKEN { #DSTACK_SERVER_ADMIN_TOKEN } Has the same effect as `--token`. Defaults to `None`. -##### DSTACK_SERVER_DIR { #DSTACK_SERVER_DIR } +###### DSTACK_SERVER_DIR { #DSTACK_SERVER_DIR } Sets path to store data and server configs. Defaults to `~/.dstack/server`. -##### DSTACK_DATABASE_URL { #DSTACK_DATABASE_URL } +###### DSTACK_DATABASE_URL { #DSTACK_DATABASE_URL } The database URL to use instead of default SQLite. Currently `dstack` supports Postgres. Example: `postgresql+asyncpg://myuser:mypassword@localhost:5432/mydatabase`. Defaults to `None`. -##### DSTACK_SERVER_CLOUDWATCH_LOG_GROUP { #DSTACK_SERVER_CLOUDWATCH_LOG_GROUP } +###### DSTACK_SERVER_CLOUDWATCH_LOG_GROUP { #DSTACK_SERVER_CLOUDWATCH_LOG_GROUP } The CloudWatch Logs group for workloads logs. If not set, the default file-based log storage is used. -##### DSTACK_SERVER_CLOUDWATCH_LOG_REGION { #DSTACK_SERVER_CLOUDWATCH_LOG_REGION } +###### DSTACK_SERVER_CLOUDWATCH_LOG_REGION { #DSTACK_SERVER_CLOUDWATCH_LOG_REGION } The CloudWatch Logs region. Defaults to `None`. -##### DSTACK_DEFAULT_SERVICE_CLIENT_MAX_BODY_SIZE { #DSTACK_DEFAULT_SERVICE_CLIENT_MAX_BODY_SIZE } +###### DSTACK_DEFAULT_SERVICE_CLIENT_MAX_BODY_SIZE { #DSTACK_DEFAULT_SERVICE_CLIENT_MAX_BODY_SIZE } Request body size limit for services, in bytes. Defaults to 64 MiB. -##### DSTACK_FORBID_SERVICES_WITHOUT_GATEWAY { #DSTACK_FORBID_SERVICES_WITHOUT_GATEWAY } +###### DSTACK_FORBID_SERVICES_WITHOUT_GATEWAY { #DSTACK_FORBID_SERVICES_WITHOUT_GATEWAY } Forbids registering new services without a gateway if set to any value. @@ -172,7 +172,7 @@ Forbids registering new services without a gateway if set to any value. The following environment variables are supported by the CLI. -##### DSTACK_CLI_LOG_LEVEL { #DSTACK_CLI_LOG_LEVEL } +###### DSTACK_CLI_LOG_LEVEL { #DSTACK_CLI_LOG_LEVEL } Configures CLI logging level. Defaults to `INFO`. @@ -186,6 +186,6 @@ $ DSTACK_CLI_LOG_LEVEL=debug dstack apply -f .dstack.yml
-##### DSTACK_PROJECT { #DSTACK_PROJECT } +###### DSTACK_PROJECT { #DSTACK_PROJECT } Has the same effect as `--project`. Defaults to `None`. diff --git a/docs/docs/reference/server/config.yml.md b/docs/docs/reference/server/config.yml.md index 20916b5ed..b329dd843 100644 --- a/docs/docs/reference/server/config.yml.md +++ b/docs/docs/reference/server/config.yml.md @@ -1,931 +1,7 @@ # ~/.dstack/server/config.yml The `~/.dstack/server/config.yml` file is used -to [configure](../../installation/index.md#1-configure-backends) the `dstack` server cloud accounts -and other sever-level settings such as encryption. - -## Configure backends { #backends } - -> The `dstack` server allows you to configure backends for multiple projects. -> If you don't need multiple projects, use only the `main` project. - -Each cloud account must be configured under the `backends` property of the respective project. -See the examples below. - -### Cloud providers { #clouds } - -#### AWS - -There are two ways to configure AWS: using an access key or using the default credentials. - -=== "Default credentials" - - If you have default credentials set up (e.g. in `~/.aws/credentials`), configure the backend like this: - -
- - ```yaml - projects: - - name: main - backends: - - type: aws - creds: - type: default - ``` - -
- -=== "Access key" - - Create an access key by following the [this guide :material-arrow-top-right-thin:{ .external }](https://docs.aws.amazon.com/cli/latest/userguide/cli-authentication-user.html#cli-authentication-user-get). - Once you've downloaded the `.csv` file with your IAM user's Access key ID and Secret access key, proceed to - configure the backend. - -
- - ```yaml - projects: - - name: main - backends: - - type: aws - creds: - type: access_key - access_key: KKAAUKLIZ5EHKICAOASV - secret_key: pn158lMqSBJiySwpQ9ubwmI6VUU3/W2fdJdFwfgO - ``` - -
- -??? info "Required permissions" - The following AWS policy permissions are sufficient for `dstack` to work: - - ``` - { - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": [ - "ec2:AttachVolume", - "ec2:AuthorizeSecurityGroupEgress", - "ec2:AuthorizeSecurityGroupIngress", - "ec2:CreatePlacementGroup", - "ec2:CancelSpotInstanceRequests", - "ec2:CreateSecurityGroup", - "ec2:CreateTags", - "ec2:CreateVolume", - "ec2:DeletePlacementGroup", - "ec2:DeleteVolume", - "ec2:DescribeAvailabilityZones", - "ec2:DescribeCapacityReservations" - "ec2:DescribeImages", - "ec2:DescribeInstances", - "ec2:DescribeInstanceAttribute", - "ec2:DescribeInstanceTypes", - "ec2:DescribeRouteTables", - "ec2:DescribeSecurityGroups", - "ec2:DescribeSubnets", - "ec2:DescribeVpcs", - "ec2:DescribeVolumes", - "ec2:DetachVolume", - "ec2:RunInstances", - "ec2:TerminateInstances" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - "servicequotas:ListServiceQuotas", - "servicequotas:GetServiceQuota" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - "elasticloadbalancing:CreateLoadBalancer", - "elasticloadbalancing:CreateTargetGroup", - "elasticloadbalancing:CreateListener", - "elasticloadbalancing:RegisterTargets", - "elasticloadbalancing:AddTags", - "elasticloadbalancing:DeleteLoadBalancer", - "elasticloadbalancing:DeleteTargetGroup", - "elasticloadbalancing:DeleteListener", - "elasticloadbalancing:DeregisterTargets" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - "acm:DescribeCertificate", - "acm:ListCertificates" - ], - "Resource": "*" - } - ] - } - ``` - - The `elasticloadbalancing:*` and `acm:*` permissions are only needed for provisioning gateways with ACM (AWS Certificate Manager) certificates. - -??? info "VPC" - By default, `dstack` uses the default VPC. It's possible to customize it: - - === "vpc_name" - - ```yaml - projects: - - name: main - backends: - - type: aws - creds: - type: default - - vpc_name: my-vpc - ``` - - === "vpc_ids" - ```yaml - projects: - - name: main - backends: - - type: aws - creds: - type: default - - default_vpcs: true - vpc_ids: - us-east-1: vpc-0a2b3c4d5e6f7g8h - us-east-2: vpc-9i8h7g6f5e4d3c2b - us-west-1: vpc-4d3c2b1a0f9e8d7 - ``` - - For the regions without configured `vpc_ids`, enable default VPCs by setting `default_vpcs` to `true`. - -??? info "Private subnets" - By default, `dstack` provisions instances with public IPs and permits inbound SSH traffic. - If you want `dstack` to use private subnets and provision instances without public IPs, set `public_ips` to `false`. - - ```yaml - projects: - - name: main - backends: - - type: aws - creds: - type: default - - public_ips: false - ``` - - Using private subnets assumes that both the `dstack` server and users can access the configured VPC's private subnets. - Additionally, private subnets must have outbound internet connectivity provided by NAT Gateway, Transit Gateway, or other mechanism. - -??? info "OS images" - By default, `dstack` uses its own [AMI :material-arrow-top-right-thin:{ .external }](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html) - optimized for `dstack`. 
- To use your own or other third-party images, set the `os_images` property: - - ```yaml - projects: - - name: main - backends: - - type: aws - creds: - type: default - - os_images: - cpu: - name: my-ami-for-cpu-instances - owner: self - user: dstack - nvidia: - name: 'Some ThirdParty CUDA image' - owner: 123456789012 - user: ubuntu - ``` - - Here, both `cpu` and `nvidia` properties are optional, but if the property is not set, you won´t be able to use the corresponding instance types. - - The `name` is an AMI name. - The `owner` is either an AWS account ID (a 12-digit number) or a special value `self` indicating the current account. - The `user` specifies an OS user for instance provisioning. - - !!! info "Image requirements" - * SSH server listening on port 22 - * `user` with passwordless sudo access - * Docker is installed - * (For NVIDIA instances) NVIDIA/CUDA drivers and NVIDIA Container Toolkit are installed - -#### Azure - -There are two ways to configure Azure: using a client secret or using the default credentials. - -=== "Default credentials" - - If you have default credentials set up, configure the backend like this: - -
- - ```yaml - projects: - - name: main - backends: - - type: azure - subscription_id: 06c82ce3-28ff-4285-a146-c5e981a9d808 - tenant_id: f84a7584-88e4-4fd2-8e97-623f0a715ee1 - creds: - type: default - ``` - -
- - If you don't know your `subscription_id` and `tenant_id`, use [Azure CLI :material-arrow-top-right-thin:{ .external }](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli): - - ```shell - az account show --query "{subscription_id: id, tenant_id: tenantId}" - ``` - -=== "Client secret" - - A client secret can be created using the [Azure CLI :material-arrow-top-right-thin:{ .external }](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli): - - ```shell - SUBSCRIPTION_ID=... - az ad sp create-for-rbac - --name dstack-app \ - --role $DSTACK_ROLE \ - --scopes /subscriptions/$SUBSCRIPTION_ID \ - --query "{ tenant_id: tenant, client_id: appId, client_secret: password }" - ``` - - Once you have `tenant_id`, `client_id`, and `client_secret`, go ahead and configure the backend. - -
- - ```yaml - projects: - - name: main - backends: - - type: azure - subscription_id: 06c82ce3-28ff-4285-a146-c5e981a9d808 - tenant_id: f84a7584-88e4-4fd2-8e97-623f0a715ee1 - creds: - type: client - client_id: acf3f73a-597b-46b6-98d9-748d75018ed0 - client_secret: 1Kb8Q~o3Q2hdEvrul9yaj5DJDFkuL3RG7lger2VQ - ``` - -
- - If you don't know your `subscription_id`, use [Azure CLI :material-arrow-top-right-thin:{ .external }](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli): - - ```shell - az account show --query "{subscription_id: id}" - ``` - -??? info "Required permissions" - The following Azure permissions are sufficient for `dstack` to work: - - ```json - { - "properties": { - "roleName": "dstack-role", - "description": "Minimal required permissions for using Azure with dstack", - "assignableScopes": [ - "/subscriptions/${YOUR_SUBSCRIPTION_ID}" - ], - "permissions": [ - { - "actions": [ - "Microsoft.Authorization/*/read", - "Microsoft.Compute/availabilitySets/*", - "Microsoft.Compute/locations/*", - "Microsoft.Compute/virtualMachines/*", - "Microsoft.Compute/virtualMachineScaleSets/*", - "Microsoft.Compute/cloudServices/*", - "Microsoft.Compute/disks/write", - "Microsoft.Compute/disks/read", - "Microsoft.Compute/disks/delete", - "Microsoft.Network/networkSecurityGroups/*", - "Microsoft.Network/locations/*", - "Microsoft.Network/virtualNetworks/*", - "Microsoft.Network/networkInterfaces/*", - "Microsoft.Network/publicIPAddresses/*", - "Microsoft.Resources/subscriptions/resourceGroups/read", - "Microsoft.Resources/subscriptions/resourceGroups/write", - "Microsoft.Resources/subscriptions/read" - ], - "notActions": [], - "dataActions": [], - "notDataActions": [] - } - ] - } - } - ``` - -??? info "VPC" - By default, `dstack` creates new Azure networks and subnets for every configured region. - It's possible to use custom networks by specifying `vpc_ids`: - - ```yaml - projects: - - name: main - backends: - - type: azure - creds: - type: default - regions: [westeurope] - vpc_ids: - westeurope: myNetworkResourceGroup/myNetworkName - ``` - - -??? info "Private subnets" - By default, `dstack` provisions instances with public IPs and permits inbound SSH traffic. - If you want `dstack` to use private subnets and provision instances without public IPs, - specify custom networks using `vpc_ids` and set `public_ips` to `false`. - - ```yaml - projects: - - name: main - backends: - - type: azure - creds: - type: default - regions: [westeurope] - vpc_ids: - westeurope: myNetworkResourceGroup/myNetworkName - public_ips: false - ``` - - Using private subnets assumes that both the `dstack` server and users can access the configured VPC's private subnets. - Additionally, private subnets must have outbound internet connectivity provided by [NAT Gateway or other mechanism](https://learn.microsoft.com/en-us/azure/nat-gateway/nat-overview). - -#### GCP - -There are two ways to configure GCP: using a service account or using the default credentials. - -=== "Default credentials" - - Enable GCP application default credentials: - - ```shell - gcloud auth application-default login - ``` - - Then configure the backend like this: - -
- - ```yaml - projects: - - name: main - backends: - - type: gcp - project_id: gcp-project-id - creds: - type: default - ``` - -
- -=== "Service account" - - To create a service account, follow [this guide :material-arrow-top-right-thin:{ .external }](https://cloud.google.com/iam/docs/service-accounts-create). After setting up the service account [create a key :material-arrow-top-right-thin:{ .external }](https://cloud.google.com/iam/docs/keys-create-delete) for it and download the corresponding JSON file. - - Then go ahead and configure the backend by specifying the downloaded file path. - -
- - ```yaml - projects: - - name: main - backends: - - type: gcp - project_id: gcp-project-id - creds: - type: service_account - filename: ~/.dstack/server/gcp-024ed630eab5.json - ``` - -
- -If you don't know your GCP project ID, use [Google Cloud CLI :material-arrow-top-right-thin:{ .external }](https://cloud.google.com/sdk/docs/install-sdk): - -```shell -gcloud projects list --format="json(projectId)" -``` - -??? info "Required permissions" - The following GCP permissions are sufficient for `dstack` to work: - - ``` - compute.disks.create - compute.disks.delete - compute.disks.get - compute.disks.list - compute.disks.setLabels - compute.disks.use - compute.firewalls.create - compute.images.useReadOnly - compute.instances.attachDisk - compute.instances.create - compute.instances.delete - compute.instances.detachDisk - compute.instances.get - compute.instances.setLabels - compute.instances.setMetadata - compute.instances.setServiceAccount - compute.instances.setTags - compute.networks.get - compute.networks.updatePolicy - compute.regions.get - compute.regions.list - compute.routers.list - compute.subnetworks.list - compute.subnetworks.use - compute.subnetworks.useExternalIp - compute.zoneOperations.get - ``` - - If you plan to use TPUs, additional permissions are required: - - ``` - tpu.nodes.create - tpu.nodes.get - tpu.nodes.update - tpu.nodes.delete - tpu.operations.get - tpu.operations.list - ``` - - Also, the use of TPUs requires the `serviceAccountUser` role. - For TPU VMs, dstack will use the default service account. - -??? info "Required APIs" - First, ensure the required APIs are enabled in your GCP `project_id`. - - ```shell - PROJECT_ID=... - gcloud config set project $PROJECT_ID - gcloud services enable cloudapis.googleapis.com - gcloud services enable compute.googleapis.com - ``` - -??? info "VPC" - - === "VPC" - -
- - ```yaml - projects: - - name: main - backends: - - type: gcp - project_id: gcp-project-id - creds: - type: default - - vpc_name: my-custom-vpc - ``` - -
- - === "Shared VPC" - -
- - ```yaml - projects: - - name: main - backends: - - type: gcp - project_id: gcp-project-id - creds: - type: default - - vpc_name: my-custom-vpc - vpc_project_id: another-project-id - ``` - -
- - When using a Shared VPC, ensure there is a firewall rule allowing `INGRESS` traffic on port `22`. - You can limit this rule to `dstack` instances using the `dstack-runner-instance` target tag. - - When using GCP gateways with a Shared VPC, also ensure there is a firewall rule allowing `INGRESS` traffic on ports `22`, `80`, `443`. - You can limit this rule to `dstack` gateway instances using the `dstack-gateway-instance` target tag. - - To use TPUs with a Shared VPC, you need to grant the TPU Service Account in your service project permissions - to manage resources in the host project by granting the "TPU Shared VPC Agent" (roles/tpu.xpnAgent) role - ([more in the GCP docs](https://cloud.google.com/tpu/docs/shared-vpc-networks#vpc-shared-vpc)). - -??? info "Private subnets" - By default, `dstack` provisions instances with public IPs and permits inbound SSH traffic. - If you want `dstack` to use private subnets and provision instances without public IPs, set `public_ips` to `false`. - - ```yaml - projects: - - name: main - backends: - - type: gcp - creds: - type: default - - public_ips: false - ``` - - Using private subnets assumes that both the `dstack` server and users can access the configured VPC's private subnets. - Additionally, [Cloud NAT](https://cloud.google.com/nat/docs/overview) must be configured to provide access to external resources for provisioned instances. - -#### Lambda - -Log into your [Lambda Cloud :material-arrow-top-right-thin:{ .external }](https://lambdalabs.com/service/gpu-cloud) account, click API keys in the sidebar, and then click the `Generate API key` -button to create a new API key. - -Then, go ahead and configure the backend: - -
- -```yaml -projects: -- name: main - backends: - - type: lambda - creds: - type: api_key - api_key: eersct_yrpiey-naaeedst-tk-_cb6ba38e1128464aea9bcc619e4ba2a5.iijPMi07obgt6TZ87v5qAEj61RVxhd0p -``` - -
- -#### RunPod - -Log into your [RunPod :material-arrow-top-right-thin:{ .external }](https://www.runpod.io/console/) console, click Settings in the sidebar, expand the `API Keys` section, and click -the button to create a Read & Write key. - -Then proceed to configuring the backend. - -
- -```yaml -projects: - - name: main - backends: - - type: runpod - creds: - type: api_key - api_key: US9XTPDIV8AR42MMINY8TCKRB8S4E7LNRQ6CAUQ9 -``` - -
- -#### Vast.ai - -Log into your [Vast.ai :material-arrow-top-right-thin:{ .external }](https://cloud.vast.ai/) account, click Account in the sidebar, and copy your -API Key. - -Then, go ahead and configure the backend: - -
- -```yaml -projects: -- name: main - backends: - - type: vastai - creds: - type: api_key - api_key: d75789f22f1908e0527c78a283b523dd73051c8c7d05456516fc91e9d4efd8c5 -``` - -
- -Also, the `vastai` backend supports on-demand instances only. Spot instance support coming soon. - -#### TensorDock - -Log into your [TensorDock :material-arrow-top-right-thin:{ .external }](https://dashboard.tensordock.com/) account, click Developers in the sidebar, and use the `Create an Authorization` section to create a new authorization key. - -Then, go ahead and configure the backend: - -
- -```yaml -projects: - - name: main - backends: - - type: tensordock - creds: - type: api_key - api_key: 248e621d-9317-7494-dc1557fa5825b-98b - api_token: FyBI3YbnFEYXdth2xqYRnQI7hiusssBC -``` - -
- -The `tensordock` backend supports on-demand instances only. Spot instance support coming soon. - -#### CUDO - -Log into your [CUDO Compute :material-arrow-top-right-thin:{ .external }](https://compute.cudo.org/) account, click API keys in the sidebar, and click the `Create an API key` button. - -Ensure you've created a project with CUDO Compute, then proceed to configuring the backend. - -
- -```yaml -projects: - - name: main - backends: - - type: cudo - project_id: my-cudo-project - creds: - type: api_key - api_key: 7487240a466624b48de22865589 -``` - -
- -#### OCI - -There are two ways to configure OCI: using client credentials or using the default credentials. - -=== "Default credentials" - If you have default credentials set up in `~/.oci/config`, configure the backend like this: - -
- - ```yaml - projects: - - name: main - backends: - - type: oci - creds: - type: default - ``` - -
- -=== "Client credentials" - - Log into the [OCI Console :material-arrow-top-right-thin:{ .external }](https://cloud.oracle.com), go to `My profile`, - select `API keys`, and click `Add API key`. - - Once you add a key, you'll see the configuration file. Copy its values to configure the backend as follows: - -
- - ```yaml - projects: - - name: main - backends: - - type: oci - creds: - type: client - user: ocid1.user.oc1..g5vlaeqfu47akmaafq665xsgmyaqjktyfxtacfxc4ftjxuca7aohnd2ev66m - tenancy: ocid1.tenancy.oc1..ajqsftvk4qarcfaak3ha4ycdsaahxmaita5frdwg3tqo2bcokpd3n7oizwai - region: eu-frankfurt-1 - fingerprint: 77:32:77:00:49:7c:cb:56:84:75:8e:77:96:7d:53:17 - key_file: ~/.oci/private_key.pem - ``` - -
- - Make sure to include either the path to your private key via `key_file` or the contents of the key via `key_content`. - -??? info "Required permissions" - - This is an example of a restrictive policy for a group of `dstack` users: - - ``` - Allow group to read compartments in tenancy where target.compartment.name = '' - Allow group to read marketplace-community-listings in compartment - Allow group to manage app-catalog-listing in compartment - Allow group to manage instances in compartment - Allow group to manage compute-capacity-reports in compartment - Allow group to manage volumes in compartment - Allow group to manage volume-attachments in compartment - Allow group to manage virtual-network-family in compartment - ``` - - To use this policy, create a compartment for `dstack` and specify it in `~/.dstack/server/config.yml`. - - ```yaml - projects: - - name: main - backends: - - type: oci - creds: - type: default - compartment_id: ocid1.compartment.oc1..aaaaaaaa - ``` - -#### DataCrunch - -Log into your [DataCrunch :material-arrow-top-right-thin:{ .external }](https://cloud.datacrunch.io/) account, click Keys in the sidebar, find `REST API Credentials` area and then click the `Generate Credentials` button. - -Then, go ahead and configure the backend: - -
- -```yaml -projects: - - name: main - backends: - - type: datacrunch - creds: - type: api_key - client_id: xfaHBqYEsArqhKWX-e52x3HH7w8T - client_secret: B5ZU5Qx9Nt8oGMlmMhNI3iglK8bjMhagTbylZy4WzncZe39995f7Vxh8 -``` - -
- -### On-prem servers { #on-prem } - -#### SSH fleets - -> For using `dstack` with on-prem servers, no backend configuration is required. -See [SSH fleets](../../concepts/fleets.md#ssh-fleets) for more details. - -#### Kubernetes - -To configure a Kubernetes backend, specify the path to the kubeconfig file, -and the port that `dstack` can use for proxying SSH traffic. -In case of a self-managed cluster, also specify the IP address of any node in the cluster. - -[//]: # (TODO: Mention that the Kind context has to be selected via `current-context` ) - -=== "Self-managed" - - Here's how to configure the backend to use a self-managed cluster. - -
- - ```yaml - projects: - - name: main - backends: - - type: kubernetes - kubeconfig: - filename: ~/.kube/config - networking: - ssh_host: localhost # The external IP address of any node - ssh_port: 32000 # Any port accessible outside of the cluster - ``` - -
- - The port specified to `ssh_port` must be accessible outside of the cluster. - - ??? info "Kind" - If you are using [Kind](https://kind.sigs.k8s.io/), make sure to make - to set up `ssh_port` via `extraPortMappings` for proxying SSH traffic: - - ```yaml - kind: Cluster - apiVersion: kind.x-k8s.io/v1alpha4 - nodes: - - role: control-plane - extraPortMappings: - - containerPort: 32000 # Must be same as `ssh_port` - hostPort: 32000 # Must be same as `ssh_port` - ``` - - Go ahead and create the cluster like this: - - ```shell - kind create cluster --config examples/misc/kubernetes/kind-config.yml - ``` - -[//]: # (TODO: Elaborate on the Kind's IP address on Linux) - -=== "Managed" - Here's how to configure the backend to use a managed cluster (AWS, GCP, Azure). - -
- - ```yaml - projects: - - name: main - backends: - - type: kubernetes - kubeconfig: - filename: ~/.kube/config - networking: - ssh_port: 32000 # Any port accessible outside of the cluster - ``` - -
- - The port specified to `ssh_port` must be accessible outside of the cluster. - - ??? info "EKS" - For example, if you are using EKS, make sure to add it via an ingress rule - of the corresponding security group: - - ```shell - aws ec2 authorize-security-group-ingress --group-id --protocol tcp --port 32000 --cidr 0.0.0.0/0 - ``` - -[//]: # (TODO: Elaborate on gateways, and what backends allow configuring them) - -[//]: # (TODO: Should we automatically detect ~/.kube/config) - -??? info "NVIDIA GPU Operator" - To use GPUs with Kubernetes, the cluster must be installed with the - [NVIDIA GPU Operator :material-arrow-top-right-thin:{ .external }](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/index.html). - - [//]: # (TODO: Provide short yet clear instructions. Elaborate on whether it works with Kind.) - -## Enable encryption { #encryption } - -By default, `dstack` stores data in plaintext. To enforce encryption, you -specify one or more encryption keys. - -`dstack` currently supports AES and identity (plaintext) encryption keys. -Support for external providers like HashiCorp Vault and AWS KMS is planned. - -=== "AES" - The `aes` encryption key encrypts data using [AES-256](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) in GCM mode. - To configure the `aes` encryption, generate a random 32-byte key: - -
- - ```shell - $ head -c 32 /dev/urandom | base64 - - opmx+r5xGJNVZeErnR0+n+ElF9ajzde37uggELxL - ``` - -
- - And specify it as `secret`: - - ```yaml - encryption: - keys: - - type: aes - name: key1 - secret: opmx+r5xGJNVZeErnR0+n+ElF9ajzde37uggELxL - ``` - -=== "Identity" - The `identity` encryption performs no encryption and stores data in plaintext. - You can specify an `identity` encryption key explicitly if you want to decrypt the data: - - ```yaml - encryption: - keys: - - type: identity - - type: aes - name: key1 - secret: opmx+r5xGJNVZeErnR0+n+ElF9ajzde37uggELxL - ``` - - With this configuration, the `aes` key will still be used to decrypt the old data, - but new writes will store the data in plaintext. - -??? info "Key rotation" - If multiple keys are specified, the first is used for encryption, and all are tried for decryption. This enables key - rotation by specifying a new encryption key. - - ```yaml - encryption: - keys: - - type: aes - name: key2 - secret: cR2r1JmkPyL6edBQeHKz6ZBjCfS2oWk87Gc2G3wHVoA= - - - type: aes - name: key1 - secret: E5yzN6V3XvBq/f085ISWFCdgnOGED0kuFaAkASlmmO4= - ``` - - Old keys may be deleted once all existing records have been updated to re-encrypt sensitive data. - Encrypted values are prefixed with key names, allowing DB admins to identify the keys used for encryption. - -[//]: # (## Default permissions) - -[//]: # (`dstack` supports changing default permissions. For example, by default all users) -[//]: # (can create and manage their own projects. You can specify `default_permissions`) -[//]: # (so that only global admins can create and manage projects:) - -[//]: # (
) - -[//]: # (```yaml) -[//]: # (default_permissions:) -[//]: # ( allow_non_admins_create_projects: false) -[//]: # (```) - -[//]: # (
)
-
-See the [reference table](#default-permissions) for all configurable permissions.
+to configure [backends](../../concepts/backends.md) and other [server-level settings](../../guides/server-deployment.md).
 
 ## Root reference
 
@@ -933,7 +9,7 @@ See the [reference table](#default-permissions) for all configurable permissions
     overrides:
       show_root_heading: false
 
-## `projects[n]` { #_projects data-toc-label="projects" }
+### `projects[n]` { #projects data-toc-label="projects" }
 
 #SCHEMA# dstack._internal.server.services.config.ProjectConfig
     overrides:
       show_root_heading: false
       backends:
         type: 'Union[AWSConfigInfoWithCreds, AzureConfigInfoWithCreds, GCPConfigInfoWithCreds, LambdaConfigInfoWithCreds, TensorDockConfigInfoWithCreds, VastAIConfigInfoWithCreds, KubernetesConfig]'
 
-## `projects[n].backends[type=aws]` { #_aws data-toc-label="backends[type=aws]" }
+#### `projects[n].backends` { #backends data-toc-label="backends" }
+
+##### `projects[n].backends[type=aws]` { #aws data-toc-label="aws" }
 
 #SCHEMA# dstack._internal.server.services.config.AWSConfig
     overrides:
       show_root_heading: false
       type:
         required: true
     item_id_prefix: aws-
 
-## `projects[n].backends[type=aws].creds` { #_aws-creds data-toc-label="backends[type=aws].creds" }
+###### `projects[n].backends[type=aws].creds` { #aws-creds data-toc-label="creds" }
 
 === "Access key"
     #SCHEMA# dstack._internal.core.models.backends.aws.AWSAccessKeyCreds
@@ -966,15 +44,24 @@ See the [reference table](#default-permissions) for all configurable permissions
       type:
         required: true
 
-## `projects[n].backends[type=aws].os_images` { #_aws-os-images data-toc-label="backends[type=aws].os_images" }
+###### `projects[n].backends[type=aws].os_images` { #aws-os_images data-toc-label="os_images" }
 
 #SCHEMA# dstack._internal.core.models.backends.aws.AWSOSImageConfig
+    overrides:
+      show_root_heading: false
+      type:
+        required: true
+    item_id_prefix: aws-os_images-
+
+###### `projects[n].backends[type=aws].os_images.cpu` { #aws-os_images-cpu data-toc-label="cpu" }
+
+#SCHEMA# dstack._internal.core.models.backends.aws.AWSOSImage
     overrides:
       show_root_heading: false
       type:
         required: true
 
-## `projects[n].backends[type=aws].os_images.*` { #_aws-os-image data-toc-label="backends[type=aws].os_images.*" }
+###### `projects[n].backends[type=aws].os_images.nvidia` { #aws-os_images-nvidia data-toc-label="nvidia" }
 
 #SCHEMA# dstack._internal.core.models.backends.aws.AWSOSImage
     overrides:
       show_root_heading: false
       type:
         required: true
 
-## `projects[n].backends[type=azure]` { #_azure data-toc-label="backends[type=azure]" }
+##### `projects[n].backends[type=azure]` { #azure data-toc-label="azure" }
 
 #SCHEMA# dstack._internal.server.services.config.AzureConfig
     overrides:
       show_root_heading: false
       type:
         required: true
     item_id_prefix: azure-
 
-## `projects[n].backends[type=azure].creds` { #_azure-creds data-toc-label="backends[type=azure].creds" }
+###### `projects[n].backends[type=azure].creds` { #azure-creds data-toc-label="creds" }
 
 === "Client"
     #SCHEMA# dstack._internal.core.models.backends.azure.AzureClientCreds
@@ -1007,7 +94,7 @@ See the [reference table](#default-permissions) for all configurable permissions
       type:
         required: true
 
-## `projects[n].backends[type=gcp]` { #_gcp data-toc-label="backends[type=gcp]" 
}
+##### `projects[n].backends[type=gcp]` { #gcp data-toc-label="gcp" }
 
 #SCHEMA# dstack._internal.server.services.config.GCPConfig
     overrides:
@@ -1016,7 +103,7 @@ See the [reference table](#default-permissions) for all configurable permissions
       required: true
     item_id_prefix: gcp-
 
-## `projects[n].backends[type=gcp].creds` { #_gcp-creds data-toc-label="backends[type=gcp].creds" }
+###### `projects[n].backends[type=gcp].creds` { #gcp-creds data-toc-label="creds" }
 
 === "Service account"
     #SCHEMA# dstack._internal.server.services.config.GCPServiceAccountCreds
@@ -1039,7 +126,7 @@ See the [reference table](#default-permissions) for all configurable permissions
       type:
         required: true
 
-## `projects[n].backends[type=lambda]` { #_lambda data-toc-label="backends[type=lambda]" }
+##### `projects[n].backends[type=lambda]` { #lambda data-toc-label="lambda" }
 
 #SCHEMA# dstack._internal.server.services.config.LambdaConfig
     overrides:
@@ -1048,7 +135,7 @@ See the [reference table](#default-permissions) for all configurable permissions
       required: true
     item_id_prefix: lambda-
 
-## `projects[n].backends[type=lambda].creds` { #_lambda-creds data-toc-label="backends[type=lambda].creds" }
+###### `projects[n].backends[type=lambda].creds` { #lambda-creds data-toc-label="creds" }
 
 #SCHEMA# dstack._internal.core.models.backends.lambdalabs.LambdaAPIKeyCreds
     overrides:
@@ -1056,7 +143,7 @@ See the [reference table](#default-permissions) for all configurable permissions
       type:
         required: true
 
-## `projects[n].backends[type=runpod]` { #_runpod data-toc-label="backends[type=runpod]" }
+##### `projects[n].backends[type=runpod]` { #runpod data-toc-label="runpod" }
 
 #SCHEMA# dstack._internal.server.services.config.RunpodConfig
     overrides:
@@ -1065,7 +152,7 @@ See the [reference table](#default-permissions) for all configurable permissions
       required: true
     item_id_prefix: runpod-
 
-## `projects[n].backends[type=runpod].creds` { #_runpod-creds data-toc-label="backends[type=runpod].creds" }
+###### `projects[n].backends[type=runpod].creds` { #runpod-creds data-toc-label="creds" }
 
 #SCHEMA# dstack._internal.core.models.backends.runpod.RunpodAPIKeyCreds
     overrides:
@@ -1073,7 +160,7 @@ See the [reference table](#default-permissions) for all configurable permissions
       type:
         required: true
 
-## `projects[n].backends[type=vastai]` { #_vastai data-toc-label="backends[type=vastai]" }
+##### `projects[n].backends[type=vastai]` { #vastai data-toc-label="vastai" }
 
 #SCHEMA# dstack._internal.server.services.config.VastAIConfig
     overrides:
@@ -1082,7 +169,7 @@ See the [reference table](#default-permissions) for all configurable permissions
       required: true
     item_id_prefix: vastai-
 
-## `projects[n].backends[type=vastai].creds` { #_vastai-creds data-toc-label="backends[type=vastai].creds" }
+###### `projects[n].backends[type=vastai].creds` { #vastai-creds data-toc-label="creds" }
 
 #SCHEMA# dstack._internal.core.models.backends.vastai.VastAIAPIKeyCreds
     overrides:
@@ -1090,7 +177,7 @@ See the [reference table](#default-permissions) for all configurable permissions
      type:
        required: true
 
-## `projects[n].backends[type=tensordock]` { #_tensordock data-toc-label="backends[type=tensordock]" }
+##### `projects[n].backends[type=tensordock]` { #tensordock data-toc-label="tensordock" }
 
 #SCHEMA# dstack._internal.server.services.config.TensorDockConfig
     overrides:
@@ -1099,7 +186,7 @@ See the [reference table](#default-permissions) for all configurable permissions
       required: true
     item_id_prefix: tensordock-
 
-## `projects[n].backends[type=tensordock].creds` { #_tensordock-creds 
data-toc-label="backends[type=tensordock].creds" } +###### `projects[n].backends[type=tensordock].creds` { #tensordock-creds data-toc-label="creds" } #SCHEMA# dstack._internal.core.models.backends.tensordock.TensorDockAPIKeyCreds overrides: @@ -1107,7 +194,7 @@ See the [reference table](#default-permissions) for all configurable permissions type: required: true -## `projects[n].backends[type=oci]` { #_oci data-toc-label="backends[type=oci]" } +##### `projects[n].backends[type=oci]` { #oci data-toc-label="oci" } #SCHEMA# dstack._internal.server.services.config.OCIConfig overrides: @@ -1116,7 +203,7 @@ See the [reference table](#default-permissions) for all configurable permissions required: true item_id_prefix: oci- -## `projects[n].backends[type=oci].creds` { #_oci-creds data-toc-label="backends[type=oci].creds" } +###### `projects[n].backends[type=oci].creds` { #oci-creds data-toc-label="creds" } === "Client" #SCHEMA# dstack._internal.core.models.backends.oci.OCIClientCreds @@ -1132,7 +219,7 @@ See the [reference table](#default-permissions) for all configurable permissions type: required: true -## `projects[n].backends[type=cudo]` { #_cudo data-toc-label="backends[type=cudo]" } +##### `projects[n].backends[type=cudo]` { #cudo data-toc-label="cudo" } #SCHEMA# dstack._internal.server.services.config.CudoConfig overrides: @@ -1141,7 +228,7 @@ See the [reference table](#default-permissions) for all configurable permissions required: true item_id_prefix: cudo- -## `projects[n].backends[type=cudo].creds` { #_cudo-creds data-toc-label="backends[type=cudo].creds" } +###### `projects[n].backends[type=cudo].creds` { #cudo-creds data-toc-label="creds" } #SCHEMA# dstack._internal.core.models.backends.cudo.CudoAPIKeyCreds overrides: @@ -1149,7 +236,7 @@ See the [reference table](#default-permissions) for all configurable permissions type: required: true -## `projects[n].backends[type=datacrunch]` { #_datacrunch data-toc-label="backends[type=datacrunch]" } +##### `projects[n].backends[type=datacrunch]` { #datacrunch data-toc-label="datacrunch" } #SCHEMA# dstack._internal.server.services.config.DataCrunchConfig overrides: @@ -1158,7 +245,7 @@ See the [reference table](#default-permissions) for all configurable permissions required: true item_id_prefix: datacrunch- -## `projects[n].backends[type=datacrunch].creds` { #_datacrunch-creds data-toc-label="backends[type=datacrunch].creds" } +###### `projects[n].backends[type=datacrunch].creds` { #datacrunch-creds data-toc-label="creds" } #SCHEMA# dstack._internal.core.models.backends.datacrunch.DataCrunchAPIKeyCreds overrides: @@ -1166,15 +253,16 @@ See the [reference table](#default-permissions) for all configurable permissions type: required: true -## `projects[n].backends[type=kubernetes]` { #_kubernetes data-toc-label="backends[type=kubernetes]" } +##### `projects[n].backends[type=kubernetes]` { #kubernetes data-toc-label="kubernetes" } #SCHEMA# dstack._internal.server.services.config.KubernetesConfig overrides: show_root_heading: false type: required: true + item_id_prefix: kubernetes- -## `projects[n].backends[type=kubernetes].kubeconfig` { #_kubeconfig data-toc-label="kubeconfig" } +###### `projects[n].backends[type=kubernetes].kubeconfig` { #kubernetes-kubeconfig data-toc-label="kubeconfig" } ##SCHEMA# dstack._internal.server.services.config.KubeconfigConfig overrides: @@ -1187,19 +275,21 @@ See the [reference table](#default-permissions) for all configurable permissions cat my-service-account-file.json | jq -c | jq -R ``` -## 
`projects[n].backends[type=kubernetes].networking` { #_networking data-toc-label="networking" }
+###### `projects[n].backends[type=kubernetes].networking` { #kubernetes-networking data-toc-label="networking" }
 
 ##SCHEMA# dstack._internal.core.models.backends.kubernetes.KubernetesNetworkingConfig
     overrides:
       show_root_heading: false
 
-## `encryption` { #_encryption data-toc-label="encryption" }
+### `encryption` { #encryption data-toc-label="encryption" }
 
 #SCHEMA# dstack._internal.server.services.config.EncryptionConfig
     overrides:
       show_root_heading: false
 
-## `encryption.keys[n][type=identity]` { #_encryption-keys-identity data-toc-label="encryption.keys.identity" }
+#### `encryption.keys` { #encryption-keys data-toc-label="keys" }
+
+##### `encryption.keys[n][type=identity]` { #encryption-keys-identity data-toc-label="identity" }
 
 #SCHEMA# dstack._internal.server.services.encryption.keys.identity.IdentityEncryptionKeyConfig
     overrides:
@@ -1207,7 +297,7 @@ See the [reference table](#default-permissions) for all configurable permissions
       type:
         required: true
 
-## `encryption.keys[n][type=aes]` { #_encryption-keys-aes data-toc-label="encryption.keys.aes" }
+##### `encryption.keys[n][type=aes]` { #encryption-keys-aes data-toc-label="aes" }
 
 #SCHEMA# dstack._internal.server.services.encryption.keys.aes.AESEncryptionKeyConfig
     overrides:
@@ -1215,8 +305,97 @@ See the [reference table](#default-permissions) for all configurable permissions
       type:
         required: true
 
-## `default_permissions` { #_default-permissions data-toc-label="default-permissions" }
+### `default_permissions` { #default_permissions data-toc-label="default_permissions" }
 
 #SCHEMA# dstack._internal.server.services.permissions.DefaultPermissions
     overrides:
       show_root_heading: false
+
+## Examples
+
+> The `dstack` server allows you to configure backends for multiple projects.
+> If you don't need multiple projects, use only the `main` project.
+
+### Encryption keys { #examples-encryption }
+
+By default, `dstack` stores data in plaintext. To enforce encryption, you
+specify one or more encryption keys.
+
+`dstack` currently supports AES and identity (plaintext) encryption keys.
+Support for external providers like HashiCorp Vault and AWS KMS is planned.
+
+=== "AES"
+    The `aes` encryption key encrypts data using [AES-256](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) in GCM mode.
+    To configure the `aes` encryption, generate a random 32-byte key:
+
+
+ + ```shell + $ head -c 32 /dev/urandom | base64 + + opmx+r5xGJNVZeErnR0+n+ElF9ajzde37uggELxL + ``` + +
+ + And specify it as `secret`: + + ```yaml + encryption: + keys: + - type: aes + name: key1 + secret: opmx+r5xGJNVZeErnR0+n+ElF9ajzde37uggELxL + ``` + +=== "Identity" + The `identity` encryption performs no encryption and stores data in plaintext. + You can specify an `identity` encryption key explicitly if you want to decrypt the data: + + ```yaml + encryption: + keys: + - type: identity + - type: aes + name: key1 + secret: opmx+r5xGJNVZeErnR0+n+ElF9ajzde37uggELxL + ``` + + With this configuration, the `aes` key will still be used to decrypt the old data, + but new writes will store the data in plaintext. + +??? info "Key rotation" + If multiple keys are specified, the first is used for encryption, and all are tried for decryption. This enables key + rotation by specifying a new encryption key. + + ```yaml + encryption: + keys: + - type: aes + name: key2 + secret: cR2r1JmkPyL6edBQeHKz6ZBjCfS2oWk87Gc2G3wHVoA= + + - type: aes + name: key1 + secret: E5yzN6V3XvBq/f085ISWFCdgnOGED0kuFaAkASlmmO4= + ``` + + Old keys may be deleted once all existing records have been updated to re-encrypt sensitive data. + Encrypted values are prefixed with key names, allowing DB admins to identify the keys used for encryption. + +[//]: # (## Default permissions) + +[//]: # (`dstack` supports changing default permissions. For example, by default all users) +[//]: # (can create and manage their own projects. You can specify `default_permissions`) +[//]: # (so that only global admins can create and manage projects:) + +[//]: # (
) + +[//]: # (```yaml) +[//]: # (default_permissions:) +[//]: # ( allow_non_admins_create_projects: false) +[//]: # (```) + +[//]: # (
)
+
+See the [reference table](#default_permissions) for all configurable permissions.
\ No newline at end of file
diff --git a/docs/docs/services.md b/docs/docs/services.md
deleted file mode 100644
index 4a1599a84..000000000
--- a/docs/docs/services.md
+++ /dev/null
@@ -1,202 +0,0 @@
-# Services
-
-Services allow you to deploy models or any web app as a secure and scalable endpoint.
-
-When running models, services provide access through the unified OpenAI-compatible endpoint.
-
-## Define a configuration
-
-First, define a service configuration as a YAML file in your project folder.
-The filename must end with `.dstack.yml` (e.g. `.dstack.yml` or `dev.dstack.yml` are both acceptable).
-
-
- -```yaml -type: service -name: llama31 - -# If `image` is not specified, dstack uses its default image -python: "3.11" -env: - - HF_TOKEN - - MODEL_ID=meta-llama/Meta-Llama-3.1-8B-Instruct - - MAX_MODEL_LEN=4096 -commands: - - pip install vllm - - vllm serve $MODEL_ID - --max-model-len $MAX_MODEL_LEN - --tensor-parallel-size $DSTACK_GPUS_NUM -port: 8000 -# Register the model -model: meta-llama/Meta-Llama-3.1-8B-Instruct - -# Uncomment to leverage spot instances -#spot_policy: auto - -resources: - gpu: 24GB -``` - -
- -Note, the `model` property is optional and not needed when deploying a non-OpenAI-compatible model or a regular web app. - -!!! info "Docker image" - If you don't specify your Docker image, `dstack` uses the [base](https://hub.docker.com/r/dstackai/base/tags) image - pre-configured with Python, Conda, and essential CUDA drivers. - -!!! info "Gateway" - To enable [auto-scaling](reference/dstack.yml/service.md#auto-scaling), or use a custom domain with HTTPS, - set up a [gateway](concepts/gateways.md) before running the service. - If you're using [dstack Sky :material-arrow-top-right-thin:{ .external }](https://sky.dstack.ai){:target="_blank"}, - a gateway is pre-configured for you. - -!!! info "Reference" - See [.dstack.yml](reference/dstack.yml/service.md) for all the options supported by - services, along with multiple examples. - -## Run a service - -To run a service, pass the configuration to [`dstack apply`](reference/cli/dstack/apply.md): - -
- -```shell -$ HF_TOKEN=... -$ dstack apply -f service.dstack.yml - - # BACKEND REGION RESOURCES SPOT PRICE - 1 runpod CA-MTL-1 18xCPU, 100GB, A5000:24GB:2 yes $0.22 - 2 runpod EU-SE-1 18xCPU, 100GB, A5000:24GB:2 yes $0.22 - 3 gcp us-west4 27xCPU, 150GB, A5000:24GB:3 yes $0.33 - -Submit the run llama31? [y/n]: y - -Provisioning... ----> 100% - -Service is published at: - http://localhost:3000/proxy/services/main/llama31/ -Model meta-llama/Meta-Llama-3.1-8B-Instruct is published at: - http://localhost:3000/proxy/models/main/ -``` - -
- -`dstack apply` automatically provisions instances, uploads the contents of the repo (incl. your local uncommitted changes), -and runs the service. - -## Access the endpoint - -### Service - -If a [gateway](concepts/gateways.md) is not configured, the service’s endpoint will be accessible at -`/proxy/services///`. -If a [gateway](concepts/gateways.md) is configured, the service endpoint will be accessible at -`https://.`. - -
- -```shell -$ curl http://localhost:3000/proxy/services/main/llama31/v1/chat/completions \ - -H 'Content-Type: application/json' \ - -H 'Authorization: Bearer <dstack token>' \ - -d '{ - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", - "messages": [ - { - "role": "user", - "content": "Compose a poem that explains the concept of recursion in programming." - } - ] - }' -``` - -
- -!!! info "Auth" - By default, the service endpoint requires the `Authorization` header with `Bearer `. - Authorization can be disabled by setting [`auth`](reference/dstack.yml/service.md#authorization) to `false` in the - service configuration file. - -### Model - -If the service defines the `model` property, the model can be accessed with -the OpenAI-compatible endpoint at `/proxy/models//`, -or via the control plane UI's playground. - -When a [gateway](concepts/gateways.md) is configured, the OpenAI-compatible endpoint is available at `https://gateway./`. - -## Manage runs - -### List runs - -The [`dstack ps`](reference/cli/dstack/ps.md) command lists all running jobs and their statuses. -Use `--watch` (or `-w`) to monitor the live status of runs. - -### Stop a run - -A service runs until you stop it or its lifetime exceeds [`max_duration`](reference/dstack.yml/dev-environment.md#max_duration). -To gracefully stop a service, use [`dstack stop`](reference/cli/dstack/stop.md). -Pass `--abort` or `-x` to stop without waiting for a graceful shutdown. - -### Attach to a run - -By default, `dstack apply` runs in attached mode – it establishes the SSH tunnel to the run, forwards ports, and shows real-time logs. -If you detached from a run, you can reattach to it using [`dstack attach`](reference/cli/dstack/attach.md). - -### See run logs - -To see the logs of a run without attaching, use [`dstack logs`](reference/cli/dstack/logs.md). -Pass `--diagnose`/`-d` to `dstack logs` to see the diagnostics logs. It may be useful if a run fails. -For more information on debugging failed runs, see the [troubleshooting](guides/troubleshooting.md) guide. - -## Manage fleets - -Fleets are groups of cloud instances or SSH machines that you use to run dev environments, tasks, and services. -You can let `dstack apply` provision fleets or [create and manage them directly](concepts/fleets.md). - -### Creation policy - -By default, when you run `dstack apply` with a dev environment, task, or service, -`dstack` reuses `idle` instances from an existing [fleet](concepts/fleets.md). -If no `idle` instances match the requirements, it automatically creates a new fleet -using backends. - -To ensure `dstack apply` doesn't create a new fleet but reuses an existing one, -pass `-R` (or `--reuse`) to `dstack apply`. - -
- -```shell -$ dstack apply -R -f examples/.dstack.yml -``` - -
- -Alternatively, set [`creation_policy`](reference/dstack.yml/dev-environment.md#creation_policy) to `reuse` in the run configuration. - -### Idle duration - -If a fleet is created automatically, it stays `idle` for 5 minutes by default and can be reused within that time. -If the fleet is not reused within this period, it is automatically terminated. -To change the default idle duration, set -[`idle_duration`](reference/dstack.yml/fleet.md#idle_duration) in the run configuration (e.g., `0s`, `1m`, or `off` for -unlimited). - -!!! info "Fleets" - For greater control over fleet provisioning, configuration, and lifecycle management, it is recommended to use - [fleets](concepts/fleets.md) directly. - -## What's next? - -1. Read about [dev environments](dev-environments.md), [tasks](tasks.md), and [repos](concepts/repos.md) -2. Learn how to manage [fleets](concepts/fleets.md) -3. See how to set up [gateways](concepts/gateways.md) -4. Check the [TGI :material-arrow-top-right-thin:{ .external }](/examples/deployment/tgi/){:target="_blank"}, - [vLLM :material-arrow-top-right-thin:{ .external }](/examples/deployment/vllm/){:target="_blank"}, and - [NIM :material-arrow-top-right-thin:{ .external }](/examples/deployment/nim/){:target="_blank"} examples - -!!! info "Reference" - See [.dstack.yml](reference/dstack.yml/service.md) for all the options supported by - services, along with multiple examples. diff --git a/docs/docs/tasks.md b/docs/docs/tasks.md deleted file mode 100644 index 53521a05d..000000000 --- a/docs/docs/tasks.md +++ /dev/null @@ -1,164 +0,0 @@ -# Tasks - -A task allows you to run arbitrary commands on one or more nodes. -They are best suited for one-off jobs like training or batch processing, -but can also be used for serving apps if features supported by [services](`services.md`) are not required. - -## Define a configuration - -First, define a task configuration as a YAML file in your project folder. -The filename must end with `.dstack.yml` (e.g. `.dstack.yml` or `dev.dstack.yml` are both acceptable). - -[//]: # (TODO: Make tabs - single machine & distributed tasks & web app) - -
- -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: axolotl-train - -# Using the official Axolotl's Docker image -image: winglian/axolotl-cloud:main-20240429-py3.11-cu121-2.2.1 - -# Required environment variables -env: - - HF_TOKEN - - WANDB_API_KEY -# Commands of the task -commands: - - accelerate launch -m axolotl.cli.train examples/fine-tuning/axolotl/config.yaml - -resources: - gpu: - # 24GB or more vRAM - memory: 24GB.. - # Two or more GPU - count: 2.. -``` - -
- -!!! info "Docker image" - If you don't specify your Docker image, `dstack` uses the [base](https://hub.docker.com/r/dstackai/base/tags) image - pre-configured with Python, Conda, and essential CUDA drivers. - -!!! info "Distributed tasks" - By default, tasks run on a single instance. However, you can specify - the [number of nodes](reference/dstack.yml/task.md#distributed-tasks). - In this case, the task will run on a cluster of instances. - -!!! info "Reference" - See [.dstack.yml](reference/dstack.yml/task.md) for all the options supported by - tasks, along with multiple examples. - -## Run a configuration - -To run a task, pass the configuration to [`dstack apply`](reference/cli/dstack/apply.md): - -
- -```shell -$ HF_TOKEN=... -$ WANDB_API_KEY=... -$ dstack apply -f examples/.dstack.yml - - # BACKEND REGION RESOURCES SPOT PRICE - 1 runpod CA-MTL-1 18xCPU, 100GB, A5000:24GB:2 yes $0.22 - 2 runpod EU-SE-1 18xCPU, 100GB, A5000:24GB:2 yes $0.22 - 3 gcp us-west4 27xCPU, 150GB, A5000:24GB:3 yes $0.33 - -Submit the run axolotl-train? [y/n]: y - -Launching `axolotl-train`... ----> 100% - -{'loss': 1.4967, 'grad_norm': 1.2734375, 'learning_rate': 1.0000000000000002e-06, 'epoch': 0.0} - 0% 1/24680 [00:13<95:34:17, 13.94s/it] - 6% 73/1300 [00:48<13:57, 1.47it/s] -``` - -
- -`dstack apply` automatically provisions instances, uploads the contents of the repo (incl. your local uncommitted changes), -and runs the commands. - -!!! info "Ports" - If the task specifies [`ports`](reference/dstack.yml/task.md#_ports), `dstack apply` automatically forwards them to your - local machine for convenient and secure access. - -!!! info "Queueing tasks" - By default, if `dstack apply` cannot find capacity, the task fails. - To queue the task and wait for capacity, specify the [`retry`](reference/dstack.yml/task.md#queueing-tasks) - property in the task configuration. - -## Manage runs - -### List runs - -The [`dstack ps`](reference/cli/dstack/ps.md) command lists all running jobs and their statuses. -Use `--watch` (or `-w`) to monitor the live status of runs. - -### Stop a run - -A task runs until it's completed or its lifetime exceeds [`max_duration`](reference/dstack.yml/dev-environment.md#max_duration). -You can also gracefully stop a task using [`dstack stop`](reference/cli/dstack/stop.md). -Pass `--abort` or `-x` to stop without waiting for a graceful shutdown. - -### Attach to a run - -By default, `dstack apply` runs in attached mode – it establishes the SSH tunnel to the run, forwards ports, and shows real-time logs. -If you detached from a run, you can reattach to it using [`dstack attach`](reference/cli/dstack/attach.md). - -### See run logs - -To see the logs of a run without attaching, use [`dstack logs`](reference/cli/dstack/logs.md). -Pass `--diagnose`/`-d` to `dstack logs` to see the diagnostics logs. It may be useful if a run fails. -For more information on debugging failed runs, see the [troubleshooting](guides/troubleshooting.md) guide. - -## Manage fleets - -Fleets are groups of cloud instances or SSH machines that you use to run dev environments, tasks, and services. -You can let `dstack apply` provision fleets or [create and manage them directly](concepts/fleets.md). - -### Creation policy - -By default, when you run `dstack apply` with a dev environment, task, or service, -`dstack` reuses `idle` instances from an existing [fleet](concepts/fleets.md). -If no `idle` instances match the requirements, `dstack` automatically creates a new fleet -using configured backends. - -To ensure `dstack apply` doesn't create a new fleet but reuses an existing one, -pass `-R` (or `--reuse`) to `dstack apply`. - -
- -```shell -$ dstack apply -R -f examples/.dstack.yml -``` - -
- -Alternatively, set [`creation_policy`](reference/dstack.yml/dev-environment.md#creation_policy) to `reuse` in the run configuration. - -### Idle duration - -If a fleet is created automatically, it stays `idle` for 5 minutes by default and can be reused within that time. -If the fleet is not reused within this period, it is automatically terminated. -To change the default idle duration, set -[`idle_duration`](reference/dstack.yml/fleet.md#idle_duration) in the run configuration (e.g., `0s`, `1m`, or `off` for -unlimited). - -!!! info "Fleets" - For greater control over fleet provisioning, configuration, and lifecycle management, it is recommended to use - [fleets](concepts/fleets.md) directly. - -## What's next? - -1. Read about [dev environments](dev-environments.md), [services](services.md), and [repos](concepts/repos.md) -2. Learn how to manage [fleets](concepts/fleets.md) -3. Check the [Axolotl](/examples/fine-tuning/axolotl) example - -!!! info "Reference" - See [.dstack.yml](reference/dstack.yml/task.md) for all the options supported by - tasks, along with multiple examples. diff --git a/docs/overrides/home.html b/docs/overrides/home.html index b6099eb53..4a73df03b 100644 --- a/docs/overrides/home.html +++ b/docs/overrides/home.html @@ -395,7 +395,7 @@

Get started in under a minute

- +
SSH fleets
@@ -604,4 +604,4 @@

FAQ

-{% endblock %} +{% endblock %} \ No newline at end of file diff --git a/docs/overrides/main.html b/docs/overrides/main.html index f824ceb57..14110a795 100644 --- a/docs/overrides/main.html +++ b/docs/overrides/main.html @@ -134,4 +134,31 @@
+{% endblock %} + +{% block site_nav %} + {% if nav %} + {% if page.meta and page.meta.hide %} + {% set hidden = "hidden" if "navigation" in page.meta.hide %} + {% endif %} + + {% endif %} + {% if "toc.integrate" not in features %} + {% if page.meta and page.meta.hide %} + {% set hidden = "hidden" if "toc" in page.meta.hide %} + {% endif %} + + {% endif %} {% endblock %} \ No newline at end of file diff --git a/docs/overrides/toc-item.html b/docs/overrides/toc-item.html new file mode 100644 index 000000000..a4618bcbd --- /dev/null +++ b/docs/overrides/toc-item.html @@ -0,0 +1,25 @@ +{#- + This file was automatically generated - do not edit +-#} +
  • + + + {% if toc_item.typeset %} + + {{ toc_item.typeset.title }} + + {% else %} + {{ toc_item.title }} + {% endif %} + + + {% if toc_item.children %} + + {% endif %} +
  • diff --git a/docs/overrides/toc.html b/docs/overrides/toc.html new file mode 100644 index 000000000..577f4988a --- /dev/null +++ b/docs/overrides/toc.html @@ -0,0 +1,25 @@ +{#- + This file was automatically generated - do not edit +-#} +{% set title = lang.t("toc") %} +{% if config.mdx_configs.toc and config.mdx_configs.toc.title %} + {% set title = config.mdx_configs.toc.title %} +{% endif %} + diff --git a/examples/accelerators/amd/README.md b/examples/accelerators/amd/README.md index bb0dd67db..7abad7f19 100644 --- a/examples/accelerators/amd/README.md +++ b/examples/accelerators/amd/README.md @@ -1,7 +1,7 @@ # AMD `dstack` supports running dev environments, tasks, and services on AMD GPUs. -You can do that by setting up an [SSH fleet](https://dstack.ai/docs/concepts/fleets#ssh-fleets) +You can do that by setting up an [SSH fleet](https://dstack.ai/docs/concepts/fleets#ssh) with on-prem AMD GPUs or configuring a backend that offers AMD GPUs such as the `runpod` backend. ## Deployment diff --git a/examples/fine-tuning/alignment-handbook/fleet-distrib.dstack.yml b/examples/fine-tuning/alignment-handbook/fleet-distrib.dstack.yml index 94ac80bbc..6cc965fe7 100644 --- a/examples/fine-tuning/alignment-handbook/fleet-distrib.dstack.yml +++ b/examples/fine-tuning/alignment-handbook/fleet-distrib.dstack.yml @@ -10,7 +10,7 @@ placement: cluster # Uncomment to leverage spot instances #spot_policy: auto # Terminate instances if not used for one hour -termination_idle_time: 1h +idle_duration: 1h resources: gpu: diff --git a/examples/fine-tuning/alignment-handbook/fleet.dstack.yml b/examples/fine-tuning/alignment-handbook/fleet.dstack.yml index 7caad34ee..d7480b86b 100644 --- a/examples/fine-tuning/alignment-handbook/fleet.dstack.yml +++ b/examples/fine-tuning/alignment-handbook/fleet.dstack.yml @@ -8,7 +8,7 @@ nodes: 1 # Uncomment to leverage spot instances #spot_policy: auto # Terminate the instance if not used for one hour -termination_idle_time: 1h +idle_duration: 1h resources: gpu: diff --git a/mkdocs.yml b/mkdocs.yml index 448c24ef9..162e3868d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -109,9 +109,10 @@ plugins: 'changelog/0.16.1.md': 'https://github.com/dstackai/dstack/releases/0.16.1' 'changelog/0.17.0.md': 'https://github.com/dstackai/dstack/releases/0.17.0' 'changelog/0.18.0.md': 'https://github.com/dstackai/dstack/releases/0.18.0' - 'docs/concepts/dev-environments.md': 'docs/dev-environments.md' - 'docs/concepts/tasks.md': 'docs/tasks.md' - 'docs/concepts/services.md': 'docs/services.md' + 'docs/concepts/projects.md': 'docs/guides/administration.md' + 'docs/dev-environments.md': 'docs/concepts/dev-environments.md' + 'docs/tasks.md': 'docs/concepts/tasks.md' + 'docs/services.md': 'docs/concepts/services.md' 'docs/fleets.md': 'docs/concepts/fleets.md' 'docs/examples/index.md': 'examples.md' 'docs/examples/llms/llama31.md': 'examples/llms/llama31/index.md' @@ -156,6 +157,8 @@ markdown_extensions: - pymdownx.highlight - pymdownx.details - pymdownx.superfences + - pymdownx.snippets: + base_path: ["docs"] - pymdownx.tabbed: alternate_style: true slugify: !!python/object/apply:pymdownx.slugs.slugify @@ -164,7 +167,7 @@ markdown_extensions: - pymdownx.tasklist: custom_checkbox: true - toc: - toc_depth: 3 + toc_depth: 5 permalink: true - attr_list - md_in_html @@ -208,18 +211,19 @@ nav: - Installation: docs/installation/index.md - Quickstart: docs/quickstart.md - Concepts: - - Dev environments: docs/dev-environments.md - - Tasks: docs/tasks.md - - Services: docs/services.md - - Repos: 
docs/concepts/repos.md + - Backends: docs/concepts/backends.md + - Dev environments: docs/concepts/dev-environments.md + - Tasks: docs/concepts/tasks.md + - Services: docs/concepts/services.md - Fleets: docs/concepts/fleets.md - Volumes: docs/concepts/volumes.md - Gateways: docs/concepts/gateways.md - - Projects: docs/concepts/projects.md + - Repos: docs/concepts/repos.md - Guides: - Protips: docs/guides/protips.md - Server deployment: docs/guides/server-deployment.md - Troubleshooting: docs/guides/troubleshooting.md + - Administration: docs/guides/administration.md - Reference: - .dstack.yml: - dev-environment: docs/reference/dstack.yml/dev-environment.md diff --git a/scripts/docs/gen_schema_reference.py b/scripts/docs/gen_schema_reference.py index 382bb4ae4..4337c0a76 100644 --- a/scripts/docs/gen_schema_reference.py +++ b/scripts/docs/gen_schema_reference.py @@ -77,11 +77,11 @@ def generate_schema_reference( if field_type: if field.annotation.__name__ == "Annotated": if field_type.__name__ == "Optional": - field_type = get_args(get_args(field.annotation)[0])[0] + field_type = get_args(field_type)[0] if field_type.__name__ == "List": - field_type = get_args(get_args(field.annotation)[0])[0] + field_type = get_args(field_type)[0] if field_type.__name__ == "Union": - field_type = get_args(get_args(field.annotation)[0])[0] + field_type = get_args(field_type)[0] base_model = ( inspect.isclass(field_type) and issubclass(field_type, BaseModel) @@ -122,7 +122,7 @@ def generate_schema_reference( prefix + " ".join( [ - f"#### {item_header}", + f"###### {item_header}", "-", item_optional_marker, item_description, diff --git a/src/dstack/_internal/core/models/gateways.py b/src/dstack/_internal/core/models/gateways.py index 71b859866..db39ba3c1 100644 --- a/src/dstack/_internal/core/models/gateways.py +++ b/src/dstack/_internal/core/models/gateways.py @@ -132,7 +132,9 @@ class TGIChatModel(BaseChatModel): eos_token (Optional[str]): The custom end of sentence token. If not specified, the default end of sentence token from the HuggingFace Hub configuration will be used. """ - format: Annotated[Literal["tgi"], Field(description="The serving format")] + format: Annotated[ + Literal["tgi"], Field(description="The serving format. Must be set to `tgi`") + ] chat_template: Annotated[ Optional[str], Field( @@ -166,7 +168,9 @@ class OpenAIChatModel(BaseChatModel): prefix (str): The `base_url` prefix: `http://hostname/{prefix}/chat/completions`. Defaults to `/v1`. """ - format: Annotated[Literal["openai"], Field(description="The serving format")] + format: Annotated[ + Literal["openai"], Field(description="The serving format. Must be set to `openai`") + ] prefix: Annotated[str, Field(description="The `base_url` prefix (after hostname)")] = "/v1"
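To illustrate the two `format` values documented above, here is a minimal sketch of how they surface in a service's model mapping — the model name is a placeholder, and the surrounding service options are omitted:

```yaml
# Sketch of a model mapping in a service configuration (placeholder model name)
model:
  type: chat
  name: meta-llama/Meta-Llama-3.1-8B-Instruct
  format: openai  # must be `openai` for an OpenAI-compatible server; use `tgi` for TGI
  prefix: /v1     # optional; the default base_url prefix, i.e. http://hostname/v1/chat/completions
```

With `format: tgi`, the optional `chat_template` and `eos_token` fields can instead override the defaults pulled from the HuggingFace Hub configuration.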