From 49519ad792b2ac534e22dd0770a6b3d4fa4b3eb0 Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Tue, 31 Dec 2024 23:58:01 +0100 Subject: [PATCH 01/13] [Docs] Many docs improvements #2170 --- docs/assets/stylesheets/extra.css | 22 +- .../say-goodbye-to-managed-notebooks.md | 2 +- docs/blog/posts/amd-on-runpod.md | 4 +- docs/blog/posts/dstack-sky.md | 4 +- docs/blog/posts/tpu-on-gcp.md | 4 +- docs/blog/posts/volumes-on-runpod.md | 8 +- docs/docs/concepts/backends.md | 847 +++++++++++++ docs/docs/{ => concepts}/dev-environments.md | 34 +- docs/docs/concepts/fleets.md | 4 +- docs/docs/concepts/gateways.md | 4 +- docs/docs/concepts/repos.md | 2 +- docs/docs/{ => concepts}/services.md | 52 +- docs/docs/{ => concepts}/tasks.md | 38 +- .../projects.md => guides/administration.md} | 2 +- docs/docs/guides/protips.md | 4 +- docs/docs/guides/server-deployment.md | 2 +- docs/docs/guides/troubleshooting.md | 2 +- docs/docs/index.md | 6 +- docs/docs/installation/index.md | 6 +- docs/docs/quickstart.md | 15 +- .../reference/dstack.yml/dev-environment.md | 173 ++- docs/docs/reference/dstack.yml/fleet.md | 109 +- docs/docs/reference/dstack.yml/gateway.md | 56 +- docs/docs/reference/dstack.yml/service.md | 299 +++-- docs/docs/reference/dstack.yml/task.md | 185 ++- docs/docs/reference/dstack.yml/volume.md | 25 +- .../reference/misc/environment-variables.md | 42 +- docs/docs/reference/server/config.yml.md | 1086 ++--------------- docs/overrides/home.html | 2 +- docs/overrides/main.html | 27 + docs/overrides/toc-item.html | 25 + docs/overrides/toc.html | 25 + mkdocs.yml | 18 +- scripts/docs/gen_schema_reference.py | 8 +- src/dstack/_internal/core/models/gateways.py | 8 +- 35 files changed, 1614 insertions(+), 1536 deletions(-) create mode 100644 docs/docs/concepts/backends.md rename docs/docs/{ => concepts}/dev-environments.md (78%) rename docs/docs/{ => concepts}/services.md (71%) rename docs/docs/{ => concepts}/tasks.md (76%) rename docs/docs/{concepts/projects.md => guides/administration.md} (99%) create mode 100644 docs/overrides/toc-item.html create mode 100644 docs/overrides/toc.html diff --git a/docs/assets/stylesheets/extra.css b/docs/assets/stylesheets/extra.css index 345c88a56..6f33b5eb2 100644 --- a/docs/assets/stylesheets/extra.css +++ b/docs/assets/stylesheets/extra.css @@ -445,7 +445,7 @@ color: rgba(0,0,0,0.87); } -.md-typeset :not(pre) :is(h1, h2, h3, h4) > code { +.md-typeset :not(pre) :is(h1, h2, h3, h4, h5, h6) > code { color: inherit; background: inherit; padding: 0; @@ -455,7 +455,7 @@ h4.doc-heading { font-size: inherit; } -.md-typeset :not(pre, h1, h2, h3, h4) > code { +.md-typeset :not(pre, h1, h2, h3, h4, h5, h6) > code { background-color: rgba(163, 68, 215, 0.05); /*border: 1px solid #dce0e6;*/ border-radius: 2px; @@ -467,27 +467,27 @@ h4.doc-heading { margin: 0 4px; } -.md-typeset :is(h1, h2, h3, h4) > code { +.md-typeset :is(h1, h2, h3, h4, h5, h6) > code { background-color: inherit; color: inherit; /*padding: 0; margin: 0;*/ } -.md-typeset :is(h1, h2, h3, h4) > a > code { +.md-typeset :is(h1, h2, h3, h4, h5, h6) > a > code { font-size: inherit; color: inherit; } -.md-typeset :is(table) :not(pre, h1, h2, h3, h4) > code { +.md-typeset :is(table) :not(pre, h1, h2, h3, h4, h5, h6) > code { font-size: .85em; } -.md-typeset :not(pre, h1, h2, h3, h4) > code { +.md-typeset :not(pre, h1, h2, h3, h4, h5, h6) > code { font-size: 0.65rem; } -.md-typeset :not(pre, h1, h2, h3, h4) > a code { +.md-typeset :not(pre, h1, h2, h3, h4, h5, h6) > a code { color: #ce00ff; } @@ -639,7 +639,7 @@ code 
.md-code__nav:hover .md-code__button { /*letter-spacing: 0;*/ } -.md-typeset h1, .md-typeset h2, .md-typeset h3, .md-typeset h4, .md-typeset h5 { +.md-typeset h1, .md-typeset h2, .md-typeset h3, .md-typeset h4, .md-typeset h5, .md-typeset h6 { font-weight: 800; letter-spacing: -1px; color: rgb(0, 0, 0); @@ -654,6 +654,10 @@ code .md-code__nav:hover .md-code__button { font-size: 17px; } +.md-typeset h6 { + font-size: 15px; +} + .md-typeset h3 { font-size: 21.5px; margin-block-end: 0; @@ -1198,7 +1202,7 @@ html .md-footer-meta.md-typeset a:is(:focus,:hover) { margin: 1em .8rem; } - .md-typeset .tabbed-block :is(h1, h2, h3, h4, h5) { + .md-typeset .tabbed-block :is(h1, h2, h3, h4, h5, h6) { margin-left: .8rem; } diff --git a/docs/blog/archive/say-goodbye-to-managed-notebooks.md b/docs/blog/archive/say-goodbye-to-managed-notebooks.md index 1ce6338bd..55e00e1c0 100644 --- a/docs/blog/archive/say-goodbye-to-managed-notebooks.md +++ b/docs/blog/archive/say-goodbye-to-managed-notebooks.md @@ -98,4 +98,4 @@ You can securely access the cloud development environment with the desktop IDE o ![](../../assets/images/dstack-vscode-jupyter.png){ width=800 } !!! info "Learn more" - Check out our [guide](../../docs/dev-environments.md) for running dev environments in your cloud. \ No newline at end of file + Check out our [guide](../../docs/concepts/dev-environments.md) for running dev environments in your cloud. \ No newline at end of file diff --git a/docs/blog/posts/amd-on-runpod.md b/docs/blog/posts/amd-on-runpod.md index 8b524778a..5bfbee693 100644 --- a/docs/blog/posts/amd-on-runpod.md +++ b/docs/blog/posts/amd-on-runpod.md @@ -39,7 +39,7 @@ you can now specify an AMD GPU under `resources`. Below are a few examples. ## Configuration === "Service" - Here's an example of a [service](../../docs/services.md) that deploys + Here's an example of a [service](../../docs/concepts/services.md) that deploys Llama 3.1 70B in FP16 using [TGI :material-arrow-top-right-thin:{ .external }](https://huggingface.co/docs/text-generation-inference/en/installation_amd){:target="_blank"}.
@@ -71,7 +71,7 @@ you can now specify an AMD GPU under `resources`. Below are a few examples.
=== "Dev environment"
-    Here's an example of a [dev environment](../../docs/dev-environments.md) using
+    Here's an example of a [dev environment](../../docs/concepts/dev-environments.md) using
     [TGI :material-arrow-top-right-thin:{ .external }](https://huggingface.co/docs/text-generation-inference/en/installation_amd){:target="_blank"}'s
     Docker image:
diff --git a/docs/blog/posts/dstack-sky.md b/docs/blog/posts/dstack-sky.md
index 1c21532b5..61e49bbcc 100644
--- a/docs/blog/posts/dstack-sky.md
+++ b/docs/blog/posts/dstack-sky.md
@@ -73,8 +73,8 @@ Continue? [y/n]:
 You can use both on-demand and spot instances without needing to manage quotas, as they are automatically handled for you.
 
-With `dstack Sky` you can use all of `dstack`'s features, incl. [dev environments](../../docs/dev-environments.md),
-[tasks](../../docs/tasks.md), [services](../../docs/services.md), and
+With `dstack Sky` you can use all of `dstack`'s features, incl. [dev environments](../../docs/concepts/dev-environments.md),
+[tasks](../../docs/concepts/tasks.md), [services](../../docs/concepts/services.md), and
 [fleets](../../docs/concepts/fleets.md).
 
 To publish services, the open-source version requires setting up a gateway with your own domain.
diff --git a/docs/blog/posts/tpu-on-gcp.md b/docs/blog/posts/tpu-on-gcp.md
index a3ee0afb2..765ccb808 100644
--- a/docs/blog/posts/tpu-on-gcp.md
+++ b/docs/blog/posts/tpu-on-gcp.md
@@ -211,8 +211,8 @@ Note, `v5litepod` is optimized for fine-tuning transformer-based models. Each co
 1. Browse [Optimum TPU :material-arrow-top-right-thin:{ .external }](https://github.com/huggingface/optimum-tpu){:target="_blank"},
    [Optimum TPU TGI :material-arrow-top-right-thin:{ .external }](https://github.com/huggingface/optimum-tpu/tree/main/text-generation-inference){:target="_blank"} and
    [vLLM :material-arrow-top-right-thin:{ .external }](https://docs.vllm.ai/en/latest/getting_started/tpu-installation.html){:target="_blank"}.
-2. Check [dev environments](../../docs/dev-environments.md), [tasks](https://dstack.ai/docs/tasks),
-   [services](../../docs/services.md), and [fleets](../../docs/concepts/fleets.md).
+2. Check [dev environments](../../docs/concepts/dev-environments.md), [tasks](../../docs/concepts/tasks.md),
+   [services](../../docs/concepts/services.md), and [fleets](../../docs/concepts/fleets.md).
 
 !!! info "Multi-host TPUs"
     If you’d like to use `dstack` with more than eight TPU cores, upvote the corresponding
diff --git a/docs/blog/posts/volumes-on-runpod.md b/docs/blog/posts/volumes-on-runpod.md
index 4350d1052..485b43f34 100644
--- a/docs/blog/posts/volumes-on-runpod.md
+++ b/docs/blog/posts/volumes-on-runpod.md
@@ -18,7 +18,7 @@ deploying a model on RunPod.
 
 
 
-Suppose you want to deploy Llama 3.1 on RunPod as a [service](../../docs/services.md):
+Suppose you want to deploy Llama 3.1 on RunPod as a [service](../../docs/concepts/services.md):
@@ -115,7 +115,7 @@ env: commands: - text-generation-launcher port: 80 -# Register the mdoel +# Register the model model: meta-llama/Meta-Llama-3.1-8B-Instruct # Uncomment to leverage spot instances @@ -131,9 +131,9 @@ In this case, `dstack` attaches the specified volume to each new replica. This e once, reducing cold start time in proportion to the model size. A notable feature of RunPod is that volumes can be attached to multiple containers simultaneously. This capability is -particularly useful for autoscalable services or distributed tasks. +particularly useful for auto-scalable services or distributed tasks. Using [volumes](../../docs/concepts/volumes.md) not only optimizes inference cold start times but also enhances the efficiency of data and model checkpoint loading during training and fine-tuning. -Whether you're running [tasks](../../docs/tasks.md) or [dev environments](../../docs/dev-environments.md), leveraging +Whether you're running [tasks](../../docs/concepts/tasks.md) or [dev environments](../../docs/concepts/dev-environments.md), leveraging volumes can significantly streamline your workflow and improve overall performance. \ No newline at end of file diff --git a/docs/docs/concepts/backends.md b/docs/docs/concepts/backends.md new file mode 100644 index 000000000..936e16182 --- /dev/null +++ b/docs/docs/concepts/backends.md @@ -0,0 +1,847 @@ +# Backends + +`dstack` can provision and manage compute across a variety of providers. + +To use `dstack` with specific providers, configure backends in the +`~/.dstack/server/config.yml` file before starting the server. +Alternatively, you can configure them via the control plane UI once the server is up. + +Below are examples of how to configure backends for each provider. + +## Cloud providers + +### AWS + +There are two ways to configure AWS: using an access key or using the default credentials. + +=== "Default credentials" + + If you have default credentials set up (e.g. in `~/.aws/credentials`), configure the backend like this: + +
+ + ```yaml + projects: + - name: main + backends: + - type: aws + creds: + type: default + ``` + +
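    If you're not sure which identity the default credentials resolve to, a quick sanity check (assuming the AWS CLI is installed) is:

    ```shell
    # Prints the account ID and IAM identity behind the default credential chain
    aws sts get-caller-identity
    ```
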
+ +=== "Access key" + + Create an access key by following the [this guide :material-arrow-top-right-thin:{ .external }](https://docs.aws.amazon.com/cli/latest/userguide/cli-authentication-user.html#cli-authentication-user-get). + Once you've downloaded the `.csv` file with your IAM user's Access key ID and Secret access key, proceed to + configure the backend. + +
+ + ```yaml + projects: + - name: main + backends: + - type: aws + creds: + type: access_key + access_key: KKAAUKLIZ5EHKICAOASV + secret_key: pn158lMqSBJiySwpQ9ubwmI6VUU3/W2fdJdFwfgO + ``` + +
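Whichever credentials type you use, you can optionally limit the regions `dstack` provisions in via the `regions` property. A minimal sketch, assuming default credentials:

```yaml
projects:
- name: main
  backends:
  - type: aws
    creds:
      type: default
    # Consider offers only from these regions
    regions: [us-east-1, us-west-2]
```
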
??? info "Required permissions"
    The following AWS policy permissions are sufficient for `dstack` to work:

    ```
    {
        "Version": "2012-10-17",
        "Statement": [
            {
                "Effect": "Allow",
                "Action": [
                    "ec2:AttachVolume",
                    "ec2:AuthorizeSecurityGroupEgress",
                    "ec2:AuthorizeSecurityGroupIngress",
                    "ec2:CreatePlacementGroup",
                    "ec2:CancelSpotInstanceRequests",
                    "ec2:CreateSecurityGroup",
                    "ec2:CreateTags",
                    "ec2:CreateVolume",
                    "ec2:DeletePlacementGroup",
                    "ec2:DeleteVolume",
                    "ec2:DescribeAvailabilityZones",
                    "ec2:DescribeCapacityReservations",
                    "ec2:DescribeImages",
                    "ec2:DescribeInstances",
                    "ec2:DescribeInstanceAttribute",
                    "ec2:DescribeInstanceTypes",
                    "ec2:DescribeRouteTables",
                    "ec2:DescribeSecurityGroups",
                    "ec2:DescribeSubnets",
                    "ec2:DescribeVpcs",
                    "ec2:DescribeVolumes",
                    "ec2:DetachVolume",
                    "ec2:RunInstances",
                    "ec2:TerminateInstances"
                ],
                "Resource": "*"
            },
            {
                "Effect": "Allow",
                "Action": [
                    "servicequotas:ListServiceQuotas",
                    "servicequotas:GetServiceQuota"
                ],
                "Resource": "*"
            },
            {
                "Effect": "Allow",
                "Action": [
                    "elasticloadbalancing:CreateLoadBalancer",
                    "elasticloadbalancing:CreateTargetGroup",
                    "elasticloadbalancing:CreateListener",
                    "elasticloadbalancing:RegisterTargets",
                    "elasticloadbalancing:AddTags",
                    "elasticloadbalancing:DeleteLoadBalancer",
                    "elasticloadbalancing:DeleteTargetGroup",
                    "elasticloadbalancing:DeleteListener",
                    "elasticloadbalancing:DeregisterTargets"
                ],
                "Resource": "*"
            },
            {
                "Effect": "Allow",
                "Action": [
                    "acm:DescribeCertificate",
                    "acm:ListCertificates"
                ],
                "Resource": "*"
            }
        ]
    }
    ```

    The `elasticloadbalancing:*` and `acm:*` permissions are only needed for provisioning gateways with ACM (AWS Certificate Manager) certificates.

??? info "VPC"
    By default, `dstack` uses the default VPC. It's possible to customize it:

    === "vpc_name"

        ```yaml
        projects:
        - name: main
          backends:
          - type: aws
            creds:
              type: default

            vpc_name: my-vpc
        ```

    === "vpc_ids"

        ```yaml
        projects:
        - name: main
          backends:
          - type: aws
            creds:
              type: default

            default_vpcs: true
            vpc_ids:
              us-east-1: vpc-0a2b3c4d5e6f7g8h
              us-east-2: vpc-9i8h7g6f5e4d3c2b
              us-west-1: vpc-4d3c2b1a0f9e8d7
        ```

        For regions without configured `vpc_ids`, enable default VPCs by setting `default_vpcs` to `true`.

??? info "Private subnets"
    By default, `dstack` provisions instances with public IPs and permits inbound SSH traffic.
    If you want `dstack` to use private subnets and provision instances without public IPs, set `public_ips` to `false`.

    ```yaml
    projects:
    - name: main
      backends:
      - type: aws
        creds:
          type: default

        public_ips: false
    ```

    Using private subnets assumes that both the `dstack` server and users can access the configured VPC's private subnets.
    Additionally, private subnets must have outbound internet connectivity provided by a NAT Gateway, Transit Gateway, or another mechanism.

??? info "OS images"
    By default, `dstack` uses its own [AMI :material-arrow-top-right-thin:{ .external }](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html)
    optimized for `dstack`.
    To use your own or other third-party images, set the `os_images` property:

    ```yaml
    projects:
    - name: main
      backends:
      - type: aws
        creds:
          type: default

        os_images:
          cpu:
            name: my-ami-for-cpu-instances
            owner: self
            user: dstack
          nvidia:
            name: 'Some ThirdParty CUDA image'
            owner: 123456789012
            user: ubuntu
    ```

    Here, both the `cpu` and `nvidia` properties are optional. If a property is not set, you won't be able to use the corresponding instance types.

    The `name` is an AMI name.
    The `owner` is either an AWS account ID (a 12-digit number) or the special value `self`, indicating the current account.
    The `user` specifies an OS user for instance provisioning.

    !!! info "Image requirements"
        * SSH server listening on port 22
        * `user` with passwordless sudo access
        * Docker installed
        * (For NVIDIA instances) NVIDIA/CUDA drivers and NVIDIA Container Toolkit installed

### Azure

There are two ways to configure Azure: using a client secret or using the default credentials.

=== "Default credentials"

    If you have default credentials set up, configure the backend like this:
+ + ```yaml + projects: + - name: main + backends: + - type: azure + subscription_id: 06c82ce3-28ff-4285-a146-c5e981a9d808 + tenant_id: f84a7584-88e4-4fd2-8e97-623f0a715ee1 + creds: + type: default + ``` + +
    If you don't know your `subscription_id` and `tenant_id`, use [Azure CLI :material-arrow-top-right-thin:{ .external }](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli):

    ```shell
    az account show --query "{subscription_id: id, tenant_id: tenantId}"
    ```

=== "Client secret"

    A client secret can be created using the [Azure CLI :material-arrow-top-right-thin:{ .external }](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli):

    ```shell
    SUBSCRIPTION_ID=...
    DSTACK_ROLE=... # The role to assign, e.g. the custom role from "Required permissions" below
    az ad sp create-for-rbac \
        --name dstack-app \
        --role $DSTACK_ROLE \
        --scopes /subscriptions/$SUBSCRIPTION_ID \
        --query "{ tenant_id: tenant, client_id: appId, client_secret: password }"
    ```

    Once you have `tenant_id`, `client_id`, and `client_secret`, go ahead and configure the backend.
+ + ```yaml + projects: + - name: main + backends: + - type: azure + subscription_id: 06c82ce3-28ff-4285-a146-c5e981a9d808 + tenant_id: f84a7584-88e4-4fd2-8e97-623f0a715ee1 + creds: + type: client + client_id: acf3f73a-597b-46b6-98d9-748d75018ed0 + client_secret: 1Kb8Q~o3Q2hdEvrul9yaj5DJDFkuL3RG7lger2VQ + ``` + +
+ + If you don't know your `subscription_id`, use [Azure CLI :material-arrow-top-right-thin:{ .external }](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli): + + ```shell + az account show --query "{subscription_id: id}" + ``` + +??? info "Required permissions" + The following Azure permissions are sufficient for `dstack` to work: + + ```json + { + "properties": { + "roleName": "dstack-role", + "description": "Minimal required permissions for using Azure with dstack", + "assignableScopes": [ + "/subscriptions/${YOUR_SUBSCRIPTION_ID}" + ], + "permissions": [ + { + "actions": [ + "Microsoft.Authorization/*/read", + "Microsoft.Compute/availabilitySets/*", + "Microsoft.Compute/locations/*", + "Microsoft.Compute/virtualMachines/*", + "Microsoft.Compute/virtualMachineScaleSets/*", + "Microsoft.Compute/cloudServices/*", + "Microsoft.Compute/disks/write", + "Microsoft.Compute/disks/read", + "Microsoft.Compute/disks/delete", + "Microsoft.Network/networkSecurityGroups/*", + "Microsoft.Network/locations/*", + "Microsoft.Network/virtualNetworks/*", + "Microsoft.Network/networkInterfaces/*", + "Microsoft.Network/publicIPAddresses/*", + "Microsoft.Resources/subscriptions/resourceGroups/read", + "Microsoft.Resources/subscriptions/resourceGroups/write", + "Microsoft.Resources/subscriptions/read" + ], + "notActions": [], + "dataActions": [], + "notDataActions": [] + } + ] + } + } + ``` + +??? info "VPC" + By default, `dstack` creates new Azure networks and subnets for every configured region. + It's possible to use custom networks by specifying `vpc_ids`: + + ```yaml + projects: + - name: main + backends: + - type: azure + creds: + type: default + regions: [westeurope] + vpc_ids: + westeurope: myNetworkResourceGroup/myNetworkName + ``` + + +??? info "Private subnets" + By default, `dstack` provisions instances with public IPs and permits inbound SSH traffic. + If you want `dstack` to use private subnets and provision instances without public IPs, + specify custom networks using `vpc_ids` and set `public_ips` to `false`. + + ```yaml + projects: + - name: main + backends: + - type: azure + creds: + type: default + regions: [westeurope] + vpc_ids: + westeurope: myNetworkResourceGroup/myNetworkName + public_ips: false + ``` + + Using private subnets assumes that both the `dstack` server and users can access the configured VPC's private subnets. + Additionally, private subnets must have outbound internet connectivity provided by [NAT Gateway or other mechanism](https://learn.microsoft.com/en-us/azure/nat-gateway/nat-overview). + +### GCP + +There are two ways to configure GCP: using a service account or using the default credentials. + +=== "Default credentials" + + Enable GCP application default credentials: + + ```shell + gcloud auth application-default login + ``` + + Then configure the backend like this: + +
+ + ```yaml + projects: + - name: main + backends: + - type: gcp + project_id: gcp-project-id + creds: + type: default + ``` + +
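    To confirm that application default credentials are actually in place, you can ask `gcloud` to mint a token; the command fails with an explanatory error if they are missing:

    ```shell
    # Succeeds only if application default credentials are configured
    gcloud auth application-default print-access-token
    ```
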
+ +=== "Service account" + + To create a service account, follow [this guide :material-arrow-top-right-thin:{ .external }](https://cloud.google.com/iam/docs/service-accounts-create). After setting up the service account [create a key :material-arrow-top-right-thin:{ .external }](https://cloud.google.com/iam/docs/keys-create-delete) for it and download the corresponding JSON file. + + Then go ahead and configure the backend by specifying the downloaded file path. + +
+ + ```yaml + projects: + - name: main + backends: + - type: gcp + project_id: gcp-project-id + creds: + type: service_account + filename: ~/.dstack/server/gcp-024ed630eab5.json + ``` + +
+ +If you don't know your GCP project ID, use [Google Cloud CLI :material-arrow-top-right-thin:{ .external }](https://cloud.google.com/sdk/docs/install-sdk): + +```shell +gcloud projects list --format="json(projectId)" +``` + +??? info "Required permissions" + The following GCP permissions are sufficient for `dstack` to work: + + ``` + compute.disks.create + compute.disks.delete + compute.disks.get + compute.disks.list + compute.disks.setLabels + compute.disks.use + compute.firewalls.create + compute.images.useReadOnly + compute.instances.attachDisk + compute.instances.create + compute.instances.delete + compute.instances.detachDisk + compute.instances.get + compute.instances.setLabels + compute.instances.setMetadata + compute.instances.setServiceAccount + compute.instances.setTags + compute.networks.get + compute.networks.updatePolicy + compute.regions.get + compute.regions.list + compute.routers.list + compute.subnetworks.list + compute.subnetworks.use + compute.subnetworks.useExternalIp + compute.zoneOperations.get + ``` + + If you plan to use TPUs, additional permissions are required: + + ``` + tpu.nodes.create + tpu.nodes.get + tpu.nodes.update + tpu.nodes.delete + tpu.operations.get + tpu.operations.list + ``` + + Also, the use of TPUs requires the `serviceAccountUser` role. + For TPU VMs, dstack will use the default service account. + +??? info "Required APIs" + First, ensure the required APIs are enabled in your GCP `project_id`. + + ```shell + PROJECT_ID=... + gcloud config set project $PROJECT_ID + gcloud services enable cloudapis.googleapis.com + gcloud services enable compute.googleapis.com + ``` + +??? info "VPC" + + === "VPC" + +
+ + ```yaml + projects: + - name: main + backends: + - type: gcp + project_id: gcp-project-id + creds: + type: default + + vpc_name: my-custom-vpc + ``` + +
+ + === "Shared VPC" + +
+ + ```yaml + projects: + - name: main + backends: + - type: gcp + project_id: gcp-project-id + creds: + type: default + + vpc_name: my-custom-vpc + vpc_project_id: another-project-id + ``` + +
+ + When using a Shared VPC, ensure there is a firewall rule allowing `INGRESS` traffic on port `22`. + You can limit this rule to `dstack` instances using the `dstack-runner-instance` target tag. + + When using GCP gateways with a Shared VPC, also ensure there is a firewall rule allowing `INGRESS` traffic on ports `22`, `80`, `443`. + You can limit this rule to `dstack` gateway instances using the `dstack-gateway-instance` target tag. + + To use TPUs with a Shared VPC, you need to grant the TPU Service Account in your service project permissions + to manage resources in the host project by granting the "TPU Shared VPC Agent" (roles/tpu.xpnAgent) role + ([more in the GCP docs](https://cloud.google.com/tpu/docs/shared-vpc-networks#vpc-shared-vpc)). + +??? info "Private subnets" + By default, `dstack` provisions instances with public IPs and permits inbound SSH traffic. + If you want `dstack` to use private subnets and provision instances without public IPs, set `public_ips` to `false`. + + ```yaml + projects: + - name: main + backends: + - type: gcp + creds: + type: default + + public_ips: false + ``` + + Using private subnets assumes that both the `dstack` server and users can access the configured VPC's private subnets. + Additionally, [Cloud NAT](https://cloud.google.com/nat/docs/overview) must be configured to provide access to external resources for provisioned instances. + +### Lambda + +Log into your [Lambda Cloud :material-arrow-top-right-thin:{ .external }](https://lambdalabs.com/service/gpu-cloud) account, click API keys in the sidebar, and then click the `Generate API key` +button to create a new API key. + +Then, go ahead and configure the backend: + +
+ +```yaml +projects: +- name: main + backends: + - type: lambda + creds: + type: api_key + api_key: eersct_yrpiey-naaeedst-tk-_cb6ba38e1128464aea9bcc619e4ba2a5.iijPMi07obgt6TZ87v5qAEj61RVxhd0p +``` + +
### RunPod

Log into your [RunPod :material-arrow-top-right-thin:{ .external }](https://www.runpod.io/console/) console, click Settings in the sidebar, expand the `API Keys` section, and click
the button to create a Read & Write key.

Then proceed to configure the backend:
+ +```yaml +projects: + - name: main + backends: + - type: runpod + creds: + type: api_key + api_key: US9XTPDIV8AR42MMINY8TCKRB8S4E7LNRQ6CAUQ9 +``` + +
+ +### Vast.ai + +Log into your [Vast.ai :material-arrow-top-right-thin:{ .external }](https://cloud.vast.ai/) account, click Account in the sidebar, and copy your +API Key. + +Then, go ahead and configure the backend: + +
+ +```yaml +projects: +- name: main + backends: + - type: vastai + creds: + type: api_key + api_key: d75789f22f1908e0527c78a283b523dd73051c8c7d05456516fc91e9d4efd8c5 +``` + +
Note that the `vastai` backend supports on-demand instances only. Spot instance support is coming soon.

### TensorDock

Log into your [TensorDock :material-arrow-top-right-thin:{ .external }](https://dashboard.tensordock.com/) account, click Developers in the sidebar, and use the `Create an Authorization` section to create a new authorization key.

Then, go ahead and configure the backend:
+ +```yaml +projects: + - name: main + backends: + - type: tensordock + creds: + type: api_key + api_key: 248e621d-9317-7494-dc1557fa5825b-98b + api_token: FyBI3YbnFEYXdth2xqYRnQI7hiusssBC +``` + +
The `tensordock` backend supports on-demand instances only. Spot instance support is coming soon.

### CUDO

Log into your [CUDO Compute :material-arrow-top-right-thin:{ .external }](https://compute.cudo.org/) account, click API keys in the sidebar, and click the `Create an API key` button.

Ensure you've created a project with CUDO Compute, then proceed to configure the backend:
+ +```yaml +projects: + - name: main + backends: + - type: cudo + project_id: my-cudo-project + creds: + type: api_key + api_key: 7487240a466624b48de22865589 +``` + +
+ +### OCI + +There are two ways to configure OCI: using client credentials or using the default credentials. + +=== "Default credentials" + If you have default credentials set up in `~/.oci/config`, configure the backend like this: + +
+ + ```yaml + projects: + - name: main + backends: + - type: oci + creds: + type: default + ``` + +
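    To verify that the default configuration works, you can run any authenticated OCI CLI command, for example fetching your tenancy's Object Storage namespace:

    ```shell
    # Returns the tenancy namespace if ~/.oci/config is valid
    oci os ns get
    ```
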
+ +=== "Client credentials" + + Log into the [OCI Console :material-arrow-top-right-thin:{ .external }](https://cloud.oracle.com), go to `My profile`, + select `API keys`, and click `Add API key`. + + Once you add a key, you'll see the configuration file. Copy its values to configure the backend as follows: + +
+ + ```yaml + projects: + - name: main + backends: + - type: oci + creds: + type: client + user: ocid1.user.oc1..g5vlaeqfu47akmaafq665xsgmyaqjktyfxtacfxc4ftjxuca7aohnd2ev66m + tenancy: ocid1.tenancy.oc1..ajqsftvk4qarcfaak3ha4ycdsaahxmaita5frdwg3tqo2bcokpd3n7oizwai + region: eu-frankfurt-1 + fingerprint: 77:32:77:00:49:7c:cb:56:84:75:8e:77:96:7d:53:17 + key_file: ~/.oci/private_key.pem + ``` + +
    Make sure to include either the path to your private key via `key_file` or the contents of the key via `key_content`.

??? info "Required permissions"

    This is an example of a restrictive policy for a group of `dstack` users:

    ```
    Allow group <group-name> to read compartments in tenancy where target.compartment.name = '<compartment-name>'
    Allow group <group-name> to read marketplace-community-listings in compartment <compartment-name>
    Allow group <group-name> to manage app-catalog-listing in compartment <compartment-name>
    Allow group <group-name> to manage instances in compartment <compartment-name>
    Allow group <group-name> to manage compute-capacity-reports in compartment <compartment-name>
    Allow group <group-name> to manage volumes in compartment <compartment-name>
    Allow group <group-name> to manage volume-attachments in compartment <compartment-name>
    Allow group <group-name> to manage virtual-network-family in compartment <compartment-name>
    ```

    To use this policy, create a compartment for `dstack` and specify it in `~/.dstack/server/config.yml`.

    ```yaml
    projects:
    - name: main
      backends:
      - type: oci
        creds:
          type: default
        compartment_id: ocid1.compartment.oc1..aaaaaaaa
    ```

### DataCrunch

Log into your [DataCrunch :material-arrow-top-right-thin:{ .external }](https://cloud.datacrunch.io/) account, click Keys in the sidebar, find the `REST API Credentials` area, and then click the `Generate Credentials` button.

Then, go ahead and configure the backend:
+ +```yaml +projects: + - name: main + backends: + - type: datacrunch + creds: + type: api_key + client_id: xfaHBqYEsArqhKWX-e52x3HH7w8T + client_secret: B5ZU5Qx9Nt8oGMlmMhNI3iglK8bjMhagTbylZy4WzncZe39995f7Vxh8 +``` + +
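Note that backends are not mutually exclusive: a project can combine several of them, and `dstack` considers offers from each configured backend when provisioning. A hypothetical sketch combining two of the providers above:

```yaml
projects:
- name: main
  backends:
  # Offers from both backends are considered when a run is submitted
  - type: aws
    creds:
      type: default
  - type: runpod
    creds:
      type: api_key
      api_key: US9XTPDIV8AR42MMINY8TCKRB8S4E7LNRQ6CAUQ9
```
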
+ +## On-prem servers + +### SSH fleets + +> For using `dstack` with on-prem servers, no backend configuration is required. +> See [SSH fleets](fleets.md#ssh-fleets) for more details. + +### Kubernetes + +To configure a Kubernetes backend, specify the path to the kubeconfig file, +and the port that `dstack` can use for proxying SSH traffic. +In case of a self-managed cluster, also specify the IP address of any node in the cluster. + +[//]: # (TODO: Mention that the Kind context has to be selected via `current-context` ) + +=== "Self-managed" + + Here's how to configure the backend to use a self-managed cluster. + +
+ + ```yaml + projects: + - name: main + backends: + - type: kubernetes + kubeconfig: + filename: ~/.kube/config + networking: + ssh_host: localhost # The external IP address of any node + ssh_port: 32000 # Any port accessible outside of the cluster + ``` + +
    The port specified as `ssh_port` must be accessible from outside the cluster.

    ??? info "Kind"
        If you are using [Kind](https://kind.sigs.k8s.io/), make sure to set up
        `ssh_port` via `extraPortMappings` for proxying SSH traffic:

        ```yaml
        kind: Cluster
        apiVersion: kind.x-k8s.io/v1alpha4
        nodes:
        - role: control-plane
          extraPortMappings:
          - containerPort: 32000 # Must be same as `ssh_port`
            hostPort: 32000 # Must be same as `ssh_port`
        ```

        Go ahead and create the cluster like this:

        ```shell
        kind create cluster --config examples/misc/kubernetes/kind-config.yml
        ```

[//]: # (TODO: Elaborate on the Kind's IP address on Linux)

=== "Managed"
    Here's how to configure the backend to use a managed cluster (AWS, GCP, Azure).
+ + ```yaml + projects: + - name: main + backends: + - type: kubernetes + kubeconfig: + filename: ~/.kube/config + networking: + ssh_port: 32000 # Any port accessible outside of the cluster + ``` + +
    The port specified as `ssh_port` must be accessible from outside the cluster.

    ??? info "EKS"
        For example, if you are using EKS, make sure to allow the port via an ingress rule
        of the corresponding security group:

        ```shell
        aws ec2 authorize-security-group-ingress --group-id <group-id> --protocol tcp --port 32000 --cidr 0.0.0.0/0
        ```

[//]: # (TODO: Elaborate on gateways, and what backends allow configuring them)

[//]: # (TODO: Should we automatically detect ~/.kube/config)

??? info "NVIDIA GPU Operator"
    To use GPUs with Kubernetes, the cluster must have the
    [NVIDIA GPU Operator :material-arrow-top-right-thin:{ .external }](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/index.html) installed.

    [//]: # (TODO: Provide short yet clear instructions. Elaborate on whether it works with Kind.)

## dstack Sky

If you're using [dstack Sky :material-arrow-top-right-thin:{ .external }](https://sky.dstack.ai){:target="_blank"},
backends are pre-configured to use compute from `dstack`'s marketplace.

You can reconfigure backends via the UI to use your own cloud accounts instead.
\ No newline at end of file
diff --git a/docs/docs/dev-environments.md b/docs/docs/concepts/dev-environments.md
similarity index 78%
rename from docs/docs/dev-environments.md
rename to docs/docs/concepts/dev-environments.md
index 96055f4b9..b3859b15b 100644
--- a/docs/docs/dev-environments.md
+++ b/docs/docs/concepts/dev-environments.md
@@ -33,12 +33,12 @@ resources:
 pre-configured with Python, Conda, and essential CUDA drivers.
 
 !!! info "Reference"
-    See [.dstack.yml](reference/dstack.yml/dev-environment.md) for all the options supported by
+    See [.dstack.yml](../reference/dstack.yml/dev-environment.md) for all the options supported by
     dev environments, along with multiple examples.
 
 ## Run a configuration
 
-To run a dev environment, pass the configuration to [`dstack apply`](reference/cli/dstack/apply.md):
+To run a dev environment, pass the configuration to [`dstack apply`](../reference/cli/dstack/apply.md):
@@ -73,7 +73,7 @@ and sets up an IDE on the instance. To open the dev environment in your desktop IDE, use the link from the output (such as `vscode://vscode-remote/ssh-remote+fast-moth-1/workflow`). -![](../assets/images/dstack-vscode-jupyter.png){ width=800 } +![](../../assets/images/dstack-vscode-jupyter.png){ width=800 } ### SSH @@ -91,35 +91,35 @@ $ ssh fast-moth-1 ### List runs -The [`dstack ps`](reference/cli/dstack/ps.md) command lists all running jobs and their statuses. +The [`dstack ps`](../reference/cli/dstack/ps.md) command lists all running jobs and their statuses. Use `--watch` (or `-w`) to monitor the live status of runs. ### Stop a run -A dev environment runs until you stop it or its lifetime exceeds [`max_duration`](reference/dstack.yml/dev-environment.md#max_duration). -To gracefully stop a dev environment, use [`dstack stop`](reference/cli/dstack/stop.md). +A dev environment runs until you stop it or its lifetime exceeds [`max_duration`](../reference/dstack.yml/dev-environment.md#max_duration). +To gracefully stop a dev environment, use [`dstack stop`](../reference/cli/dstack/stop.md). Pass `--abort` or `-x` to stop without waiting for a graceful shutdown. ### Attach to a run By default, `dstack apply` runs in attached mode – it establishes the SSH tunnel to the run, forwards ports, and shows real-time logs. -If you detached from a run, you can reattach to it using [`dstack attach`](reference/cli/dstack/attach.md). +If you detached from a run, you can reattach to it using [`dstack attach`](../reference/cli/dstack/attach.md). ### See run logs -To see the logs of a run without attaching, use [`dstack logs`](reference/cli/dstack/logs.md). +To see the logs of a run without attaching, use [`dstack logs`](../reference/cli/dstack/logs.md). Pass `--diagnose`/`-d` to `dstack logs` to see the diagnostics logs. It may be useful if a run fails. -For more information on debugging failed runs, see the [troubleshooting](guides/troubleshooting.md) guide. +For more information on debugging failed runs, see the [troubleshooting](../guides/troubleshooting.md) guide. ## Manage fleets Fleets are groups of cloud instances or SSH machines that you use to run dev environments, tasks, and services. -You can let `dstack apply` provision fleets or [create and manage them directly](concepts/fleets.md). +You can let `dstack apply` provision fleets or [create and manage them directly](../concepts/fleets.md). ### Creation policy By default, when you run `dstack apply` with a dev environment, task, or service, -`dstack` reuses `idle` instances from an existing [fleet](concepts/fleets.md). +`dstack` reuses `idle` instances from an existing [fleet](../concepts/fleets.md). If no `idle` instances match the requirements, `dstack` automatically creates a new fleet using configured backends. @@ -134,24 +134,24 @@ $ dstack apply -R -f examples/.dstack.yml
-Alternatively, set [`creation_policy`](reference/dstack.yml/dev-environment.md#creation_policy) to `reuse` in the run configuration. +Alternatively, set [`creation_policy`](../reference/dstack.yml/dev-environment.md#creation_policy) to `reuse` in the run configuration. ### Termination policy If a fleet is created automatically, it remains `idle` for 5 minutes and can be reused within that time. To change the default idle duration, set -[`termination_idle_time`](reference/dstack.yml/fleet.md#termination_idle_time) in the run configuration (e.g., to 0 or a +[`termination_idle_time`](../reference/dstack.yml/fleet.md#termination_idle_time) in the run configuration (e.g., to 0 or a longer duration). !!! info "Fleets" For greater control over fleet provisioning, configuration, and lifecycle management, it is recommended to use - [fleets](concepts/fleets.md) directly. + [fleets](fleets.md) directly. ## What's next? -1. Read about [tasks](tasks.md), [services](services.md), and [repos](concepts/repos.md) -2. Learn how to manage [fleets](concepts/fleets.md) +1. Read about [tasks](tasks.md), [services](services.md), and [repos](repos.md) +2. Learn how to manage [fleets](fleets.md) !!! info "Reference" - See [.dstack.yml](reference/dstack.yml/dev-environment.md) for all the options supported by + See [.dstack.yml](../reference/dstack.yml/dev-environment.md) for all the options supported by dev environments, along with multiple examples. diff --git a/docs/docs/concepts/fleets.md b/docs/docs/concepts/fleets.md index 92de3c65c..c4fd8fd86 100644 --- a/docs/docs/concepts/fleets.md +++ b/docs/docs/concepts/fleets.md @@ -254,8 +254,8 @@ To terminate and delete specific instances from a fleet, pass `-i INSTANCE_NUM`. ## What's next? -1. Read about [dev environments](../dev-environments.md), [tasks](../tasks.md), and - [services](../services.md) +1. Read about [dev environments](dev-environments.md), [tasks](tasks.md), and + [services](services.md) 2. Join the community via [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd) !!! info "Reference" diff --git a/docs/docs/concepts/gateways.md b/docs/docs/concepts/gateways.md index d3234765c..1e19feda0 100644 --- a/docs/docs/concepts/gateways.md +++ b/docs/docs/concepts/gateways.md @@ -1,6 +1,6 @@ # Gateways -Gateways manage the ingress traffic of running [services](../services.md) +Gateways manage the ingress traffic of running [services](services.md) and provide them with an HTTPS endpoint mapped to your domain, handling authentication, load distribution, and auto-scaling. @@ -85,7 +85,7 @@ Alternatively, you can delete a gateway by passing the gateway name to `dstack ## What's next? -1. See [services](../services.md) on how to run services +1. See [services](services.md) on how to run services !!! info "Reference" See [.dstack.yml](../reference/dstack.yml/gateway.md) for all the options supported by diff --git a/docs/docs/concepts/repos.md b/docs/docs/concepts/repos.md index ff5291ccb..1dd2ed3d9 100644 --- a/docs/docs/concepts/repos.md +++ b/docs/docs/concepts/repos.md @@ -83,4 +83,4 @@ $ dstack apply -f .dstack.yml --no-repo ## What's next? -1. Read about [dev environments](../dev-environments.md), [tasks](../tasks.md), [services](../services.md) \ No newline at end of file +1. 
Read about [dev environments](dev-environments.md), [tasks](tasks.md), [services](services.md) \ No newline at end of file diff --git a/docs/docs/services.md b/docs/docs/concepts/services.md similarity index 71% rename from docs/docs/services.md rename to docs/docs/concepts/services.md index cc68670a9..7bb194362 100644 --- a/docs/docs/services.md +++ b/docs/docs/concepts/services.md @@ -46,18 +46,18 @@ Note, the `model` property is optional and not needed when deploying a non-OpenA pre-configured with Python, Conda, and essential CUDA drivers. !!! info "Gateway" - To enable [auto-scaling](reference/dstack.yml/service.md#auto-scaling), or use a custom domain with HTTPS, - set up a [gateway](concepts/gateways.md) before running the service. + To enable [auto-scaling](../reference/dstack.yml/service.md#auto-scaling), or use a custom domain with HTTPS, + set up a [gateway](gateways.md) before running the service. If you're using [dstack Sky :material-arrow-top-right-thin:{ .external }](https://sky.dstack.ai){:target="_blank"}, a gateway is pre-configured for you. !!! info "Reference" - See [.dstack.yml](reference/dstack.yml/service.md) for all the options supported by + See [.dstack.yml](../reference/dstack.yml/service.md) for all the options supported by services, along with multiple examples. ## Run a service -To run a service, pass the configuration to [`dstack apply`](reference/cli/dstack/apply.md): +To run a service, pass the configuration to [`dstack apply`](../reference/cli/dstack/apply.md):
@@ -90,9 +90,9 @@ and runs the service.
 
 ### Service
 
-If a [gateway](concepts/gateways.md) is not configured, the service’s endpoint will be accessible at
+If a [gateway](gateways.md) is not configured, the service’s endpoint will be accessible at
 `/proxy/services/<project name>/<run name>/`.
-If a [gateway](concepts/gateways.md) is configured, the service endpoint will be accessible at
+If a [gateway](gateways.md) is configured, the service endpoint will be accessible at
 `https://<run name>.<gateway domain>`.
 
@@ -116,7 +116,7 @@ $ curl http://localhost:3000/proxy/services/main/llama31/v1/chat/completions \
 
 !!! info "Auth"
     By default, the service endpoint requires the `Authorization` header with `Bearer <dstack token>`.
-    Authorization can be disabled by setting [`auth`](reference/dstack.yml/service.md#authorization) to `false` in the
+    Authorization can be disabled by setting [`auth`](../reference/dstack.yml/service.md#authorization) to `false` in the
     service configuration file.
 
 ### Model
 
@@ -125,41 +125,41 @@ If the service defines the `model` property, the model can be accessed with the
 OpenAI-compatible endpoint at `/proxy/models/<project name>/`,
 or via the control plane UI's playground.
 
-When a [gateway](concepts/gateways.md) is configured, the OpenAI-compatible endpoint is available at `https://gateway.<gateway domain>/`.
+When a [gateway](gateways.md) is configured, the OpenAI-compatible endpoint is available at `https://gateway.<gateway domain>/`.
 
 ## Manage runs
 
 ### List runs
 
-The [`dstack ps`](reference/cli/dstack/ps.md) command lists all running jobs and their statuses.
+The [`dstack ps`](../reference/cli/dstack/ps.md) command lists all running jobs and their statuses.
 Use `--watch` (or `-w`) to monitor the live status of runs.
 
 ### Stop a run
 
-A service runs until you stop it or its lifetime exceeds [`max_duration`](reference/dstack.yml/dev-environment.md#max_duration).
-To gracefully stop a service, use [`dstack stop`](reference/cli/dstack/stop.md).
+A service runs until you stop it or its lifetime exceeds [`max_duration`](../reference/dstack.yml/dev-environment.md#max_duration).
+To gracefully stop a service, use [`dstack stop`](../reference/cli/dstack/stop.md).
 Pass `--abort` or `-x` to stop without waiting for a graceful shutdown.
 
 ### Attach to a run
 
 By default, `dstack apply` runs in attached mode – it establishes the SSH tunnel to the run, forwards ports, and shows real-time logs.
-If you detached from a run, you can reattach to it using [`dstack attach`](reference/cli/dstack/attach.md).
+If you detached from a run, you can reattach to it using [`dstack attach`](../reference/cli/dstack/attach.md).
 
 ### See run logs
 
-To see the logs of a run without attaching, use [`dstack logs`](reference/cli/dstack/logs.md).
+To see the logs of a run without attaching, use [`dstack logs`](../reference/cli/dstack/logs.md).
 Pass `--diagnose`/`-d` to `dstack logs` to see the diagnostics logs. It may be useful if a run fails.
-For more information on debugging failed runs, see the [troubleshooting](guides/troubleshooting.md) guide.
+For more information on debugging failed runs, see the [troubleshooting](../guides/troubleshooting.md) guide.
 
 ## Manage fleets
 
 Fleets are groups of cloud instances or SSH machines that you use to run dev environments, tasks, and services.
-You can let `dstack apply` provision fleets or [create and manage them directly](concepts/fleets.md).
+You can let `dstack apply` provision fleets or [create and manage them directly](fleets.md).
 
 ### Creation policy
 
 By default, when you run `dstack apply` with a dev environment, task, or service,
-`dstack` reuses `idle` instances from an existing [fleet](concepts/fleets.md).
+`dstack` reuses `idle` instances from an existing [fleet](fleets.md).
 If no `idle` instances match the requirements, it automatically creates a new fleet
 using backends.
 
 To ensure `dstack apply` doesn't create a new fleet but reuses an existing one,
 pass `-R` (or `--reuse`) to `dstack apply`.
 
 <div class="termy">
 
 ```shell
 $ dstack apply -R -f examples/.dstack.yml
 
-Alternatively, set [`creation_policy`](reference/dstack.yml/dev-environment.md#creation_policy) to `reuse` in the run configuration. +Alternatively, set [`creation_policy`](../reference/dstack.yml/dev-environment.md#creation_policy) to `reuse` in the run configuration. ### Termination policy If a fleet is created automatically, it remains `idle` for 5 minutes and can be reused within that time. To change the default idle duration, set -[`termination_idle_time`](reference/dstack.yml/fleet.md#termination_idle_time) in the run configuration (e.g., to 0 or a +[`termination_idle_time`](../reference/dstack.yml/fleet.md#termination_idle_time) in the run configuration (e.g., to 0 or a longer duration). !!! info "Fleets" For greater control over fleet provisioning, configuration, and lifecycle management, it is recommended to use - [fleets](concepts/fleets.md) directly. + [fleets](fleets.md) directly. ## What's next? -1. Read about [dev environments](dev-environments.md), [tasks](tasks.md), and [repos](concepts/repos.md) -2. Learn how to manage [fleets](concepts/fleets.md) -3. See how to set up [gateways](concepts/gateways.md) -4. Check the [TGI :material-arrow-top-right-thin:{ .external }](/examples/deployment/tgi/){:target="_blank"}, - [vLLM :material-arrow-top-right-thin:{ .external }](/examples/deployment/vllm/){:target="_blank"}, and - [NIM :material-arrow-top-right-thin:{ .external }](/examples/deployment/nim/){:target="_blank"} examples +1. Read about [dev environments](dev-environments.md), [tasks](tasks.md), and [repos](repos.md) +2. Learn how to manage [fleets](fleets.md) +3. See how to set up [gateways](gateways.md) +4. Check the [TGI :material-arrow-top-right-thin:{ .external }](../../examples/deployment/tgi/index.md){:target="_blank"}, + [vLLM :material-arrow-top-right-thin:{ .external }](../../examples/deployment/vllm/index.md){:target="_blank"}, and + [NIM :material-arrow-top-right-thin:{ .external }](../../examples/deployment/nim/index.md){:target="_blank"} examples !!! info "Reference" - See [.dstack.yml](reference/dstack.yml/service.md) for all the options supported by + See [.dstack.yml](../reference/dstack.yml/service.md) for all the options supported by services, along with multiple examples. diff --git a/docs/docs/tasks.md b/docs/docs/concepts/tasks.md similarity index 76% rename from docs/docs/tasks.md rename to docs/docs/concepts/tasks.md index a8b11b4a6..cc7ef72cc 100644 --- a/docs/docs/tasks.md +++ b/docs/docs/concepts/tasks.md @@ -45,16 +45,16 @@ resources: !!! info "Distributed tasks" By default, tasks run on a single instance. However, you can specify - the [number of nodes](reference/dstack.yml/task.md#distributed-tasks). + the [number of nodes](../reference/dstack.yml/task.md#distributed-tasks). In this case, the task will run on a cluster of instances. !!! info "Reference" - See [.dstack.yml](reference/dstack.yml/task.md) for all the options supported by + See [.dstack.yml](../reference/dstack.yml/task.md) for all the options supported by tasks, along with multiple examples. ## Run a configuration -To run a task, pass the configuration to [`dstack apply`](reference/cli/dstack/apply.md): +To run a task, pass the configuration to [`dstack apply`](../reference/cli/dstack/apply.md):
@@ -84,47 +84,47 @@ Launching `axolotl-train`... and runs the commands. !!! info "Ports" - If the task specifies [`ports`](reference/dstack.yml/task.md#_ports), `dstack apply` automatically forwards them to your + If the task specifies [`ports`](../reference/dstack.yml/task.md#_ports), `dstack apply` automatically forwards them to your local machine for convenient and secure access. !!! info "Queueing tasks" By default, if `dstack apply` cannot find capacity, the task fails. - To queue the task and wait for capacity, specify the [`retry`](reference/dstack.yml/task.md#queueing-tasks) + To queue the task and wait for capacity, specify the [`retry`](../reference/dstack.yml/task.md#queueing-tasks) property in the task configuration. ## Manage runs ### List runs -The [`dstack ps`](reference/cli/dstack/ps.md) command lists all running jobs and their statuses. +The [`dstack ps`](../reference/cli/dstack/ps.md) command lists all running jobs and their statuses. Use `--watch` (or `-w`) to monitor the live status of runs. ### Stop a run -A task runs until it's completed or its lifetime exceeds [`max_duration`](reference/dstack.yml/dev-environment.md#max_duration). -You can also gracefully stop a task using [`dstack stop`](reference/cli/dstack/stop.md). +A task runs until it's completed or its lifetime exceeds [`max_duration`](../reference/dstack.yml/dev-environment.md#max_duration). +You can also gracefully stop a task using [`dstack stop`](../reference/cli/dstack/stop.md). Pass `--abort` or `-x` to stop without waiting for a graceful shutdown. ### Attach to a run By default, `dstack apply` runs in attached mode – it establishes the SSH tunnel to the run, forwards ports, and shows real-time logs. -If you detached from a run, you can reattach to it using [`dstack attach`](reference/cli/dstack/attach.md). +If you detached from a run, you can reattach to it using [`dstack attach`](../reference/cli/dstack/attach.md). ### See run logs -To see the logs of a run without attaching, use [`dstack logs`](reference/cli/dstack/logs.md). +To see the logs of a run without attaching, use [`dstack logs`](../reference/cli/dstack/logs.md). Pass `--diagnose`/`-d` to `dstack logs` to see the diagnostics logs. It may be useful if a run fails. -For more information on debugging failed runs, see the [troubleshooting](guides/troubleshooting.md) guide. +For more information on debugging failed runs, see the [troubleshooting](../guides/troubleshooting.md) guide. ## Manage fleets Fleets are groups of cloud instances or SSH machines that you use to run dev environments, tasks, and services. -You can let `dstack apply` provision fleets or [create and manage them directly](concepts/fleets.md). +You can let `dstack apply` provision fleets or [create and manage them directly](fleets.md). ### Creation policy By default, when you run `dstack apply` with a dev environment, task, or service, -`dstack` reuses `idle` instances from an existing [fleet](concepts/fleets.md). +`dstack` reuses `idle` instances from an existing [fleet](fleets.md). If no `idle` instances match the requirements, `dstack` automatically creates a new fleet using configured backends. @@ -139,25 +139,25 @@ $ dstack apply -R -f examples/.dstack.yml
-Alternatively, set [`creation_policy`](reference/dstack.yml/dev-environment.md#creation_policy) to `reuse` in the run configuration.
+Alternatively, set [`creation_policy`](../reference/dstack.yml/dev-environment.md#creation_policy) to `reuse` in the run configuration.
 
 ### Termination policy
 
 If a fleet is created automatically, it remains `idle` for 5 minutes and can be reused within that time.
 To change the default idle duration, set
-[`termination_idle_time`](reference/dstack.yml/fleet.md#termination_idle_time) in the run configuration (e.g., to 0 or a
+[`termination_idle_time`](../reference/dstack.yml/fleet.md#termination_idle_time) in the run configuration (e.g., to 0 or a
 longer duration).
 
 !!! info "Fleets"
     For greater control over fleet provisioning, configuration, and lifecycle management, it is recommended to use
-    [fleets](concepts/fleets.md) directly.
+    [fleets](fleets.md) directly.
 
 ## What's next?
 
-1. Read about [dev environments](dev-environments.md), [services](services.md), and [repos](concepts/repos.md)
-2. Learn how to manage [fleets](concepts/fleets.md)
+1. Read about [dev environments](dev-environments.md), [services](services.md), and [repos](repos.md)
+2. Learn how to manage [fleets](fleets.md)
 3. Check the [Axolotl](/examples/fine-tuning/axolotl) example
 
 !!! info "Reference"
-    See [.dstack.yml](reference/dstack.yml/task.md) for all the options supported by
+    See [.dstack.yml](../reference/dstack.yml/task.md) for all the options supported by
     tasks, along with multiple examples.
diff --git a/docs/docs/concepts/projects.md b/docs/docs/guides/administration.md
similarity index 99%
rename from docs/docs/concepts/projects.md
rename to docs/docs/guides/administration.md
index 86cef65f0..394d15de2 100644
--- a/docs/docs/concepts/projects.md
+++ b/docs/docs/guides/administration.md
@@ -1,4 +1,4 @@
-# Projects
+# Administration
 
 Projects enable the isolation of different teams and their resources. Each project can configure its
 own backends and control which users have access to it.
diff --git a/docs/docs/guides/protips.md b/docs/docs/guides/protips.md
index 3e18f6294..d01ad4fad 100644
--- a/docs/docs/guides/protips.md
+++ b/docs/docs/guides/protips.md
@@ -123,8 +123,8 @@ This allows you to access the remote `8501` port on `localhost:8501` while the C
 This will forward the remote `8501` port to `localhost:3000`.
 
 !!! info "Tasks vs. services"
-    [Services](../services.md) provide external access, `https`, replicas with autoscaling, OpenAI-compatible endpoint
-    and other service features. If you don't need them, you can use [tasks](../tasks.md) for running apps.
+    [Services](../concepts/services.md) provide external access, `https`, replicas with autoscaling, OpenAI-compatible endpoint
+    and other service features. If you don't need them, you can use [tasks](../concepts/tasks.md) for running apps.
 
 ## Docker and Docker Compose
 
diff --git a/docs/docs/guides/server-deployment.md b/docs/docs/guides/server-deployment.md
index 032f40b83..67d3f24e7 100644
--- a/docs/docs/guides/server-deployment.md
+++ b/docs/docs/guides/server-deployment.md
@@ -61,7 +61,7 @@ To use `dstack` with your own cloud accounts, create the `~/.dstack/server/confi
 [configure backends](../reference/server/config.yml.md).
 The server loads this file on startup.
 
-Alternatively, you can configure backends on the [project settings page](../concepts/projects/#project-backends) via the control plane's UI.
+Alternatively, you can configure backends on the [project settings page](../guides/administration/#project-backends) via the control plane's UI.
+Alternatively, you can configure backends on the [project settings page](../guides/projects/#project-backends) via the control plane's UI. > For using `dstack` with on-prem servers, no backend configuration is required. > See [SSH fleets](../concepts/fleets.md#ssh-fleets) for more details. diff --git a/docs/docs/guides/troubleshooting.md b/docs/docs/guides/troubleshooting.md index 68e3799fd..11eb77369 100644 --- a/docs/docs/guides/troubleshooting.md +++ b/docs/docs/guides/troubleshooting.md @@ -94,7 +94,7 @@ pointing to the gateway's hostname is configured. #### Cause 1: Bad Authorization -If the service endpoint returns a 403 error, it is likely because the [`Authorization`](../services.md#access-the-endpoint) +If the service endpoint returns a 403 error, it is likely because the [`Authorization`](../concepts/services.md#access-the-endpoint) header with the correct `dstack` token was not provided. [//]: # (#### Other) diff --git a/docs/docs/index.md b/docs/docs/index.md index 7e25baa6b..e8ce7eaea 100644 --- a/docs/docs/index.md +++ b/docs/docs/index.md @@ -21,9 +21,9 @@ for AI workloads both in the cloud and on-prem, speeding up the development, tra `dstack` supports the following configurations: -* [Dev environments](dev-environments.md) — for interactive development using a desktop IDE -* [Tasks](tasks.md) — for scheduling jobs, incl. distributed ones (or running web apps) -* [Services](services.md) — for deploying models (or web apps) +* [Dev environments](concepts/dev-environments.md) — for interactive development using a desktop IDE +* [Tasks](concepts/tasks.md) — for scheduling jobs, incl. distributed ones (or running web apps) +* [Services](concepts/services.md) — for deploying models (or web apps) * [Fleets](concepts/fleets.md) — for managing cloud and on-prem clusters * [Volumes](concepts/volumes.md) — for managing network volumes (to persist data) * [Gateways](concepts/gateways.md) — for publishing services with a custom domain and HTTPS diff --git a/docs/docs/installation/index.md b/docs/docs/installation/index.md index f60b5d341..680c99ef7 100644 --- a/docs/docs/installation/index.md +++ b/docs/docs/installation/index.md @@ -11,10 +11,10 @@ To use the open-source version of `dstack` with your own cloud accounts or on-pr ### (Optional) Configure backends -To use `dstack` with your own cloud accounts, create the `~/.dstack/server/config.yml` file and -[configure backends](../reference/server/config.yml.md). Alternatively, you can configure backends via the control plane UI after you start the server. +To use `dstack` with specific providers, configure [backends](../concepts/backends.md). -You can skip backends configuration if you intend to run containers only on your on-prem servers. Use [SSH fleets](../concepts/fleets.md#ssh-fleets) for that. +> To use `dstack` with on-prem servers, +no backend configuration is needed. Use [SSH fleets](../concepts/fleets.md#ssh-fleets) for that. ## Start the server diff --git a/docs/docs/quickstart.md b/docs/docs/quickstart.md index b8434a6f4..33b7b0058 100644 --- a/docs/docs/quickstart.md +++ b/docs/docs/quickstart.md @@ -21,8 +21,6 @@ $ dstack init A dev environment lets you provision an instance and access it with your desktop IDE. - #### Define a configuration - Create the following configuration file inside the repo:
@@ -44,8 +42,6 @@ $ dstack init
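+    If the dev environment needs an accelerator, resources can be requested in the same file. The snippet below is a sketch with illustrative values, not part of the minimal configuration:
+
+    ```yaml
+    # Optionally request hardware for the dev environment (illustrative values)
+    resources:
+      gpu: 24GB
+    ```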
- #### Run the configuration - Run the configuration via [`dstack apply`](reference/cli/dstack/apply.md):
@@ -77,8 +73,6 @@ $ dstack init A task allows you to schedule a job or run a web app. Tasks can be distributed and can forward ports. - #### Define a configuration - Create the following configuration file inside the repo:
@@ -110,8 +104,6 @@ $ dstack init [`nodes`](reference/dstack.yml/task.md#distributed-tasks), and `dstack` will run it on a cluster. - #### Run the configuration - Run the configuration via [`dstack apply`](reference/cli/dstack/apply.md):
@@ -142,8 +134,6 @@ $ dstack init A service allows you to deploy a model or any web app as an endpoint. - #### Define a configuration - Create the following configuration file inside the repo:
@@ -175,8 +165,6 @@ $ dstack init
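+    Optionally, a service can run multiple replicas with autoscaling. The snippet below is a sketch with illustrative values (the `replicas` and `scaling` options are covered in the services reference):
+
+    ```yaml
+    # Optional: run 1 to 4 replicas, scaling on requests per second (illustrative values)
+    replicas: 1..4
+    scaling:
+      metric: rps
+      target: 10
+    ```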
- #### Run the configuration - Run the configuration via [`dstack apply`](reference/cli/dstack/apply.md):
@@ -218,7 +206,6 @@ Something not working? See the [troubleshooting](guides/troubleshooting.md) guid ## What's next? -1. Read about [dev environments](dev-environments.md), [tasks](tasks.md), [services](services.md), - and [repos](concepts/repos.md) +1. Read about [backends](concepts/backends.md), [dev environments](concepts/dev-environments.md), [tasks](concepts/tasks.md), and [services](concepts/services.md) 2. Join [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd) 3. Browse [examples](https://dstack.ai/examples) diff --git a/docs/docs/reference/dstack.yml/dev-environment.md b/docs/docs/reference/dstack.yml/dev-environment.md index 00c8a6bc4..eca573edb 100644 --- a/docs/docs/reference/dstack.yml/dev-environment.md +++ b/docs/docs/reference/dstack.yml/dev-environment.md @@ -1,10 +1,80 @@ -# dev-environment +# `dev-environment` -The `dev-environment` configuration type allows running [dev environments](../../dev-environments.md). +The `dev-environment` configuration type allows running [dev environments](../../concepts/dev-environments.md). -> Configuration files must be inside the project repo, and their names must end with `.dstack.yml` -> (e.g. `.dstack.yml` or `dev.dstack.yml` are both acceptable). -> Any configuration can be run via [`dstack apply`](../cli/dstack/apply.md). +## Root reference + +#SCHEMA# dstack._internal.core.models.configurations.DevEnvironmentConfiguration + overrides: + show_root_heading: false + type: + required: true + +### `retry` + +#SCHEMA# dstack._internal.core.models.profiles.ProfileRetry + overrides: + show_root_heading: false + type: + required: true + +### `resources` + +#SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema + overrides: + show_root_heading: false + type: + required: true + item_id_prefix: resources- + +#### `resources.gpu` { #resources-gpu data-toc-label="gpu" } + +#SCHEMA# dstack._internal.core.models.resources.GPUSpecSchema + overrides: + show_root_heading: false + type: + required: true + +#### `resources.disk` { #resources-disk data-toc-label="disk" } + +#SCHEMA# dstack._internal.core.models.resources.DiskSpecSchema + overrides: + show_root_heading: false + type: + required: true + +### `registry_auth` + +#SCHEMA# dstack._internal.core.models.configurations.RegistryAuth + overrides: + show_root_heading: false + type: + required: true + +### `volumes[n]` { #_volumes data-toc-label="volumes" } + +=== "Network volumes" + + #SCHEMA# dstack._internal.core.models.volumes.VolumeMountPoint + overrides: + show_root_heading: false + type: + required: true + +=== "Instance volumes" + + #SCHEMA# dstack._internal.core.models.volumes.InstanceMountPoint + overrides: + show_root_heading: false + type: + required: true + +??? info "Short syntax" + + The short syntax for volumes is a colon-separated string in the form of `source:destination` + + * `volume-name:/container/path` for network volumes + * `/instance/path:/container/path` for instance volumes ## Examples @@ -87,7 +157,7 @@ ide: vscode !!! info "Docker and Docker Compose" All backends except `runpod`, `vastai`, and `kubernetes` also allow using [Docker and Docker Compose](../../guides/protips.md#docker-and-docker-compose) inside `dstack` runs. -### Resources { #_resources } +### Resources { #resources_ } When you specify a resource value like `cpu` or `memory`, you can either use an exact value (e.g. 
`24GB`) or a @@ -163,15 +233,14 @@ If you don't assign a value to an environment variable (see `HF_TOKEN` above), `dstack` will require the value to be passed via the CLI or set in the current process. For instance, you can define environment variables in a `.envrc` file and utilize tools like `direnv`. -#### System environment variables - -The following environment variables are available in any run by default: - -| Name | Description | -|-------------------------|-----------------------------------------| -| `DSTACK_RUN_NAME` | The name of the run | -| `DSTACK_REPO_ID` | The ID of the repo | -| `DSTACK_GPUS_NUM` | The total number of GPUs in the run | +??? info "System environment variables" + The following environment variables are available in any run by default: + + | Name | Description | + |-------------------------|-----------------------------------------| + | `DSTACK_RUN_NAME` | The name of the run | + | `DSTACK_REPO_ID` | The ID of the repo | + | `DSTACK_GPUS_NUM` | The total number of GPUs in the run | ### Spot policy @@ -267,77 +336,3 @@ environment, and its contents will persist across runs. attach volumes to `/workflow` or any of its subdirectories. The `dev-environment` configuration type supports many other options. See below. - -## Root reference - -#SCHEMA# dstack._internal.core.models.configurations.DevEnvironmentConfiguration - overrides: - show_root_heading: false - type: - required: true - -## `retry` - -#SCHEMA# dstack._internal.core.models.profiles.ProfileRetry - overrides: - show_root_heading: false - type: - required: true - -## `resources` - -#SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema - overrides: - show_root_heading: false - type: - required: true - item_id_prefix: resources- - -## `resources.gpu` { #resources-gpu data-toc-label="resources.gpu" } - -#SCHEMA# dstack._internal.core.models.resources.GPUSpecSchema - overrides: - show_root_heading: false - type: - required: true - -## `resources.disk` { #resources-disk data-toc-label="resources.disk" } - -#SCHEMA# dstack._internal.core.models.resources.DiskSpecSchema - overrides: - show_root_heading: false - type: - required: true - -## `registry_auth` - -#SCHEMA# dstack._internal.core.models.configurations.RegistryAuth - overrides: - show_root_heading: false - type: - required: true - -## `volumes[n]` { #_volumes data-toc-label="volumes" } - -=== "Network volumes" - - #SCHEMA# dstack._internal.core.models.volumes.VolumeMountPoint - overrides: - show_root_heading: false - type: - required: true - -=== "Instance volumes" - - #SCHEMA# dstack._internal.core.models.volumes.InstanceMountPoint - overrides: - show_root_heading: false - type: - required: true - -??? info "Short syntax" - - The short syntax for volumes is a colon-separated string in the form of `source:destination` - - * `volume-name:/container/path` for network volumes - * `/instance/path:/container/path` for instance volumes diff --git a/docs/docs/reference/dstack.yml/fleet.md b/docs/docs/reference/dstack.yml/fleet.md index ffcaad705..c421efa7c 100644 --- a/docs/docs/reference/dstack.yml/fleet.md +++ b/docs/docs/reference/dstack.yml/fleet.md @@ -1,10 +1,58 @@ -# fleet +# `fleet` The `fleet` configuration type allows creating and updating fleets. -> Configuration files must be inside the project repo, and their names must end with `.dstack.yml` -> (e.g. `.dstack.yml` or `fleet.dstack.yml` are both acceptable). -> Any configuration can be run via [`dstack apply`](../cli/dstack/apply.md). 
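+For orientation, a minimal cloud fleet looks like the sketch below — the name, node count, and resource values are illustrative; the reference and examples that follow cover all options:
+
+```yaml
+type: fleet
+# The fleet name (illustrative)
+name: my-cloud-fleet
+
+# Provision two cloud instances
+nodes: 2
+# Each instance must have at least one 24GB GPU
+resources:
+  gpu: 24GB
+```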
+
+## Root reference
+
+#SCHEMA# dstack._internal.core.models.fleets.FleetConfiguration
+  overrides:
+    show_root_heading: false
+    type:
+      required: true
+
+### `ssh_config` { data-toc-label="ssh_config" }
+
+#SCHEMA# dstack._internal.core.models.fleets.SSHParams
+  overrides:
+    show_root_heading: false
+    item_id_prefix: ssh_config-
+
+#### `ssh_config.hosts[n]` { #ssh_config-hosts data-toc-label="hosts" }
+
+#SCHEMA# dstack._internal.core.models.fleets.SSHHostParams
+  overrides:
+    show_root_heading: false
+
+### `resources`
+
+#SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema
+  overrides:
+    show_root_heading: false
+    type:
+      required: true
+    item_id_prefix: resources-
+
+#### `resources.gpu` { #resources-gpu data-toc-label="gpu" }
+
+#SCHEMA# dstack._internal.core.models.resources.GPUSpecSchema
+  overrides:
+    show_root_heading: false
+    type:
+      required: true
+
+#### `resources.disk` { #resources-disk data-toc-label="disk" }
+
+#SCHEMA# dstack._internal.core.models.resources.DiskSpecSchema
+  overrides:
+    show_root_heading: false
+    type:
+      required: true
+
+### `retry`
+
+#SCHEMA# dstack._internal.core.models.profiles.ProfileRetry
+  overrides:
+    show_root_heading: false
 
 ## Examples
 
@@ -60,55 +108,4 @@ ssh_config:
 
 [//]: # (TODO: a cluster, individual user and identity file, etc)
 
-[//]: # (TODO: other examples, for all properties like in dev-environment/task/service)
-
-## Root reference
-
-#SCHEMA# dstack._internal.core.models.fleets.FleetConfiguration
-  overrides:
-    show_root_heading: false
-    type:
-      required: true
-
-## `ssh_config`
-
-#SCHEMA# dstack._internal.core.models.fleets.SSHParams
-  overrides:
-    show_root_heading: false
-
-## `ssh_config.hosts[n]`
-
-#SCHEMA# dstack._internal.core.models.fleets.SSHHostParams
-  overrides:
-    show_root_heading: false
-
-## `resources`
-
-#SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema
-  overrides:
-    show_root_heading: false
-    type:
-      required: true
-    item_id_prefix: resources-
-
-## `resouces.gpu` { #resources-gpu data-toc-label="resources.gpu" }
-
-#SCHEMA# dstack._internal.core.models.resources.GPUSpecSchema
-  overrides:
-    show_root_heading: false
-    type:
-      required: true
-
-## `resouces.disk` { #resources-disk data-toc-label="resources.disk" }
-
-#SCHEMA# dstack._internal.core.models.resources.DiskSpecSchema
-  overrides:
-    show_root_heading: false
-    type:
-      required: true
-
-## `retry`
-
-#SCHEMA# dstack._internal.core.models.profiles.ProfileRetry
-  overrides:
-    show_root_heading: false
+[//]: # (TODO: other examples, for all properties like in dev-environment/task/service)
\ No newline at end of file
diff --git a/docs/docs/reference/dstack.yml/gateway.md b/docs/docs/reference/dstack.yml/gateway.md
index 73bb06d7f..61723ef71 100644
--- a/docs/docs/reference/dstack.yml/gateway.md
+++ b/docs/docs/reference/dstack.yml/gateway.md
@@ -1,10 +1,32 @@
-# gateway
+# `gateway`
 
 The `gateway` configuration type allows creating and updating [gateways](../../concepts/gateways.md).
 
-> Configuration files must be inside the project repo, and their names must end with `.dstack.yml`
-> (e.g. `.dstack.yml` or `gateway.dstack.yml` are both acceptable).
-> Any configuration can be run via [`dstack apply`](../cli/dstack/apply.md).
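+As a sketch, a gateway is defined by a backend, a region, and a wildcard domain. The values below are illustrative, and the `certificate` block is optional — it assumes the ACM certificate type described in the reference below, and the ARN is a made-up placeholder:
+
+```yaml
+type: gateway
+# The gateway name (illustrative)
+name: example-gateway
+
+backend: aws
+region: eu-west-1
+# Services are published under subdomains of this domain
+domain: example.com
+# Optional: use an ACM certificate instead of the default Let's Encrypt one
+certificate:
+  type: acm
+  arn: arn:aws:acm:eu-west-1:123456789012:certificate/3c4d5e6f-example
+```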
+## Root reference + +#SCHEMA# dstack._internal.core.models.gateways.GatewayConfiguration + overrides: + show_root_heading: false + type: + required: true + +### `certificate` + +=== "Let's encrypt" + + #SCHEMA# dstack._internal.core.models.gateways.LetsEncryptGatewayCertificate + overrides: + show_root_heading: false + type: + required: true + +=== "ACM" + + #SCHEMA# dstack._internal.core.models.gateways.ACMGatewayCertificate + overrides: + show_root_heading: false + type: + required: true ## Examples @@ -27,28 +49,4 @@ domain: example.com
-[//]: # (TODO: other examples, e.g. private gateways) - -## Root reference - -#SCHEMA# dstack._internal.core.models.gateways.GatewayConfiguration - overrides: - show_root_heading: false - type: - required: true - -## `certificate[type=lets-encrypt]` - -#SCHEMA# dstack._internal.core.models.gateways.LetsEncryptGatewayCertificate - overrides: - show_root_heading: false - type: - required: true - -## `certificate[type=acm]` - -#SCHEMA# dstack._internal.core.models.gateways.ACMGatewayCertificate - overrides: - show_root_heading: false - type: - required: true +[//]: # (TODO: other examples, e.g. private \ No newline at end of file diff --git a/docs/docs/reference/dstack.yml/service.md b/docs/docs/reference/dstack.yml/service.md index 1ad737b11..683290991 100644 --- a/docs/docs/reference/dstack.yml/service.md +++ b/docs/docs/reference/dstack.yml/service.md @@ -1,10 +1,143 @@ -# service +# `service` -The `service` configuration type allows running [services](../../services.md). +The `service` configuration type allows running [services](../../concepts/services.md). -> Configuration files must be inside the project repo, and their names must end with `.dstack.yml` -> (e.g. `.dstack.yml` or `serve.dstack.yml` are both acceptable). -> Any configuration can be run via [`dstack apply`](../cli/dstack/apply.md). +## Root reference + +#SCHEMA# dstack._internal.core.models.configurations.ServiceConfiguration + overrides: + show_root_heading: false + type: + required: true + +### `model` { data-toc-label="model" } + +=== "OpenAI" + + #SCHEMA# dstack._internal.core.models.gateways.OpenAIChatModel + overrides: + show_root_heading: false + type: + required: true + +=== "TGI" + + > TGI provides an OpenAI-compatible API starting with version 1.4.0, + so models served by TGI can be defined with `format: openai` too. + + #SCHEMA# dstack._internal.core.models.gateways.TGIChatModel + overrides: + show_root_heading: false + type: + required: true + + ??? info "Chat template" + + By default, `dstack` loads the [chat template](https://huggingface.co/docs/transformers/main/en/chat_templating) + from the model's repository. If it is not present there, manual configuration is required. + + ```yaml + type: service + + image: ghcr.io/huggingface/text-generation-inference:latest + env: + - MODEL_ID=TheBloke/Llama-2-13B-chat-GPTQ + commands: + - text-generation-launcher --port 8000 --trust-remote-code --quantize gptq + port: 8000 + + resources: + gpu: 80GB + + # Enable the OpenAI-compatible endpoint + model: + type: chat + name: TheBloke/Llama-2-13B-chat-GPTQ + format: tgi + chat_template: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' }}{% endif %}{% endfor %}" + eos_token: "" + ``` + + ##### Limitations + + Please note that model mapping is an experimental feature with the following limitations: + + 1. 
Doesn't work if your `chat_template` uses `bos_token`. As a workaround, replace `bos_token` inside `chat_template` with the token content itself.
+    2. Doesn't work if `eos_token` is defined in the model repository as a dictionary. As a workaround, set `eos_token` manually, as shown in the example above (see Chat template).
+
+    If you encounter any other issues, please make sure to file a [GitHub issue](https://github.com/dstackai/dstack/issues/new/choose).
+
+### `scaling`
+
+#SCHEMA# dstack._internal.core.models.configurations.ScalingSpec
+  overrides:
+    show_root_heading: false
+    type:
+      required: true
+
+### `retry`
+
+#SCHEMA# dstack._internal.core.models.profiles.ProfileRetry
+  overrides:
+    show_root_heading: false
+
+### `resources`
+
+#SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema
+  overrides:
+    show_root_heading: false
+    type:
+      required: true
+    item_id_prefix: resources-
+
+#### `resources.gpu` { #resources-gpu data-toc-label="gpu" }
+
+#SCHEMA# dstack._internal.core.models.resources.GPUSpecSchema
+  overrides:
+    show_root_heading: false
+    type:
+      required: true
+
+#### `resources.disk` { #resources-disk data-toc-label="disk" }
+
+#SCHEMA# dstack._internal.core.models.resources.DiskSpecSchema
+  overrides:
+    show_root_heading: false
+    type:
+      required: true
+
+### `registry_auth`
+
+#SCHEMA# dstack._internal.core.models.configurations.RegistryAuth
+  overrides:
+    show_root_heading: false
+    type:
+      required: true
+
+### `volumes[n]` { #_volumes data-toc-label="volumes" }
+
+=== "Network volumes"
+
+    #SCHEMA# dstack._internal.core.models.volumes.VolumeMountPoint
+      overrides:
+        show_root_heading: false
+        type:
+          required: true
+
+=== "Instance volumes"
+
+    #SCHEMA# dstack._internal.core.models.volumes.InstanceMountPoint
+      overrides:
+        show_root_heading: false
+        type:
+          required: true
+
+??? info "Short syntax"
+
+    The short syntax for volumes is a colon-separated string in the form of `source:destination`
+
+    * `volume-name:/container/path` for network volumes
+    * `/instance/path:/container/path` for instance volumes
 
 ## Examples
 
@@ -202,7 +335,7 @@ Setting the minimum number of replicas to `0` allows the service to scale down t
 [gateway](../../concepts/gateways.md).
 Auto-scaling is currently only supported for services running with a gateway.
 
-### Resources { #_resources }
+### Resources { #resources_ }
 
 If you specify memory size, you can either specify an explicit size (e.g. `24GB`) or a range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`).
 
@@ -304,15 +437,14 @@ resources:
 `dstack` will require the value to be passed via the CLI or set in the current process.
 For instance, you can define environment variables in a `.envrc` file and utilize tools like `direnv`.
 
-#### System environment variables
-
-The following environment variables are available in any run by default:
-
-| Name                    | Description                             |
-|-------------------------|-----------------------------------------|
-| `DSTACK_RUN_NAME`       | The name of the run                     |
-| `DSTACK_REPO_ID`        | The ID of the repo                      |
-| `DSTACK_GPUS_NUM`       | The total number of GPUs in the run     |
+??? info "System environment variables"
+    The following environment variables are available in any run by default:
+    
+    | Name                    | Description                             |
+    |-------------------------|-----------------------------------------|
+    | `DSTACK_RUN_NAME`       | The name of the run                     |
+    | `DSTACK_REPO_ID`        | The ID of the repo                      |
+    | `DSTACK_GPUS_NUM`       | The total number of GPUs in the run     |
 
 ### Spot policy
 
@@ -422,139 +554,4 @@ and its contents will persist across runs.
to `/workflow` (and sets that as the current working directory). Right now, `dstack` doesn't allow you to attach volumes to `/workflow` or any of its subdirectories. -The `service` configuration type supports many other options. See below. - -## Root reference - -#SCHEMA# dstack._internal.core.models.configurations.ServiceConfiguration - overrides: - show_root_heading: false - type: - required: true - -## `model[format=openai]` - -#SCHEMA# dstack._internal.core.models.gateways.OpenAIChatModel - overrides: - show_root_heading: false - type: - required: true - -## `model[format=tgi]` - -> TGI provides an OpenAI-compatible API starting with version 1.4.0, -so models served by TGI can be defined with `format: openai` too. - -#SCHEMA# dstack._internal.core.models.gateways.TGIChatModel - overrides: - show_root_heading: false - type: - required: true - -??? info "Chat template" - - By default, `dstack` loads the [chat template](https://huggingface.co/docs/transformers/main/en/chat_templating) - from the model's repository. If it is not present there, manual configuration is required. - - ```yaml - type: service - - image: ghcr.io/huggingface/text-generation-inference:latest - env: - - MODEL_ID=TheBloke/Llama-2-13B-chat-GPTQ - commands: - - text-generation-launcher --port 8000 --trust-remote-code --quantize gptq - port: 8000 - - resources: - gpu: 80GB - - # Enable the OpenAI-compatible endpoint - model: - type: chat - name: TheBloke/Llama-2-13B-chat-GPTQ - format: tgi - chat_template: "{% if messages[0]['role'] == 'system' %}{% set loop_messages = messages[1:] %}{% set system_message = messages[0]['content'] %}{% else %}{% set loop_messages = messages %}{% set system_message = false %}{% endif %}{% for message in loop_messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if loop.index0 == 0 and system_message != false %}{% set content = '<>\\n' + system_message + '\\n<>\\n\\n' + message['content'] %}{% else %}{% set content = message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + content.strip() + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ ' ' + content.strip() + ' ' }}{% endif %}{% endfor %}" - eos_token: "" - ``` - - ##### Limitations - - Please note that model mapping is an experimental feature with the following limitations: - - 1. Doesn't work if your `chat_template` uses `bos_token`. As a workaround, replace `bos_token` inside `chat_template` with the token content itself. - 2. Doesn't work if `eos_token` is defined in the model repository as a dictionary. As a workaround, set `eos_token` manually, as shown in the example above (see Chat template). - - If you encounter any other issues, please make sure to file a [GitHub issue](https://github.com/dstackai/dstack/issues/new/choose). 
- -## `scaling` - -#SCHEMA# dstack._internal.core.models.configurations.ScalingSpec - overrides: - show_root_heading: false - type: - required: true - -## `retry` - -#SCHEMA# dstack._internal.core.models.profiles.ProfileRetry - overrides: - show_root_heading: false - -## `resources` - -#SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema - overrides: - show_root_heading: false - type: - required: true - item_id_prefix: resources- - -## `resouces.gpu` { #resources-gpu data-toc-label="resources.gpu" } - -#SCHEMA# dstack._internal.core.models.resources.GPUSpecSchema - overrides: - show_root_heading: false - type: - required: true - -## `resouces.disk` { #resources-disk data-toc-label="resources.disk" } - -#SCHEMA# dstack._internal.core.models.resources.DiskSpecSchema - overrides: - show_root_heading: false - type: - required: true - -## `registry_auth` - -#SCHEMA# dstack._internal.core.models.configurations.RegistryAuth - overrides: - show_root_heading: false - type: - required: true - -## `volumes[n]` { #_volumes data-toc-label="volumes" } - -=== "Network volumes" - - #SCHEMA# dstack._internal.core.models.volumes.VolumeMountPoint - overrides: - show_root_heading: false - type: - required: true - -=== "Instance volumes" - - #SCHEMA# dstack._internal.core.models.volumes.InstanceMountPoint - overrides: - show_root_heading: false - type: - required: true - -??? info "Short syntax" - - The short syntax for volumes is a colon-separated string in the form of `source:destination` - - * `volume-name:/container/path` for network volumes - * `/instance/path:/container/path` for instance volumes +The `service` configuration type supports many other options. See below. \ No newline at end of file diff --git a/docs/docs/reference/dstack.yml/task.md b/docs/docs/reference/dstack.yml/task.md index 3c99eb8a2..0f4d809a6 100644 --- a/docs/docs/reference/dstack.yml/task.md +++ b/docs/docs/reference/dstack.yml/task.md @@ -1,10 +1,80 @@ -# task +# `task` -The `task` configuration type allows running [tasks](../../tasks.md). +The `task` configuration type allows running [tasks](../../concepts/tasks.md). -> Configuration files must be inside the project repo, and their names must end with `.dstack.yml` -> (e.g. `.dstack.yml` or `train.dstack.yml` are both acceptable). -> Any configuration can be run via [`dstack apply`](../cli/dstack/apply.md). 
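+For orientation, a minimal task is a list of commands plus the resources they need. The sketch below uses illustrative values; the examples that follow cover ports, distributed tasks, and the other options:
+
+```yaml
+type: task
+# The task name (illustrative)
+name: train
+
+python: "3.10"
+# The commands to run as a job
+commands:
+  - pip install -r requirements.txt
+  - python train.py
+
+resources:
+  gpu: 24GB
+```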
+
+## Root reference
+
+#SCHEMA# dstack._internal.core.models.configurations.TaskConfiguration
+  overrides:
+    show_root_heading: false
+    type:
+      required: true
+
+### `retry`
+
+#SCHEMA# dstack._internal.core.models.profiles.ProfileRetry
+  overrides:
+    show_root_heading: false
+    type:
+      required: true
+
+### `resources`
+
+#SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema
+  overrides:
+    show_root_heading: false
+    type:
+      required: true
+    item_id_prefix: resources-
+
+#### `resources.gpu` { #resources-gpu data-toc-label="gpu" }
+
+#SCHEMA# dstack._internal.core.models.resources.GPUSpecSchema
+  overrides:
+    show_root_heading: false
+    type:
+      required: true
+
+#### `resources.disk` { #resources-disk data-toc-label="disk" }
+
+#SCHEMA# dstack._internal.core.models.resources.DiskSpecSchema
+  overrides:
+    show_root_heading: false
+    type:
+      required: true
+
+### `registry_auth`
+
+#SCHEMA# dstack._internal.core.models.configurations.RegistryAuth
+  overrides:
+    show_root_heading: false
+    type:
+      required: true
+
+### `volumes[n]` { #_volumes data-toc-label="volumes" }
+
+=== "Network volumes"
+
+    #SCHEMA# dstack._internal.core.models.volumes.VolumeMountPoint
+      overrides:
+        show_root_heading: false
+        type:
+          required: true
+
+=== "Instance volumes"
+
+    #SCHEMA# dstack._internal.core.models.volumes.InstanceMountPoint
+      overrides:
+        show_root_heading: false
+        type:
+          required: true
+
+??? info "Short syntax"
+
+    The short syntax for volumes is a colon-separated string in the form of `source:destination`
+
+    * `volume-name:/container/path` for network volumes
+    * `/instance/path:/container/path` for instance volumes
 
 ## Examples
 
@@ -128,7 +198,7 @@ commands:
 !!! info "Docker and Docker Compose"
     All backends except `runpod`, `vastai`, and `kubernetes` also allow using [Docker and Docker Compose](../../guides/protips.md#docker-and-docker-compose) inside `dstack` runs.
 
-### Resources { #_resources }
+### Resources { #resources_ }
 
 If you specify memory size, you can either specify an explicit size (e.g. `24GB`) or a range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`).
 
@@ -215,20 +285,19 @@ If you don't assign a value to an environment variable (see `HF_TOKEN` above),
 `dstack` will require the value to be passed via the CLI or set in the current process.
 For instance, you can define environment variables in a `.envrc` file and utilize tools like `direnv`.
 
-##### System environment variables
-
-The following environment variables are available in any run by default:
-
-| Name                    | Description                                                      |
-|-------------------------|------------------------------------------------------------------|
-| `DSTACK_RUN_NAME`       | The name of the run                                              |
-| `DSTACK_REPO_ID`        | The ID of the repo                                               |
-| `DSTACK_GPUS_NUM`       | The total number of GPUs in the run                              |
-| `DSTACK_NODES_NUM`      | The number of nodes in the run                                   |
-| `DSTACK_GPUS_PER_NODE`  | The number of GPUs per node                                      |
-| `DSTACK_NODE_RANK`      | The rank of the node                                             |
-| `DSTACK_MASTER_NODE_IP` | The internal IP address the master node                          |
-| `DSTACK_NODES_IPS`      | The list of internal IP addresses of all nodes delimited by "\n" |
+??? 
info "System environment variables" + The following environment variables are available in any run by default: + + | Name | Description | + |-------------------------|------------------------------------------------------------------| + | `DSTACK_RUN_NAME` | The name of the run | + | `DSTACK_REPO_ID` | The ID of the repo | + | `DSTACK_GPUS_NUM` | The total number of GPUs in the run | + | `DSTACK_NODES_NUM` | The number of nodes in the run | + | `DSTACK_GPUS_PER_NODE` | The number of GPUs per node | + | `DSTACK_NODE_RANK` | The rank of the node | + | `DSTACK_MASTER_NODE_IP` | The internal IP address the master node | + | `DSTACK_NODES_IPS` | The list of internal IP addresses of all nodes delimited by "\n" | ### Distributed tasks @@ -442,78 +511,4 @@ and its contents will persist across runs. to `/workflow` (and sets that as the current working directory). Right now, `dstack` doesn't allow you to attach volumes to `/workflow` or any of its subdirectories. -The `task` configuration type supports many other options. See below. - -## Root reference - -#SCHEMA# dstack._internal.core.models.configurations.TaskConfiguration - overrides: - show_root_heading: false - type: - required: true - -## `retry` - -#SCHEMA# dstack._internal.core.models.profiles.ProfileRetry - overrides: - show_root_heading: false - type: - required: true - -## `resources` - -#SCHEMA# dstack._internal.core.models.resources.ResourcesSpecSchema - overrides: - show_root_heading: false - type: - required: true - item_id_prefix: resources- - -## `resouces.gpu` { #resources-gpu data-toc-label="resources.gpu" } - -#SCHEMA# dstack._internal.core.models.resources.GPUSpecSchema - overrides: - show_root_heading: false - type: - required: true - -## `resouces.disk` { #resources-disk data-toc-label="resources.disk" } - -#SCHEMA# dstack._internal.core.models.resources.DiskSpecSchema - overrides: - show_root_heading: false - type: - required: true - -## `registry_auth` - -#SCHEMA# dstack._internal.core.models.configurations.RegistryAuth - overrides: - show_root_heading: false - type: - required: true - -## `volumes[n]` { #_volumes data-toc-label="volumes" } - -=== "Network volumes" - - #SCHEMA# dstack._internal.core.models.volumes.VolumeMountPoint - overrides: - show_root_heading: false - type: - required: true - -=== "Instance volumes" - - #SCHEMA# dstack._internal.core.models.volumes.InstanceMountPoint - overrides: - show_root_heading: false - type: - required: true - -??? info "Short syntax" - - The short syntax for volumes is a colon-separated string in the form of `source:destination` - - * `volume-name:/container/path` for network volumes - * `/instance/path:/container/path` for instance volumes +The `task` configuration type supports many other options. See below. \ No newline at end of file diff --git a/docs/docs/reference/dstack.yml/volume.md b/docs/docs/reference/dstack.yml/volume.md index 246270ab2..cfd330242 100644 --- a/docs/docs/reference/dstack.yml/volume.md +++ b/docs/docs/reference/dstack.yml/volume.md @@ -1,10 +1,14 @@ -# volume +# `volume` The `volume` configuration type allows creating, registering, and updating [volumes](../../concepts/volumes.md). -> Configuration files must be inside the project repo, and their names must end with `.dstack.yml` -> (e.g. `.dstack.yml` or `fleet.dstack.yml` are both acceptable). -> Any configuration can be run via [`dstack apply`](../cli/dstack/apply.md). 
+## Root reference + +#SCHEMA# dstack._internal.core.models.volumes.VolumeConfiguration + overrides: + show_root_heading: false + type: + required: true ## Examples @@ -30,7 +34,7 @@ size: 100GB ### Registering an existing volume { #existing-volume }
- + ```yaml type: volume # The name of the volume @@ -44,13 +48,4 @@ region: eu-central-1 volume_id: vol1235 ``` -
- - -## Root reference - -#SCHEMA# dstack._internal.core.models.volumes.VolumeConfiguration - overrides: - show_root_heading: false - type: - required: true +
\ No newline at end of file diff --git a/docs/docs/reference/misc/environment-variables.md b/docs/docs/reference/misc/environment-variables.md index 43b600a46..ee9024277 100644 --- a/docs/docs/reference/misc/environment-variables.md +++ b/docs/docs/reference/misc/environment-variables.md @@ -5,7 +5,7 @@ The following read-only environment variables are automatically propagated to configurations for dev environments, tasks, and services: -##### DSTACK_RUN_NAME { #DSTACK_RUN_NAME } +###### DSTACK_RUN_NAME { #DSTACK_RUN_NAME } The name of the run. @@ -21,11 +21,11 @@ commands: If `name` is not set in the configuration, it is assigned a random name (e.g. `wet-mangust-1`). -##### DSTACK_REPO_ID { #DSTACK_REPO_ID } +###### DSTACK_REPO_ID { #DSTACK_REPO_ID } The ID of the repo -##### DSTACK_GPUS_NUM { #DSTACK_GPUS_NUM } +###### DSTACK_GPUS_NUM { #DSTACK_GPUS_NUM } The total number of GPUs in the run @@ -49,19 +49,19 @@ resources: gpu: 24GB ``` -##### DSTACK_NODES_NUM { #DSTACK_NODES_NUM } +###### DSTACK_NODES_NUM { #DSTACK_NODES_NUM } The number of nodes in the run -##### DSTACK_GPUS_PER_NODE { #DSTACK_GPUS_PER_NODE } +###### DSTACK_GPUS_PER_NODE { #DSTACK_GPUS_PER_NODE } The number of GPUs per node -##### DSTACK_NODE_RANK { #DSTACK_NODE_RANK } +###### DSTACK_NODE_RANK { #DSTACK_NODE_RANK } The rank of the node -##### DSTACK_NODE_RANK { #DSTACK_NODE_RANK } +###### DSTACK_NODE_RANK { #DSTACK_NODE_RANK } The internal IP address the master node. @@ -90,7 +90,7 @@ resources: gpu: 24GB ``` -##### DSTACK_NODES_IPS { #DSTACK_NODES_IPS } +###### DSTACK_NODES_IPS { #DSTACK_NODES_IPS } The list of internal IP addresses of all nodes delimited by `"\n"` @@ -102,7 +102,7 @@ via `dstack server` or deployed using Docker. For more details on the options below, refer to the [server deployment](../../guides/server-deployment.md) guide. -##### DSTACK_SERVER_LOG_LEVEL { #DSTACK_SERVER_LOG_LEVEL } +###### DSTACK_SERVER_LOG_LEVEL { #DSTACK_SERVER_LOG_LEVEL } Has the same effect as `--log-level`. Defaults to `INFO`. @@ -117,43 +117,43 @@ $ DSTACK_SERVER_LOG_LEVEL=debug dstack server
-##### DSTACK_SERVER_LOG_FORMAT { #DSTACK_SERVER_LOG_FORMAT } +###### DSTACK_SERVER_LOG_FORMAT { #DSTACK_SERVER_LOG_FORMAT } Sets format of log output. Can be `rich`, `standard`, `json`. Defaults to `rich`. -##### DSTACK_SERVER_HOST { #DSTACK_SERVER_HOST } +###### DSTACK_SERVER_HOST { #DSTACK_SERVER_HOST } Has the same effect as `--host`. Defaults to `127.0.0.1`. -##### DSTACK_SERVER_PORT { #DSTACK_SERVER_PORT } +###### DSTACK_SERVER_PORT { #DSTACK_SERVER_PORT } Has the same effect as `--port`. Defaults to `3000`. -##### DSTACK_SERVER_ADMIN_TOKEN { #DSTACK_SERVER_ADMIN_TOKEN } +###### DSTACK_SERVER_ADMIN_TOKEN { #DSTACK_SERVER_ADMIN_TOKEN } Has the same effect as `--token`. Defaults to `None`. -##### DSTACK_SERVER_DIR { #DSTACK_SERVER_DIR } +###### DSTACK_SERVER_DIR { #DSTACK_SERVER_DIR } Sets path to store data and server configs. Defaults to `~/.dstack/server`. -##### DSTACK_DATABASE_URL { #DSTACK_DATABASE_URL } +###### DSTACK_DATABASE_URL { #DSTACK_DATABASE_URL } The database URL to use instead of default SQLite. Currently `dstack` supports Postgres. Example: `postgresql+asyncpg://myuser:mypassword@localhost:5432/mydatabase`. Defaults to `None`. -##### DSTACK_SERVER_CLOUDWATCH_LOG_GROUP { #DSTACK_SERVER_CLOUDWATCH_LOG_GROUP } +###### DSTACK_SERVER_CLOUDWATCH_LOG_GROUP { #DSTACK_SERVER_CLOUDWATCH_LOG_GROUP } The CloudWatch Logs group for workloads logs. If not set, the default file-based log storage is used. -##### DSTACK_SERVER_CLOUDWATCH_LOG_REGION { #DSTACK_SERVER_CLOUDWATCH_LOG_REGION } +###### DSTACK_SERVER_CLOUDWATCH_LOG_REGION { #DSTACK_SERVER_CLOUDWATCH_LOG_REGION } The CloudWatch Logs region. Defaults to `None`. -##### DSTACK_DEFAULT_SERVICE_CLIENT_MAX_BODY_SIZE { #DSTACK_DEFAULT_SERVICE_CLIENT_MAX_BODY_SIZE } +###### DSTACK_DEFAULT_SERVICE_CLIENT_MAX_BODY_SIZE { #DSTACK_DEFAULT_SERVICE_CLIENT_MAX_BODY_SIZE } Request body size limit for services, in bytes. Defaults to 64 MiB. -##### DSTACK_FORBID_SERVICES_WITHOUT_GATEWAY { #DSTACK_FORBID_SERVICES_WITHOUT_GATEWAY } +###### DSTACK_FORBID_SERVICES_WITHOUT_GATEWAY { #DSTACK_FORBID_SERVICES_WITHOUT_GATEWAY } Forbids registering new services without a gateway if set to any value. @@ -172,7 +172,7 @@ Forbids registering new services without a gateway if set to any value. The following environment variables are supported by the CLI. -##### DSTACK_CLI_LOG_LEVEL { #DSTACK_CLI_LOG_LEVEL } +###### DSTACK_CLI_LOG_LEVEL { #DSTACK_CLI_LOG_LEVEL } Configures CLI logging level. Defaults to `INFO`. @@ -186,6 +186,6 @@ $ DSTACK_CLI_LOG_LEVEL=debug dstack apply -f .dstack.yml
-##### DSTACK_PROJECT { #DSTACK_PROJECT } +###### DSTACK_PROJECT { #DSTACK_PROJECT } Has the same effect as `--project`. Defaults to `None`. diff --git a/docs/docs/reference/server/config.yml.md b/docs/docs/reference/server/config.yml.md index 20916b5ed..208bb9e30 100644 --- a/docs/docs/reference/server/config.yml.md +++ b/docs/docs/reference/server/config.yml.md @@ -4,936 +4,13 @@ The `~/.dstack/server/config.yml` file is used to [configure](../../installation/index.md#1-configure-backends) the `dstack` server cloud accounts and other sever-level settings such as encryption. -## Configure backends { #backends } - -> The `dstack` server allows you to configure backends for multiple projects. -> If you don't need multiple projects, use only the `main` project. - -Each cloud account must be configured under the `backends` property of the respective project. -See the examples below. - -### Cloud providers { #clouds } - -#### AWS - -There are two ways to configure AWS: using an access key or using the default credentials. - -=== "Default credentials" - - If you have default credentials set up (e.g. in `~/.aws/credentials`), configure the backend like this: - -
- - ```yaml - projects: - - name: main - backends: - - type: aws - creds: - type: default - ``` - -
- -=== "Access key" - - Create an access key by following the [this guide :material-arrow-top-right-thin:{ .external }](https://docs.aws.amazon.com/cli/latest/userguide/cli-authentication-user.html#cli-authentication-user-get). - Once you've downloaded the `.csv` file with your IAM user's Access key ID and Secret access key, proceed to - configure the backend. - -
- - ```yaml - projects: - - name: main - backends: - - type: aws - creds: - type: access_key - access_key: KKAAUKLIZ5EHKICAOASV - secret_key: pn158lMqSBJiySwpQ9ubwmI6VUU3/W2fdJdFwfgO - ``` - -
- -??? info "Required permissions" - The following AWS policy permissions are sufficient for `dstack` to work: - - ``` - { - "Version": "2012-10-17", - "Statement": [ - { - "Effect": "Allow", - "Action": [ - "ec2:AttachVolume", - "ec2:AuthorizeSecurityGroupEgress", - "ec2:AuthorizeSecurityGroupIngress", - "ec2:CreatePlacementGroup", - "ec2:CancelSpotInstanceRequests", - "ec2:CreateSecurityGroup", - "ec2:CreateTags", - "ec2:CreateVolume", - "ec2:DeletePlacementGroup", - "ec2:DeleteVolume", - "ec2:DescribeAvailabilityZones", - "ec2:DescribeCapacityReservations" - "ec2:DescribeImages", - "ec2:DescribeInstances", - "ec2:DescribeInstanceAttribute", - "ec2:DescribeInstanceTypes", - "ec2:DescribeRouteTables", - "ec2:DescribeSecurityGroups", - "ec2:DescribeSubnets", - "ec2:DescribeVpcs", - "ec2:DescribeVolumes", - "ec2:DetachVolume", - "ec2:RunInstances", - "ec2:TerminateInstances" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - "servicequotas:ListServiceQuotas", - "servicequotas:GetServiceQuota" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - "elasticloadbalancing:CreateLoadBalancer", - "elasticloadbalancing:CreateTargetGroup", - "elasticloadbalancing:CreateListener", - "elasticloadbalancing:RegisterTargets", - "elasticloadbalancing:AddTags", - "elasticloadbalancing:DeleteLoadBalancer", - "elasticloadbalancing:DeleteTargetGroup", - "elasticloadbalancing:DeleteListener", - "elasticloadbalancing:DeregisterTargets" - ], - "Resource": "*" - }, - { - "Effect": "Allow", - "Action": [ - "acm:DescribeCertificate", - "acm:ListCertificates" - ], - "Resource": "*" - } - ] - } - ``` - - The `elasticloadbalancing:*` and `acm:*` permissions are only needed for provisioning gateways with ACM (AWS Certificate Manager) certificates. - -??? info "VPC" - By default, `dstack` uses the default VPC. It's possible to customize it: - - === "vpc_name" - - ```yaml - projects: - - name: main - backends: - - type: aws - creds: - type: default - - vpc_name: my-vpc - ``` - - === "vpc_ids" - ```yaml - projects: - - name: main - backends: - - type: aws - creds: - type: default - - default_vpcs: true - vpc_ids: - us-east-1: vpc-0a2b3c4d5e6f7g8h - us-east-2: vpc-9i8h7g6f5e4d3c2b - us-west-1: vpc-4d3c2b1a0f9e8d7 - ``` - - For the regions without configured `vpc_ids`, enable default VPCs by setting `default_vpcs` to `true`. - -??? info "Private subnets" - By default, `dstack` provisions instances with public IPs and permits inbound SSH traffic. - If you want `dstack` to use private subnets and provision instances without public IPs, set `public_ips` to `false`. - - ```yaml - projects: - - name: main - backends: - - type: aws - creds: - type: default - - public_ips: false - ``` - - Using private subnets assumes that both the `dstack` server and users can access the configured VPC's private subnets. - Additionally, private subnets must have outbound internet connectivity provided by NAT Gateway, Transit Gateway, or other mechanism. - -??? info "OS images" - By default, `dstack` uses its own [AMI :material-arrow-top-right-thin:{ .external }](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/AMIs.html) - optimized for `dstack`. 
- To use your own or other third-party images, set the `os_images` property: - - ```yaml - projects: - - name: main - backends: - - type: aws - creds: - type: default - - os_images: - cpu: - name: my-ami-for-cpu-instances - owner: self - user: dstack - nvidia: - name: 'Some ThirdParty CUDA image' - owner: 123456789012 - user: ubuntu - ``` - - Here, both `cpu` and `nvidia` properties are optional, but if the property is not set, you won´t be able to use the corresponding instance types. - - The `name` is an AMI name. - The `owner` is either an AWS account ID (a 12-digit number) or a special value `self` indicating the current account. - The `user` specifies an OS user for instance provisioning. - - !!! info "Image requirements" - * SSH server listening on port 22 - * `user` with passwordless sudo access - * Docker is installed - * (For NVIDIA instances) NVIDIA/CUDA drivers and NVIDIA Container Toolkit are installed - -#### Azure - -There are two ways to configure Azure: using a client secret or using the default credentials. - -=== "Default credentials" - - If you have default credentials set up, configure the backend like this: - -
- - ```yaml - projects: - - name: main - backends: - - type: azure - subscription_id: 06c82ce3-28ff-4285-a146-c5e981a9d808 - tenant_id: f84a7584-88e4-4fd2-8e97-623f0a715ee1 - creds: - type: default - ``` - -
- - If you don't know your `subscription_id` and `tenant_id`, use [Azure CLI :material-arrow-top-right-thin:{ .external }](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli): - - ```shell - az account show --query "{subscription_id: id, tenant_id: tenantId}" - ``` - -=== "Client secret" - - A client secret can be created using the [Azure CLI :material-arrow-top-right-thin:{ .external }](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli): - - ```shell - SUBSCRIPTION_ID=... - az ad sp create-for-rbac - --name dstack-app \ - --role $DSTACK_ROLE \ - --scopes /subscriptions/$SUBSCRIPTION_ID \ - --query "{ tenant_id: tenant, client_id: appId, client_secret: password }" - ``` - - Once you have `tenant_id`, `client_id`, and `client_secret`, go ahead and configure the backend. - -
- - ```yaml - projects: - - name: main - backends: - - type: azure - subscription_id: 06c82ce3-28ff-4285-a146-c5e981a9d808 - tenant_id: f84a7584-88e4-4fd2-8e97-623f0a715ee1 - creds: - type: client - client_id: acf3f73a-597b-46b6-98d9-748d75018ed0 - client_secret: 1Kb8Q~o3Q2hdEvrul9yaj5DJDFkuL3RG7lger2VQ - ``` - -
- - If you don't know your `subscription_id`, use [Azure CLI :material-arrow-top-right-thin:{ .external }](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli): - - ```shell - az account show --query "{subscription_id: id}" - ``` - -??? info "Required permissions" - The following Azure permissions are sufficient for `dstack` to work: - - ```json - { - "properties": { - "roleName": "dstack-role", - "description": "Minimal required permissions for using Azure with dstack", - "assignableScopes": [ - "/subscriptions/${YOUR_SUBSCRIPTION_ID}" - ], - "permissions": [ - { - "actions": [ - "Microsoft.Authorization/*/read", - "Microsoft.Compute/availabilitySets/*", - "Microsoft.Compute/locations/*", - "Microsoft.Compute/virtualMachines/*", - "Microsoft.Compute/virtualMachineScaleSets/*", - "Microsoft.Compute/cloudServices/*", - "Microsoft.Compute/disks/write", - "Microsoft.Compute/disks/read", - "Microsoft.Compute/disks/delete", - "Microsoft.Network/networkSecurityGroups/*", - "Microsoft.Network/locations/*", - "Microsoft.Network/virtualNetworks/*", - "Microsoft.Network/networkInterfaces/*", - "Microsoft.Network/publicIPAddresses/*", - "Microsoft.Resources/subscriptions/resourceGroups/read", - "Microsoft.Resources/subscriptions/resourceGroups/write", - "Microsoft.Resources/subscriptions/read" - ], - "notActions": [], - "dataActions": [], - "notDataActions": [] - } - ] - } - } - ``` - -??? info "VPC" - By default, `dstack` creates new Azure networks and subnets for every configured region. - It's possible to use custom networks by specifying `vpc_ids`: - - ```yaml - projects: - - name: main - backends: - - type: azure - creds: - type: default - regions: [westeurope] - vpc_ids: - westeurope: myNetworkResourceGroup/myNetworkName - ``` - - -??? info "Private subnets" - By default, `dstack` provisions instances with public IPs and permits inbound SSH traffic. - If you want `dstack` to use private subnets and provision instances without public IPs, - specify custom networks using `vpc_ids` and set `public_ips` to `false`. - - ```yaml - projects: - - name: main - backends: - - type: azure - creds: - type: default - regions: [westeurope] - vpc_ids: - westeurope: myNetworkResourceGroup/myNetworkName - public_ips: false - ``` - - Using private subnets assumes that both the `dstack` server and users can access the configured VPC's private subnets. - Additionally, private subnets must have outbound internet connectivity provided by [NAT Gateway or other mechanism](https://learn.microsoft.com/en-us/azure/nat-gateway/nat-overview). - -#### GCP - -There are two ways to configure GCP: using a service account or using the default credentials. - -=== "Default credentials" - - Enable GCP application default credentials: - - ```shell - gcloud auth application-default login - ``` - - Then configure the backend like this: - -
- - ```yaml - projects: - - name: main - backends: - - type: gcp - project_id: gcp-project-id - creds: - type: default - ``` - -
- -=== "Service account" - - To create a service account, follow [this guide :material-arrow-top-right-thin:{ .external }](https://cloud.google.com/iam/docs/service-accounts-create). After setting up the service account [create a key :material-arrow-top-right-thin:{ .external }](https://cloud.google.com/iam/docs/keys-create-delete) for it and download the corresponding JSON file. - - Then go ahead and configure the backend by specifying the downloaded file path. - -
- - ```yaml - projects: - - name: main - backends: - - type: gcp - project_id: gcp-project-id - creds: - type: service_account - filename: ~/.dstack/server/gcp-024ed630eab5.json - ``` - -
- -If you don't know your GCP project ID, use [Google Cloud CLI :material-arrow-top-right-thin:{ .external }](https://cloud.google.com/sdk/docs/install-sdk): - -```shell -gcloud projects list --format="json(projectId)" -``` - -??? info "Required permissions" - The following GCP permissions are sufficient for `dstack` to work: - - ``` - compute.disks.create - compute.disks.delete - compute.disks.get - compute.disks.list - compute.disks.setLabels - compute.disks.use - compute.firewalls.create - compute.images.useReadOnly - compute.instances.attachDisk - compute.instances.create - compute.instances.delete - compute.instances.detachDisk - compute.instances.get - compute.instances.setLabels - compute.instances.setMetadata - compute.instances.setServiceAccount - compute.instances.setTags - compute.networks.get - compute.networks.updatePolicy - compute.regions.get - compute.regions.list - compute.routers.list - compute.subnetworks.list - compute.subnetworks.use - compute.subnetworks.useExternalIp - compute.zoneOperations.get - ``` - - If you plan to use TPUs, additional permissions are required: - - ``` - tpu.nodes.create - tpu.nodes.get - tpu.nodes.update - tpu.nodes.delete - tpu.operations.get - tpu.operations.list - ``` - - Also, the use of TPUs requires the `serviceAccountUser` role. - For TPU VMs, dstack will use the default service account. - -??? info "Required APIs" - First, ensure the required APIs are enabled in your GCP `project_id`. - - ```shell - PROJECT_ID=... - gcloud config set project $PROJECT_ID - gcloud services enable cloudapis.googleapis.com - gcloud services enable compute.googleapis.com - ``` - -??? info "VPC" - - === "VPC" - -
- - ```yaml - projects: - - name: main - backends: - - type: gcp - project_id: gcp-project-id - creds: - type: default - - vpc_name: my-custom-vpc - ``` - -
- - === "Shared VPC" - -
- - ```yaml - projects: - - name: main - backends: - - type: gcp - project_id: gcp-project-id - creds: - type: default - - vpc_name: my-custom-vpc - vpc_project_id: another-project-id - ``` - -
- - When using a Shared VPC, ensure there is a firewall rule allowing `INGRESS` traffic on port `22`. - You can limit this rule to `dstack` instances using the `dstack-runner-instance` target tag. - - When using GCP gateways with a Shared VPC, also ensure there is a firewall rule allowing `INGRESS` traffic on ports `22`, `80`, `443`. - You can limit this rule to `dstack` gateway instances using the `dstack-gateway-instance` target tag. - - To use TPUs with a Shared VPC, you need to grant the TPU Service Account in your service project permissions - to manage resources in the host project by granting the "TPU Shared VPC Agent" (roles/tpu.xpnAgent) role - ([more in the GCP docs](https://cloud.google.com/tpu/docs/shared-vpc-networks#vpc-shared-vpc)). - -??? info "Private subnets" - By default, `dstack` provisions instances with public IPs and permits inbound SSH traffic. - If you want `dstack` to use private subnets and provision instances without public IPs, set `public_ips` to `false`. - - ```yaml - projects: - - name: main - backends: - - type: gcp - creds: - type: default - - public_ips: false - ``` - - Using private subnets assumes that both the `dstack` server and users can access the configured VPC's private subnets. - Additionally, [Cloud NAT](https://cloud.google.com/nat/docs/overview) must be configured to provide access to external resources for provisioned instances. - -#### Lambda - -Log into your [Lambda Cloud :material-arrow-top-right-thin:{ .external }](https://lambdalabs.com/service/gpu-cloud) account, click API keys in the sidebar, and then click the `Generate API key` -button to create a new API key. - -Then, go ahead and configure the backend: - -
- -```yaml -projects: -- name: main - backends: - - type: lambda - creds: - type: api_key - api_key: eersct_yrpiey-naaeedst-tk-_cb6ba38e1128464aea9bcc619e4ba2a5.iijPMi07obgt6TZ87v5qAEj61RVxhd0p -``` - -
- -#### RunPod - -Log into your [RunPod :material-arrow-top-right-thin:{ .external }](https://www.runpod.io/console/) console, click Settings in the sidebar, expand the `API Keys` section, and click -the button to create a Read & Write key. - -Then proceed to configuring the backend. - -
- -```yaml -projects: - - name: main - backends: - - type: runpod - creds: - type: api_key - api_key: US9XTPDIV8AR42MMINY8TCKRB8S4E7LNRQ6CAUQ9 -``` - -
- -#### Vast.ai - -Log into your [Vast.ai :material-arrow-top-right-thin:{ .external }](https://cloud.vast.ai/) account, click Account in the sidebar, and copy your -API Key. - -Then, go ahead and configure the backend: - -
- -```yaml -projects: -- name: main - backends: - - type: vastai - creds: - type: api_key - api_key: d75789f22f1908e0527c78a283b523dd73051c8c7d05456516fc91e9d4efd8c5 -``` - -
- -Also, the `vastai` backend supports on-demand instances only. Spot instance support coming soon. - -#### TensorDock - -Log into your [TensorDock :material-arrow-top-right-thin:{ .external }](https://dashboard.tensordock.com/) account, click Developers in the sidebar, and use the `Create an Authorization` section to create a new authorization key. - -Then, go ahead and configure the backend: - -
- -```yaml -projects: - - name: main - backends: - - type: tensordock - creds: - type: api_key - api_key: 248e621d-9317-7494-dc1557fa5825b-98b - api_token: FyBI3YbnFEYXdth2xqYRnQI7hiusssBC -``` - -
- -The `tensordock` backend supports on-demand instances only. Spot instance support coming soon. - -#### CUDO - -Log into your [CUDO Compute :material-arrow-top-right-thin:{ .external }](https://compute.cudo.org/) account, click API keys in the sidebar, and click the `Create an API key` button. - -Ensure you've created a project with CUDO Compute, then proceed to configuring the backend. - -
- -```yaml -projects: - - name: main - backends: - - type: cudo - project_id: my-cudo-project - creds: - type: api_key - api_key: 7487240a466624b48de22865589 -``` - -
- -#### OCI - -There are two ways to configure OCI: using client credentials or using the default credentials. - -=== "Default credentials" - If you have default credentials set up in `~/.oci/config`, configure the backend like this: - -
- - ```yaml - projects: - - name: main - backends: - - type: oci - creds: - type: default - ``` - -
- -=== "Client credentials" - - Log into the [OCI Console :material-arrow-top-right-thin:{ .external }](https://cloud.oracle.com), go to `My profile`, - select `API keys`, and click `Add API key`. - - Once you add a key, you'll see the configuration file. Copy its values to configure the backend as follows: - -
- - ```yaml - projects: - - name: main - backends: - - type: oci - creds: - type: client - user: ocid1.user.oc1..g5vlaeqfu47akmaafq665xsgmyaqjktyfxtacfxc4ftjxuca7aohnd2ev66m - tenancy: ocid1.tenancy.oc1..ajqsftvk4qarcfaak3ha4ycdsaahxmaita5frdwg3tqo2bcokpd3n7oizwai - region: eu-frankfurt-1 - fingerprint: 77:32:77:00:49:7c:cb:56:84:75:8e:77:96:7d:53:17 - key_file: ~/.oci/private_key.pem - ``` - -
- - Make sure to include either the path to your private key via `key_file` or the contents of the key via `key_content`. - -??? info "Required permissions" - - This is an example of a restrictive policy for a group of `dstack` users: - - ``` - Allow group to read compartments in tenancy where target.compartment.name = '' - Allow group to read marketplace-community-listings in compartment - Allow group to manage app-catalog-listing in compartment - Allow group to manage instances in compartment - Allow group to manage compute-capacity-reports in compartment - Allow group to manage volumes in compartment - Allow group to manage volume-attachments in compartment - Allow group to manage virtual-network-family in compartment - ``` - - To use this policy, create a compartment for `dstack` and specify it in `~/.dstack/server/config.yml`. - - ```yaml - projects: - - name: main - backends: - - type: oci - creds: - type: default - compartment_id: ocid1.compartment.oc1..aaaaaaaa - ``` - -#### DataCrunch - -Log into your [DataCrunch :material-arrow-top-right-thin:{ .external }](https://cloud.datacrunch.io/) account, click Keys in the sidebar, find `REST API Credentials` area and then click the `Generate Credentials` button. - -Then, go ahead and configure the backend: - -
- -```yaml -projects: - - name: main - backends: - - type: datacrunch - creds: - type: api_key - client_id: xfaHBqYEsArqhKWX-e52x3HH7w8T - client_secret: B5ZU5Qx9Nt8oGMlmMhNI3iglK8bjMhagTbylZy4WzncZe39995f7Vxh8 -``` - -
- -### On-prem servers { #on-prem } - -#### SSH fleets - -> For using `dstack` with on-prem servers, no backend configuration is required. -See [SSH fleets](../../concepts/fleets.md#ssh-fleets) for more details. - -#### Kubernetes - -To configure a Kubernetes backend, specify the path to the kubeconfig file, -and the port that `dstack` can use for proxying SSH traffic. -In case of a self-managed cluster, also specify the IP address of any node in the cluster. - -[//]: # (TODO: Mention that the Kind context has to be selected via `current-context` ) - -=== "Self-managed" - - Here's how to configure the backend to use a self-managed cluster. - -
- - ```yaml - projects: - - name: main - backends: - - type: kubernetes - kubeconfig: - filename: ~/.kube/config - networking: - ssh_host: localhost # The external IP address of any node - ssh_port: 32000 # Any port accessible outside of the cluster - ``` - -
- - The port specified to `ssh_port` must be accessible outside of the cluster. - - ??? info "Kind" - If you are using [Kind](https://kind.sigs.k8s.io/), make sure to make - to set up `ssh_port` via `extraPortMappings` for proxying SSH traffic: - - ```yaml - kind: Cluster - apiVersion: kind.x-k8s.io/v1alpha4 - nodes: - - role: control-plane - extraPortMappings: - - containerPort: 32000 # Must be same as `ssh_port` - hostPort: 32000 # Must be same as `ssh_port` - ``` - - Go ahead and create the cluster like this: - - ```shell - kind create cluster --config examples/misc/kubernetes/kind-config.yml - ``` - -[//]: # (TODO: Elaborate on the Kind's IP address on Linux) - -=== "Managed" - Here's how to configure the backend to use a managed cluster (AWS, GCP, Azure). - -
- - ```yaml - projects: - - name: main - backends: - - type: kubernetes - kubeconfig: - filename: ~/.kube/config - networking: - ssh_port: 32000 # Any port accessible outside of the cluster - ``` - -
- - The port specified to `ssh_port` must be accessible outside of the cluster. - - ??? info "EKS" - For example, if you are using EKS, make sure to add it via an ingress rule - of the corresponding security group: - - ```shell - aws ec2 authorize-security-group-ingress --group-id --protocol tcp --port 32000 --cidr 0.0.0.0/0 - ``` - -[//]: # (TODO: Elaborate on gateways, and what backends allow configuring them) - -[//]: # (TODO: Should we automatically detect ~/.kube/config) - -??? info "NVIDIA GPU Operator" - To use GPUs with Kubernetes, the cluster must be installed with the - [NVIDIA GPU Operator :material-arrow-top-right-thin:{ .external }](https://docs.nvidia.com/datacenter/cloud-native/gpu-operator/latest/index.html). - - [//]: # (TODO: Provide short yet clear instructions. Elaborate on whether it works with Kind.) - -## Enable encryption { #encryption } - -By default, `dstack` stores data in plaintext. To enforce encryption, you -specify one or more encryption keys. - -`dstack` currently supports AES and identity (plaintext) encryption keys. -Support for external providers like HashiCorp Vault and AWS KMS is planned. - -=== "AES" - The `aes` encryption key encrypts data using [AES-256](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) in GCM mode. - To configure the `aes` encryption, generate a random 32-byte key: - -
- - ```shell - $ head -c 32 /dev/urandom | base64 - - opmx+r5xGJNVZeErnR0+n+ElF9ajzde37uggELxL - ``` - -
- - And specify it as `secret`: - - ```yaml - encryption: - keys: - - type: aes - name: key1 - secret: opmx+r5xGJNVZeErnR0+n+ElF9ajzde37uggELxL - ``` - -=== "Identity" - The `identity` encryption performs no encryption and stores data in plaintext. - You can specify an `identity` encryption key explicitly if you want to decrypt the data: - - ```yaml - encryption: - keys: - - type: identity - - type: aes - name: key1 - secret: opmx+r5xGJNVZeErnR0+n+ElF9ajzde37uggELxL - ``` - - With this configuration, the `aes` key will still be used to decrypt the old data, - but new writes will store the data in plaintext. - -??? info "Key rotation" - If multiple keys are specified, the first is used for encryption, and all are tried for decryption. This enables key - rotation by specifying a new encryption key. - - ```yaml - encryption: - keys: - - type: aes - name: key2 - secret: cR2r1JmkPyL6edBQeHKz6ZBjCfS2oWk87Gc2G3wHVoA= - - - type: aes - name: key1 - secret: E5yzN6V3XvBq/f085ISWFCdgnOGED0kuFaAkASlmmO4= - ``` - - Old keys may be deleted once all existing records have been updated to re-encrypt sensitive data. - Encrypted values are prefixed with key names, allowing DB admins to identify the keys used for encryption. - -[//]: # (## Default permissions) - -[//]: # (`dstack` supports changing default permissions. For example, by default all users) -[//]: # (can create and manage their own projects. You can specify `default_permissions`) -[//]: # (so that only global admins can create and manage projects:) - -[//]: # (
) - -[//]: # (```yaml) -[//]: # (default_permissions:) -[//]: # ( allow_non_admins_create_projects: false) -[//]: # (```) - -[//]: # (
) - -See the [reference table](#default-permissions) for all configurable permissions. - ## Root reference #SCHEMA# dstack._internal.server.services.config.ServerConfig overrides: show_root_heading: false -## `projects[n]` { #_projects data-toc-label="projects" } +### `projects[n]` { #projects data-toc-label="projects" } #SCHEMA# dstack._internal.server.services.config.ProjectConfig overrides: @@ -941,7 +18,9 @@ See the [reference table](#default-permissions) for all configurable permissions backends: type: 'Union[AWSConfigInfoWithCreds, AzureConfigInfoWithCreds, GCPConfigInfoWithCreds, LambdaConfigInfoWithCreds, TensorDockConfigInfoWithCreds, VastAIConfigInfoWithCreds, KubernetesConfig]' -## `projects[n].backends[type=aws]` { #_aws data-toc-label="backends[type=aws]" } +#### `projects[n].backends` { #backends data-toc-label="backends" } + +##### `projects[n].backends[type=aws]` { #aws data-toc-label="aws" } #SCHEMA# dstack._internal.server.services.config.AWSConfig overrides: @@ -950,7 +29,7 @@ See the [reference table](#default-permissions) for all configurable permissions required: true item_id_prefix: aws- -## `projects[n].backends[type=aws].creds` { #_aws-creds data-toc-label="backends[type=aws].creds" } +###### `projects[n].backends[type=aws].creds` { #aws-creds data-toc-label="creds" } === "Access key" #SCHEMA# dstack._internal.core.models.backends.aws.AWSAccessKeyCreds @@ -966,15 +45,24 @@ See the [reference table](#default-permissions) for all configurable permissions type: required: true -## `projects[n].backends[type=aws].os_images` { #_aws-os-images data-toc-label="backends[type=aws].os_images" } +###### `projects[n].backends[type=aws].os_images` { #aws-os_images data-toc-label="os_images" } #SCHEMA# dstack._internal.core.models.backends.aws.AWSOSImageConfig + overrides: + show_root_heading: false + type: + required: true + item_id_prefix: aws-os_images- + +###### `projects[n].backends[type=aws].os_images.cpu` { #aws-os_images-cpu data-toc-label="cpu" } + +#SCHEMA# dstack._internal.core.models.backends.aws.AWSOSImage overrides: show_root_heading: false type: required: true -## `projects[n].backends[type=aws].os_images.*` { #_aws-os-image data-toc-label="backends[type=aws].os_images.*" } +###### `projects[n].backends[type=aws].os_images.nvidia` { #aws-os_images-nvidia data-toc-label="nvidia" } #SCHEMA# dstack._internal.core.models.backends.aws.AWSOSImage overrides: @@ -982,7 +70,7 @@ See the [reference table](#default-permissions) for all configurable permissions type: required: true -## `projects[n].backends[type=azure]` { #_azure data-toc-label="backends[type=azure]" } +##### `projects[n].backends[type=azure]` { #azure data-toc-label="azure" } #SCHEMA# dstack._internal.server.services.config.AzureConfig overrides: @@ -991,7 +79,7 @@ See the [reference table](#default-permissions) for all configurable permissions required: true item_id_prefix: azure- -## `projects[n].backends[type=azure].creds` { #_azure-creds data-toc-label="backends[type=azure].creds" } +###### `projects[n].backends[type=azure].creds` { #azure-creds data-toc-label="creds" } === "Client" #SCHEMA# dstack._internal.core.models.backends.azure.AzureClientCreds @@ -1007,7 +95,7 @@ See the [reference table](#default-permissions) for all configurable permissions type: required: true -## `projects[n].backends[type=gcp]` { #_gcp data-toc-label="backends[type=gcp]" } +##### `projects[n].backends[type=gcp]` { #gcp data-toc-label="gcp" } #SCHEMA# dstack._internal.server.services.config.GCPConfig overrides: @@ -1016,7 
+104,7 @@ See the [reference table](#default-permissions) for all configurable permissions required: true item_id_prefix: gcp- -## `projects[n].backends[type=gcp].creds` { #_gcp-creds data-toc-label="backends[type=gcp].creds" } +###### `projects[n].backends[type=gcp].creds` { #gcp-creds data-toc-label="creds" } === "Service account" #SCHEMA# dstack._internal.server.services.config.GCPServiceAccountCreds @@ -1039,7 +127,7 @@ See the [reference table](#default-permissions) for all configurable permissions type: required: true -## `projects[n].backends[type=lambda]` { #_lambda data-toc-label="backends[type=lambda]" } +##### `projects[n].backends[type=lambda]` { #lambda data-toc-label="lambda" } #SCHEMA# dstack._internal.server.services.config.LambdaConfig overrides: @@ -1048,7 +136,7 @@ See the [reference table](#default-permissions) for all configurable permissions required: true item_id_prefix: lambda- -## `projects[n].backends[type=lambda].creds` { #_lambda-creds data-toc-label="backends[type=lambda].creds" } +###### `projects[n].backends[type=lambda].creds` { #lambda-creds data-toc-label="creds" } #SCHEMA# dstack._internal.core.models.backends.lambdalabs.LambdaAPIKeyCreds overrides: @@ -1056,7 +144,7 @@ See the [reference table](#default-permissions) for all configurable permissions type: required: true -## `projects[n].backends[type=runpod]` { #_runpod data-toc-label="backends[type=runpod]" } +###### `projects[n].backends[type=runpod]` { #runpod data-toc-label="runpod" } #SCHEMA# dstack._internal.server.services.config.RunpodConfig overrides: @@ -1065,7 +153,7 @@ See the [reference table](#default-permissions) for all configurable permissions required: true item_id_prefix: runpod- -## `projects[n].backends[type=runpod].creds` { #_runpod-creds data-toc-label="backends[type=runpod].creds" } +###### `projects[n].backends[type=runpod].creds` { #runpod-creds data-toc-label="creds" } #SCHEMA# dstack._internal.core.models.backends.runpod.RunpodAPIKeyCreds overrides: @@ -1073,7 +161,7 @@ See the [reference table](#default-permissions) for all configurable permissions type: required: true -## `projects[n].backends[type=vastai]` { #_vastai data-toc-label="backends[type=vastai]" } +###### `projects[n].backends[type=vastai]` { #vastai data-toc-label="vastai" } #SCHEMA# dstack._internal.server.services.config.VastAIConfig overrides: @@ -1082,7 +170,7 @@ See the [reference table](#default-permissions) for all configurable permissions required: true item_id_prefix: vastai- -## `projects[n].backends[type=vastai].creds` { #_vastai-creds data-toc-label="backends[type=vastai].creds" } +###### `projects[n].backends[type=vastai].creds` { #vastai-creds data-toc-label="creds" } #SCHEMA# dstack._internal.core.models.backends.vastai.VastAIAPIKeyCreds overrides: @@ -1090,7 +178,7 @@ See the [reference table](#default-permissions) for all configurable permissions type: required: true -## `projects[n].backends[type=tensordock]` { #_tensordock data-toc-label="backends[type=tensordock]" } +##### `projects[n].backends[type=tensordock]` { #tensordock data-toc-label="tensordock" } #SCHEMA# dstack._internal.server.services.config.TensorDockConfig overrides: @@ -1099,7 +187,7 @@ See the [reference table](#default-permissions) for all configurable permissions required: true item_id_prefix: tensordock- -## `projects[n].backends[type=tensordock].creds` { #_tensordock-creds data-toc-label="backends[type=tensordock].creds" } +###### `projects[n].backends[type=tensordock].creds` { #tensordock-creds data-toc-label="creds" } 
#SCHEMA# dstack._internal.core.models.backends.tensordock.TensorDockAPIKeyCreds overrides: @@ -1107,7 +195,7 @@ See the [reference table](#default-permissions) for all configurable permissions type: required: true -## `projects[n].backends[type=oci]` { #_oci data-toc-label="backends[type=oci]" } +##### `projects[n].backends[type=oci]` { #oci data-toc-label="oci" } #SCHEMA# dstack._internal.server.services.config.OCIConfig overrides: @@ -1116,7 +204,7 @@ See the [reference table](#default-permissions) for all configurable permissions required: true item_id_prefix: oci- -## `projects[n].backends[type=oci].creds` { #_oci-creds data-toc-label="backends[type=oci].creds" } +###### `projects[n].backends[type=oci].creds` { #oci-creds data-toc-label="creds" } === "Client" #SCHEMA# dstack._internal.core.models.backends.oci.OCIClientCreds @@ -1132,7 +220,7 @@ See the [reference table](#default-permissions) for all configurable permissions type: required: true -## `projects[n].backends[type=cudo]` { #_cudo data-toc-label="backends[type=cudo]" } +##### `projects[n].backends[type=cudo]` { #cudo data-toc-label="cudo" } #SCHEMA# dstack._internal.server.services.config.CudoConfig overrides: @@ -1141,7 +229,7 @@ See the [reference table](#default-permissions) for all configurable permissions required: true item_id_prefix: cudo- -## `projects[n].backends[type=cudo].creds` { #_cudo-creds data-toc-label="backends[type=cudo].creds" } +###### `projects[n].backends[type=cudo].creds` { #cudo-creds data-toc-label="creds" } #SCHEMA# dstack._internal.core.models.backends.cudo.CudoAPIKeyCreds overrides: @@ -1149,7 +237,7 @@ See the [reference table](#default-permissions) for all configurable permissions type: required: true -## `projects[n].backends[type=datacrunch]` { #_datacrunch data-toc-label="backends[type=datacrunch]" } +##### `projects[n].backends[type=datacrunch]` { #datacrunch data-toc-label="datacrunch" } #SCHEMA# dstack._internal.server.services.config.DataCrunchConfig overrides: @@ -1158,7 +246,7 @@ See the [reference table](#default-permissions) for all configurable permissions required: true item_id_prefix: datacrunch- -## `projects[n].backends[type=datacrunch].creds` { #_datacrunch-creds data-toc-label="backends[type=datacrunch].creds" } +###### `projects[n].backends[type=datacrunch].creds` { #datacrunch-creds data-toc-label="creds" } #SCHEMA# dstack._internal.core.models.backends.datacrunch.DataCrunchAPIKeyCreds overrides: @@ -1166,15 +254,16 @@ See the [reference table](#default-permissions) for all configurable permissions type: required: true -## `projects[n].backends[type=kubernetes]` { #_kubernetes data-toc-label="backends[type=kubernetes]" } +##### `projects[n].backends[type=kubernetes]` { #kubernetes data-toc-label="kubernetes" } #SCHEMA# dstack._internal.server.services.config.KubernetesConfig overrides: show_root_heading: false type: required: true + item_id_prefix: kubernetes- -## `projects[n].backends[type=kubernetes].kubeconfig` { #_kubeconfig data-toc-label="kubeconfig" } +###### `projects[n].backends[type=kubernetes].kubeconfig` { #kubernetes-kubeconfig data-toc-label="kubeconfig" } ##SCHEMA# dstack._internal.server.services.config.KubeconfigConfig overrides: @@ -1187,19 +276,21 @@ See the [reference table](#default-permissions) for all configurable permissions cat my-service-account-file.json | jq -c | jq -R ``` -## `projects[n].backends[type=kubernetes].networking` { #_networking data-toc-label="networking" } +###### `projects[n].backends[type=kubernetes].networking` { 
#kubernetes-networking data-toc-label="networking" }

 ##SCHEMA# dstack._internal.core.models.backends.kubernetes.KubernetesNetworkingConfig
     overrides:
       show_root_heading: false

-## `encryption` { #_encryption data-toc-label="encryption" }
+### `encryption` { #encryption data-toc-label="encryption" }

 #SCHEMA# dstack._internal.server.services.config.EncryptionConfig
     overrides:
       show_root_heading: false

-## `encryption.keys[n][type=identity]` { #_encryption-keys-identity data-toc-label="encryption.keys.identity" }
+#### `encryption.keys` { #encryption-keys data-toc-label="keys" }
+
+##### `encryption.keys[n][type=identity]` { #encryption-keys-identity data-toc-label="identity" }

 #SCHEMA# dstack._internal.server.services.encryption.keys.identity.IdentityEncryptionKeyConfig
     overrides:
@@ -1207,7 +298,7 @@ See the [reference table](#default-permissions) for all configurable permissions
     type:
         required: true

-## `encryption.keys[n][type=aes]` { #_encryption-keys-aes data-toc-label="encryption.keys.aes" }
+##### `encryption.keys[n][type=aes]` { #encryption-keys-aes data-toc-label="aes" }

 #SCHEMA# dstack._internal.server.services.encryption.keys.aes.AESEncryptionKeyConfig
     overrides:
@@ -1215,8 +306,97 @@ See the [reference table](#default-permissions) for all configurable permissions
     type:
         required: true

-## `default_permissions` { #_default-permissions data-toc-label="default-permissions" }
+### `default_permissions` { #default_permissions data-toc-label="default_permissions" }

 #SCHEMA# dstack._internal.server.services.permissions.DefaultPermissions
     overrides:
         show_root_heading: false
+
+## Examples
+
+> The `dstack` server allows you to configure backends for multiple projects.
+> If you don't need multiple projects, use only the `main` project.
+
+### Encryption keys { #examples-encryption }
+
+By default, `dstack` stores data in plaintext. To enforce encryption, you
+specify one or more encryption keys.
+
+`dstack` currently supports AES and identity (plaintext) encryption keys.
+Support for external providers like HashiCorp Vault and AWS KMS is planned.
+
+=== "AES"
+    The `aes` encryption key encrypts data using [AES-256](https://en.wikipedia.org/wiki/Advanced_Encryption_Standard) in GCM mode.
+    To configure the `aes` encryption, generate a random 32-byte key:
+
+    <div class="termy">
+ + ```shell + $ head -c 32 /dev/urandom | base64 + + opmx+r5xGJNVZeErnR0+n+ElF9ajzde37uggELxL + ``` + +
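+
+    Any source of 32 random bytes works. For example, the same key can be produced with OpenSSL (shown here as an alternative sketch, not a `dstack` requirement):
+
+    <div class="termy">
+
+    ```shell
+    $ openssl rand -base64 32
+    ```
+
+    </div>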
+ + And specify it as `secret`: + + ```yaml + encryption: + keys: + - type: aes + name: key1 + secret: opmx+r5xGJNVZeErnR0+n+ElF9ajzde37uggELxL + ``` + +=== "Identity" + The `identity` encryption performs no encryption and stores data in plaintext. + You can specify an `identity` encryption key explicitly if you want to decrypt the data: + + ```yaml + encryption: + keys: + - type: identity + - type: aes + name: key1 + secret: opmx+r5xGJNVZeErnR0+n+ElF9ajzde37uggELxL + ``` + + With this configuration, the `aes` key will still be used to decrypt the old data, + but new writes will store the data in plaintext. + +??? info "Key rotation" + If multiple keys are specified, the first is used for encryption, and all are tried for decryption. This enables key + rotation by specifying a new encryption key. + + ```yaml + encryption: + keys: + - type: aes + name: key2 + secret: cR2r1JmkPyL6edBQeHKz6ZBjCfS2oWk87Gc2G3wHVoA= + + - type: aes + name: key1 + secret: E5yzN6V3XvBq/f085ISWFCdgnOGED0kuFaAkASlmmO4= + ``` + + Old keys may be deleted once all existing records have been updated to re-encrypt sensitive data. + Encrypted values are prefixed with key names, allowing DB admins to identify the keys used for encryption. + +[//]: # (## Default permissions) + +[//]: # (`dstack` supports changing default permissions. For example, by default all users) +[//]: # (can create and manage their own projects. You can specify `default_permissions`) +[//]: # (so that only global admins can create and manage projects:) + +[//]: # (
) + +[//]: # (```yaml) +[//]: # (default_permissions:) +[//]: # ( allow_non_admins_create_projects: false) +[//]: # (```) + +[//]: # (
)
+
+See the [reference table](#default_permissions) for all configurable permissions.
\ No newline at end of file
diff --git a/docs/overrides/home.html b/docs/overrides/home.html
index b6099eb53..0ca4f1b69 100644
--- a/docs/overrides/home.html
+++ b/docs/overrides/home.html
@@ -604,4 +604,4 @@

FAQ

-{% endblock %} +{% endblock %} \ No newline at end of file diff --git a/docs/overrides/main.html b/docs/overrides/main.html index f824ceb57..14110a795 100644 --- a/docs/overrides/main.html +++ b/docs/overrides/main.html @@ -134,4 +134,31 @@ +{% endblock %} + +{% block site_nav %} + {% if nav %} + {% if page.meta and page.meta.hide %} + {% set hidden = "hidden" if "navigation" in page.meta.hide %} + {% endif %} + + {% endif %} + {% if "toc.integrate" not in features %} + {% if page.meta and page.meta.hide %} + {% set hidden = "hidden" if "toc" in page.meta.hide %} + {% endif %} + + {% endif %} {% endblock %} \ No newline at end of file diff --git a/docs/overrides/toc-item.html b/docs/overrides/toc-item.html new file mode 100644 index 000000000..a4618bcbd --- /dev/null +++ b/docs/overrides/toc-item.html @@ -0,0 +1,25 @@ +{#- + This file was automatically generated - do not edit +-#} +
  • + + + {% if toc_item.typeset %} + + {{ toc_item.typeset.title }} + + {% else %} + {{ toc_item.title }} + {% endif %} + + + {% if toc_item.children %} + + {% endif %} +
  • diff --git a/docs/overrides/toc.html b/docs/overrides/toc.html new file mode 100644 index 000000000..577f4988a --- /dev/null +++ b/docs/overrides/toc.html @@ -0,0 +1,25 @@ +{#- + This file was automatically generated - do not edit +-#} +{% set title = lang.t("toc") %} +{% if config.mdx_configs.toc and config.mdx_configs.toc.title %} + {% set title = config.mdx_configs.toc.title %} +{% endif %} + diff --git a/mkdocs.yml b/mkdocs.yml index 448c24ef9..ebbb76c0c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -109,9 +109,10 @@ plugins: 'changelog/0.16.1.md': 'https://github.com/dstackai/dstack/releases/0.16.1' 'changelog/0.17.0.md': 'https://github.com/dstackai/dstack/releases/0.17.0' 'changelog/0.18.0.md': 'https://github.com/dstackai/dstack/releases/0.18.0' - 'docs/concepts/dev-environments.md': 'docs/dev-environments.md' - 'docs/concepts/tasks.md': 'docs/tasks.md' - 'docs/concepts/services.md': 'docs/services.md' + 'docs/concepts/projects.md': 'docs/guides/administration.md' + 'docs/dev-environments.md': 'docs/concepts/dev-environments.md' + 'docs/tasks.md': 'docs/concepts/tasks.md' + 'docs/services.md': 'docs/concepts/services.md' 'docs/fleets.md': 'docs/concepts/fleets.md' 'docs/examples/index.md': 'examples.md' 'docs/examples/llms/llama31.md': 'examples/llms/llama31/index.md' @@ -164,7 +165,7 @@ markdown_extensions: - pymdownx.tasklist: custom_checkbox: true - toc: - toc_depth: 3 + toc_depth: 5 permalink: true - attr_list - md_in_html @@ -208,16 +209,17 @@ nav: - Installation: docs/installation/index.md - Quickstart: docs/quickstart.md - Concepts: - - Dev environments: docs/dev-environments.md - - Tasks: docs/tasks.md - - Services: docs/services.md + - Backends: docs/concepts/backends.md + - Dev environments: docs/concepts/dev-environments.md + - Tasks: docs/concepts/tasks.md + - Services: docs/concepts/services.md - Repos: docs/concepts/repos.md - Fleets: docs/concepts/fleets.md - Volumes: docs/concepts/volumes.md - Gateways: docs/concepts/gateways.md - - Projects: docs/concepts/projects.md - Guides: - Protips: docs/guides/protips.md + - Administration: docs/guides/administration.md - Server deployment: docs/guides/server-deployment.md - Troubleshooting: docs/guides/troubleshooting.md - Reference: diff --git a/scripts/docs/gen_schema_reference.py b/scripts/docs/gen_schema_reference.py index 382bb4ae4..4337c0a76 100644 --- a/scripts/docs/gen_schema_reference.py +++ b/scripts/docs/gen_schema_reference.py @@ -77,11 +77,11 @@ def generate_schema_reference( if field_type: if field.annotation.__name__ == "Annotated": if field_type.__name__ == "Optional": - field_type = get_args(get_args(field.annotation)[0])[0] + field_type = get_args(field_type)[0] if field_type.__name__ == "List": - field_type = get_args(get_args(field.annotation)[0])[0] + field_type = get_args(field_type)[0] if field_type.__name__ == "Union": - field_type = get_args(get_args(field.annotation)[0])[0] + field_type = get_args(field_type)[0] base_model = ( inspect.isclass(field_type) and issubclass(field_type, BaseModel) @@ -122,7 +122,7 @@ def generate_schema_reference( prefix + " ".join( [ - f"#### {item_header}", + f"###### {item_header}", "-", item_optional_marker, item_description, diff --git a/src/dstack/_internal/core/models/gateways.py b/src/dstack/_internal/core/models/gateways.py index 71b859866..db39ba3c1 100644 --- a/src/dstack/_internal/core/models/gateways.py +++ b/src/dstack/_internal/core/models/gateways.py @@ -132,7 +132,9 @@ class TGIChatModel(BaseChatModel): eos_token (Optional[str]): The custom end of 
sentence token. If not specified, the default end of sentence token from the HuggingFace Hub configuration will be used. """ - format: Annotated[Literal["tgi"], Field(description="The serving format")] + format: Annotated[ + Literal["tgi"], Field(description="The serving format. Must be set to `tgi`") + ] chat_template: Annotated[ Optional[str], Field( @@ -166,7 +168,9 @@ class OpenAIChatModel(BaseChatModel): prefix (str): The `base_url` prefix: `http://hostname/{prefix}/chat/completions`. Defaults to `/v1`. """ - format: Annotated[Literal["openai"], Field(description="The serving format")] + format: Annotated[ + Literal["openai"], Field(description="The serving format. Must be set to `openai`") + ] prefix: Annotated[str, Field(description="The `base_url` prefix (after hostname)")] = "/v1" From 5aff432c940a0af297d2a56eca88f1cef57d983f Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Sat, 4 Jan 2025 21:53:44 +0100 Subject: [PATCH 02/13] [Docs] Many docs improvements #2170 --- README.md | 2 +- docker/server/README.md | 2 +- docs/assets/stylesheets/extra.css | 23 +- .../posts/amd-mi300x-inference-benchmark.md | 6 +- docs/docs/concepts/backends.md | 11 +- docs/docs/concepts/dev-environments.md | 250 ++++++++--- docs/docs/concepts/fleets.md | 407 ++++++++++------- docs/docs/concepts/gateways.md | 16 +- docs/docs/concepts/services.md | 415 +++++++++++++----- docs/docs/concepts/snippets/manage-fleets.ext | 30 ++ docs/docs/concepts/snippets/manage-runs.ext | 23 + docs/docs/concepts/tasks.md | 375 +++++++++++++--- docs/docs/concepts/volumes.md | 179 ++++---- docs/docs/guides/protips.md | 2 +- docs/docs/guides/server-deployment.md | 2 +- docs/docs/index.md | 41 +- docs/docs/installation/index.md | 18 +- docs/docs/quickstart.md | 9 +- .../reference/dstack.yml/dev-environment.md | 261 ----------- docs/docs/reference/dstack.yml/gateway.md | 23 - docs/docs/reference/dstack.yml/service.md | 99 ----- docs/docs/reference/dstack.yml/task.md | 310 ------------- docs/docs/reference/dstack.yml/volume.md | 40 -- docs/overrides/home.html | 2 +- examples/accelerators/amd/README.md | 2 +- mkdocs.yml | 6 +- 26 files changed, 1262 insertions(+), 1292 deletions(-) create mode 100644 docs/docs/concepts/snippets/manage-fleets.ext create mode 100644 docs/docs/concepts/snippets/manage-runs.ext diff --git a/README.md b/README.md index 093076d68..4f849c393 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ for AI workloads both in the cloud and on-prem, speeding up the development, tra To use `dstack` with your own cloud accounts, create the `~/.dstack/server/config.yml` file and [configure backends](https://dstack.ai/docs/reference/server/config.yml). Alternatively, you can configure backends via the control plane UI after you start the server. -You can skip backends configuration if you intend to run containers only on your on-prem servers. Use [SSH fleets](https://dstack.ai/docs/concepts/fleets#ssh-fleets) for that. +You can skip backends configuration if you intend to run containers only on your on-prem servers. Use [SSH fleets](https://dstack.ai/docs/concepts/fleets#ssh) for that. ### Start the server diff --git a/docker/server/README.md b/docker/server/README.md index a608dbe86..ed96b9a32 100644 --- a/docker/server/README.md +++ b/docker/server/README.md @@ -39,7 +39,7 @@ Configuration is updated at ~/.dstack/config.yml ## Create SSH fleets If you want the `dstack` server to run containers on your on-prem servers, -use [fleets](https://dstack.ai/docs/concepts/fleets#ssh-fleets). 
+use [fleets](https://dstack.ai/docs/concepts/fleets#ssh). ## More information diff --git a/docs/assets/stylesheets/extra.css b/docs/assets/stylesheets/extra.css index 6f33b5eb2..34b40b9e0 100644 --- a/docs/assets/stylesheets/extra.css +++ b/docs/assets/stylesheets/extra.css @@ -23,6 +23,11 @@ font-size: 0.75rem; } } + +[dir=ltr] .md-typeset :is(.admonition,details) pre, [dir=ltr] .md-typeset :is(.admonition,details) :is(.admonition,details) { + margin-left: 32px; +} + @media screen and (max-width: 76.1875em) { .md-header { background-color: rgb(255, 255, 255); @@ -160,7 +165,7 @@ background-color: var(--md-default-fg-color); } -[dir=ltr] .md-typeset :is(.admonition,details) { +[dir=ltr] .md-typeset :is(details) { border-style: solid; /*border-width: 1px;*/ border-width: 0; @@ -170,6 +175,16 @@ background: -webkit-linear-gradient(45deg, rgba(0, 42, 255, 0.1), rgb(0 114 255 / 1%), rgba(0, 42, 255, 0.05)); } +[dir=ltr] .md-typeset :is(.admonition) { + border-style: solid; + border-color: rgba(0, 0, 0, 0.87); + border-width: 1px; + border-radius: 6px; + box-shadow: none; + padding: .6rem .8rem; + /*background: -webkit-linear-gradient(45deg, rgba(0, 42, 255, 0.1), rgb(0 114 255 / 1%), rgba(0, 42, 255, 0.05));*/ +} + .md-typeset iframe { border-radius: 6px; } @@ -178,7 +193,7 @@ margin-left: 32px; } -[dir=ltr] .md-typeset :is(.admonition,details):not(blockquote) > :is(.highlight,.termy,.md-typeset__scrollwrap,p,h4,h3,.tabbed-set):not(.admonition-title) { +[dir=ltr] .md-typeset :is(.admonition,details):not(blockquote) > :is(.md-typeset__scrollwrap,p,h4,h3,.tabbed-set):not(.admonition-title) { padding-left: 32px; } @@ -1091,7 +1106,7 @@ html .md-footer-meta.md-typeset a:is(:focus,:hover) { background: none; z-index: 1; padding: 5px; - border-radius: 12px; + border-radius: 6px; border: 1px solid black; bottom: -0.7px; top: -0.7px; @@ -1128,7 +1143,7 @@ html .md-footer-meta.md-typeset a:is(:focus,:hover) { height: 100%; background: -webkit-linear-gradient(45deg, rgba(0, 42, 255, 0.025), rgb(0 114 255 / 0.25%), rgba(0, 42, 255, 0.0125)); z-index: 1; - border-radius: 12px; + border-radius: 6px; border: 0.5px solid rgba(0,0,0, 0.5); overflow: unset; } diff --git a/docs/blog/posts/amd-mi300x-inference-benchmark.md b/docs/blog/posts/amd-mi300x-inference-benchmark.md index 58b978be4..494a98a8f 100644 --- a/docs/blog/posts/amd-mi300x-inference-benchmark.md +++ b/docs/blog/posts/amd-mi300x-inference-benchmark.md @@ -11,7 +11,7 @@ categories: # Benchmarking Llama 3.1 405B on 8x AMD MI300X GPUs -At `dstack`, we've been adding support for AMD GPUs with [SSH fleets](../../docs/concepts/fleets.md#ssh-fleets), +At `dstack`, we've been adding support for AMD GPUs with [SSH fleets](../../docs/concepts/fleets.md#ssh), so we saw this as a great chance to test our integration by benchmarking AMD GPUs. Our friends at [Hot Aisle :material-arrow-top-right-thin:{ .external }](https://hotaisle.xyz/){:target="_blank"}, who build top-tier bare metal compute for AMD GPUs, kindly provided the hardware for the benchmark. @@ -35,7 +35,7 @@ Here is the spec of the bare metal machine we got: ??? info "Set up an SSH fleet" Hot Aisle provided us with SSH access to the machine. To make it accessible via `dstack`, - we created an [SSH fleet](../../docs/concepts/fleets.md#ssh-fleets) using the following configuration: + we created an [SSH fleet](../../docs/concepts/fleets.md#ssh) using the following configuration:
    @@ -216,7 +216,7 @@ If you have questions, feedback, or want to help improve the benchmark, please r is the primary sponsor of this benchmark, and we are sincerely grateful for their hardware and support. If you'd like to use top-tier bare metal compute with AMD GPUs, we recommend going -with Hot Aisle. Once you gain access to a cluster, it can be easily accessed via `dstack`'s [SSH fleet](../../docs/concepts/fleets.md#ssh-fleets) easily. +with Hot Aisle. Once you gain access to a cluster, it can be easily accessed via `dstack`'s [SSH fleet](../../docs/concepts/fleets.md#ssh) easily. ### RunPod If you’d like to use on-demand compute with AMD GPUs at affordable prices, you can configure `dstack` to diff --git a/docs/docs/concepts/backends.md b/docs/docs/concepts/backends.md index 936e16182..52bb1bab5 100644 --- a/docs/docs/concepts/backends.md +++ b/docs/docs/concepts/backends.md @@ -1,12 +1,9 @@ # Backends -`dstack` can provision and manage compute across a variety of providers. +To use `dstack` with cloud providers, configure the appropriate backends. +This can be done either through `~/.dstack/server/config.yml` before starting the server, or via UI after the server is up. -To use `dstack` with specific providers, configure backends in the -`~/.dstack/server/config.yml` file before starting the server. -Alternatively, you can configure them via the control plane UI once the server is up. - -Below are examples of how to configure backends for each provider. +Below are examples of how to configure them via `~/.dstack/server/config.yml`. ## Cloud providers @@ -747,7 +744,7 @@ projects: ### SSH fleets > For using `dstack` with on-prem servers, no backend configuration is required. -> See [SSH fleets](fleets.md#ssh-fleets) for more details. +> See [SSH fleets](fleets.md#ssh) for more details. ### Kubernetes diff --git a/docs/docs/concepts/dev-environments.md b/docs/docs/concepts/dev-environments.md index b3859b15b..0b20d9c60 100644 --- a/docs/docs/concepts/dev-environments.md +++ b/docs/docs/concepts/dev-environments.md @@ -28,13 +28,155 @@ resources:
    -!!! info "Docker image" - If you don't specify your Docker image, `dstack` uses the [base](https://hub.docker.com/r/dstackai/base/tags) image - pre-configured with Python, Conda, and essential CUDA drivers. +### Resources + +When you specify a resource value like `cpu` or `memory`, +you can either use an exact value (e.g. `24GB`) or a +range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`). + +
    + +```yaml +type: dev-environment +# The name is optional, if not specified, generated randomly +name: vscode + +ide: vscode + +resources: + # 200GB or more RAM + memory: 200GB.. + # 4 GPUs from 40GB to 80GB + gpu: 40GB..80GB:4 + # Shared memory (required by multi-gpu) + shm_size: 16GB + # Disk size + disk: 500GB +``` + +
+
+The `gpu` property allows specifying not only memory size but also GPU vendor, names,
+and their quantity. Examples: `nvidia` (one NVIDIA GPU), `A100` (one A100), `A10G,A100` (either A10G or A100),
+`A100:80GB` (one A100 of 80GB), `A100:2` (two A100), `24GB..40GB:2` (two GPUs between 24GB and 40GB),
+`A100:40GB:2` (two A100 GPUs of 40GB).
+
+??? info "Google Cloud TPU"
+    To use TPUs, specify the TPU architecture via the `gpu` property.
+
+    ```yaml
+    type: dev-environment
+    # The name is optional, if not specified, generated randomly
+    name: vscode
+
+    ide: vscode
+
+    resources:
+      gpu: v2-8
+    ```
+
+    Currently, only 8 TPU cores can be specified, supporting single TPU device workloads. Multi-TPU support is coming soon.
+
+??? info "Shared memory"
+    If you are using parallel communicating processes (e.g., dataloaders in PyTorch), you may need to configure
+    `shm_size`, e.g. set it to `16GB`.
+
+### Python version
+
+If you don't specify `image`, `dstack` uses its base Docker image pre-configured with
+`python`, `pip`, `conda` (Miniforge), and essential CUDA drivers.
+The `python` property determines which default Docker image is used.
+
+??? info "nvcc"
+    By default, the base Docker image doesn't include `nvcc`, which is required for building custom CUDA kernels.
+    If you need `nvcc`, set the [`nvcc`](../reference/dstack.yml/dev-environment.md#nvcc) property to true.
+
+### Docker
+
+If needed, you can specify a custom Docker image via `image`.
+
+<div class="termy">
    + +```yaml +type: dev-environment +# The name is optional, if not specified, generated randomly +name: vscode + +# Any custom Docker image +image: ghcr.io/huggingface/text-generation-inference:latest + +ide: vscode +``` + +
    + +??? info "Private registry" + + Use the `registry_auth` property to provide credentials for a private Docker registry. + + ```yaml + type: dev-environment + # The name is optional, if not specified, generated randomly + name: vscode + + # Any private Docker image + image: ghcr.io/huggingface/text-generation-inference:latest + # Credentials of the private Docker registry + registry_auth: + username: peterschmidt85 + password: ghp_e49HcZ9oYwBzUbcSk2080gXZOU2hiT9AeSR5 + + ide: vscode + ``` + +??? info "Privileged mode" + All backends except `runpod`, `vastai`, and `kubernetes` support running containers in privileged mode. + This mode enables features like using [Docker and Docker Compose](../guides/protips.md#docker-and-docker-compose) + inside `dstack` runs. + +### Environment variables + +
    + +```yaml +type: dev-environment +# The name is optional, if not specified, generated randomly +name: vscode + +# Environment variables +env: + - HF_TOKEN + - HF_HUB_ENABLE_HF_TRANSFER=1 + +ide: vscode +``` + +
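+
+For example, a variable declared without a value (like `HF_TOKEN` above) can be supplied from the shell that invokes the CLI. A minimal sketch, where the token value is a placeholder:
+
+<div class="termy">
+
+```shell
+$ export HF_TOKEN=hf_XXXX
+$ dstack apply -f .dstack.yml
+```
+
+</div>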
    + +If you don't assign a value to an environment variable (see `HF_TOKEN` above), +`dstack` will require the value to be passed via the CLI or set in the current process. + +??? info "System environment variables" + The following environment variables are available in any run by default: + + | Name | Description | + |-------------------------|-----------------------------------------| + | `DSTACK_RUN_NAME` | The name of the run | + | `DSTACK_REPO_ID` | The ID of the repo | + | `DSTACK_GPUS_NUM` | The total number of GPUs in the run | + +### Spot policy + +By default, `dstack` uses on-demand instances. However, you can change that +via the [`spot_policy`](../reference/dstack.yml/dev-environment.md#spot_policy) property. It accepts `spot`, `on-demand`, and `auto`. !!! info "Reference" - See [.dstack.yml](../reference/dstack.yml/dev-environment.md) for all the options supported by - dev environments, along with multiple examples. + Dev environments support many more configuration options, + incl. [`backends`](../reference/dstack.yml/dev-environment.md#backends), + [`regions`](../reference/dstack.yml/dev-environment.md#regions), + [`max_price`](../reference/dstack.yml/dev-environment.md#max_price), and + [`max_duration`](../reference/dstack.yml/dev-environment.md#max_duration), + among [others](../reference/dstack.yml/dev-environment.md). ## Run a configuration @@ -68,90 +210,52 @@ and sets up an IDE on the instance. On Windows, `dstack` works both natively and inside WSL. But, for dev environments, it's recommended _not to use_ `dstack apply` _inside WSL_ due to a [VS Code issue :material-arrow-top-right-thin:{ .external }](https://github.com/microsoft/vscode-remote-release/issues/937){:target="_blank"}. -### VS Code - To open the dev environment in your desktop IDE, use the link from the output (such as `vscode://vscode-remote/ssh-remote+fast-moth-1/workflow`). ![](../../assets/images/dstack-vscode-jupyter.png){ width=800 } -### SSH - -Alternatively, while the CLI is attached to the run, you can connect to the dev environment via SSH: - -
    +??? info "SSH" -```shell -$ ssh fast-moth-1 -``` - -
    + Alternatively, while the CLI is attached to the run, you can connect to the dev environment via SSH: + +
    + + ```shell + $ ssh fast-moth-1 + ``` + +
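+
+    Since this is standard SSH, the usual options apply. For example, to forward a port (assuming something listens on port 8888 inside the environment):
+
+    <div class="termy">
+
+    ```shell
+    $ ssh -L 8888:localhost:8888 fast-moth-1
+    ```
+
+    </div>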
    -## Manage runs +### Retry policy -### List runs +By default, if `dstack` can't find capacity or the instance is interrupted, the run will fail. -The [`dstack ps`](../reference/cli/dstack/ps.md) command lists all running jobs and their statuses. -Use `--watch` (or `-w`) to monitor the live status of runs. +If you'd like `dstack` to automatically retry, configure the +[retry](../reference/dstack.yml/dev-environment.md#retry) property accordingly: -### Stop a run +
    -A dev environment runs until you stop it or its lifetime exceeds [`max_duration`](../reference/dstack.yml/dev-environment.md#max_duration). -To gracefully stop a dev environment, use [`dstack stop`](../reference/cli/dstack/stop.md). -Pass `--abort` or `-x` to stop without waiting for a graceful shutdown. - -### Attach to a run - -By default, `dstack apply` runs in attached mode – it establishes the SSH tunnel to the run, forwards ports, and shows real-time logs. -If you detached from a run, you can reattach to it using [`dstack attach`](../reference/cli/dstack/attach.md). - -### See run logs - -To see the logs of a run without attaching, use [`dstack logs`](../reference/cli/dstack/logs.md). -Pass `--diagnose`/`-d` to `dstack logs` to see the diagnostics logs. It may be useful if a run fails. -For more information on debugging failed runs, see the [troubleshooting](../guides/troubleshooting.md) guide. - -## Manage fleets - -Fleets are groups of cloud instances or SSH machines that you use to run dev environments, tasks, and services. -You can let `dstack apply` provision fleets or [create and manage them directly](../concepts/fleets.md). - -### Creation policy - -By default, when you run `dstack apply` with a dev environment, task, or service, -`dstack` reuses `idle` instances from an existing [fleet](../concepts/fleets.md). -If no `idle` instances match the requirements, `dstack` automatically creates a new fleet -using configured backends. - -To ensure `dstack apply` doesn't create a new fleet but reuses an existing one, -pass `-R` (or `--reuse`) to `dstack apply`. +```yaml +type: dev-environment +# The name is optional, if not specified, generated randomly +name: vscode -
    +ide: vscode -```shell -$ dstack apply -R -f examples/.dstack.yml +retry: + # Retry on specific events + on_events: [no-capacity, error, interruption] + # Retry for up to 1 hour + duration: 1h ```
    -Alternatively, set [`creation_policy`](../reference/dstack.yml/dev-environment.md#creation_policy) to `reuse` in the run configuration. +--8<-- "docs/concepts/snippets/manage-fleets.ext" -### Termination policy +--8<-- "docs/concepts/snippets/manage-runs.ext" -If a fleet is created automatically, it remains `idle` for 5 minutes and can be reused within that time. -To change the default idle duration, set -[`termination_idle_time`](../reference/dstack.yml/fleet.md#termination_idle_time) in the run configuration (e.g., to 0 or a -longer duration). - -!!! info "Fleets" - For greater control over fleet provisioning, configuration, and lifecycle management, it is recommended to use - [fleets](fleets.md) directly. - -## What's next? - -1. Read about [tasks](tasks.md), [services](services.md), and [repos](repos.md) -2. Learn how to manage [fleets](fleets.md) - -!!! info "Reference" - See [.dstack.yml](../reference/dstack.yml/dev-environment.md) for all the options supported by - dev environments, along with multiple examples. +!!! info "What's next?" + 1. Read about [tasks](tasks.md), [services](services.md), and [repos](repos.md) + 2. Learn how to manage [fleets](fleets.md) \ No newline at end of file diff --git a/docs/docs/concepts/fleets.md b/docs/docs/concepts/fleets.md index c4fd8fd86..abe17221f 100644 --- a/docs/docs/concepts/fleets.md +++ b/docs/docs/concepts/fleets.md @@ -1,105 +1,223 @@ # Fleets -Fleets are groups of cloud instances or SSH machines that you use to run dev environments, tasks, and services. +Fleets are groups of instances used to run dev environments, tasks, and services. +Depending on the fleet configuration, instances can be interconnected clusters or standalone instances. -By default, when you run `dstack apply` to start a new dev environment, task, or service, -`dstack` reuses `idle` instances from an existing fleet. -If no `idle` instances match the requirements, `dstack` automatically creates a new fleet -using configured backends. +`dstack` supports two kinds of fleets: -If you need more control over instance configuration and lifecycle, or if you want to use on-prem servers, -`dstack` also offers you a way to create and manage fleets directly. +* [Cloud fleets](#cloud) – dynamically provisioned through configured backends +* [SSH fleets](#ssh) – created using on-prem servers -## Define a configuration +## Cloud fleets { #cloud } -To create a fleet, define its configuration as a YAML file in your project folder. -The filename must end with `.dstack.yml` (e.g. `.dstack.yml` or `fleet.dstack.yml` are both acceptable). +When you call `dstack apply` to run a dev environment, task, or service, `dstack` reuses `idle` instances +from an existing fleet. If none match the requirements, `dstack` creates a new cloud fleet. -=== "Cloud fleets" +For greater control over cloud fleet provisioning, create fleets explicitly using configuration files. - !!! info "What is a cloud fleet?" - By default, when running dev environments, tasks, and services, `dstack` - reuses `idle` instances from existing fleets or creates a new cloud fleet on the fly. - - If you want more control over the lifecycle of cloud instances, you can create a cloud fleet manually. - This allows you to reuse a fleet over a longer period and across multiple runs. You can also delete the fleet only when needed. +### Define a configuration - To create a cloud fleet, specify `resources`, `nodes`, - and other optional parameters. - -
    +Define a fleet configuration as a YAML file in your project directory. The file must have a +`.dstack.yml` extension (e.g. `.dstack.yml` or `fleet.dstack.yml`). + +
    ```yaml type: fleet # The name is optional, if not specified, generated randomly - name: fleet-distrib + name: my-fleet - # Number of instances + # Specify the number of instances nodes: 2 - # Ensure instances are inter-connected - placement: cluster + # Uncomment to ensure instances are inter-connected + #placement: cluster - # Terminate if idle for 3 days - termination_idle_time: 3d + resources: + gpu: 24GB + ``` + +
+
+#### Placement
+
+To ensure instances are interconnected (e.g., for
+[distributed tasks](tasks.md#distributed-tasks)), set `placement` to `cluster`.
+This ensures all instances are provisioned in the same backend and region, with optimal inter-node connectivity.
+
+??? info "AWS"
+    `dstack` automatically enables [Elastic Fabric Adapter :material-arrow-top-right-thin:{ .external }](https://aws.amazon.com/hpc/efa/){:target="_blank"}
+    for the instance types that support it:
+    `p5.48xlarge`, `p4d.24xlarge`, `g4dn.12xlarge`, `g4dn.16xlarge`, `g4dn.8xlarge`, `g4dn.metal`,
+    `g5.12xlarge`, `g5.16xlarge`, `g5.24xlarge`, `g5.48xlarge`, `g5.8xlarge`, `g6.12xlarge`,
+    `g6.16xlarge`, `g6.24xlarge`, `g6.48xlarge`, `g6.8xlarge`, and `gr6.8xlarge`.
+
+    Currently, only one EFA interface is enabled per instance, regardless of its maximum capacity.
+    This will change once [this issue :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/issues/1804){:target="_blank"} is resolved.
+
+> The `cluster` placement is supported only for `aws`, `azure`, `gcp`, and `oci`
+> backends.
+
+#### Resources
+
+When you specify a resource value like `cpu` or `memory`,
+you can either use an exact value (e.g. `24GB`) or a
+range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`).
+
+<div class="termy">
    +```yaml +type: fleet +# The name is optional, if not specified, generated randomly +name: my-fleet + +nodes: 2 + +resources: + # 200GB or more RAM + memory: 200GB.. + # 4 GPUs from 40GB to 80GB + gpu: 40GB..80GB:4 + # Disk size + disk: 500GB +``` + +
+
+The `gpu` property allows specifying not only memory size but also GPU vendor, names,
+and their quantity. Examples: `nvidia` (one NVIDIA GPU), `A100` (one A100), `A10G,A100` (either A10G or A100),
+`A100:80GB` (one A100 of 80GB), `A100:2` (two A100), `24GB..40GB:2` (two GPUs between 24GB and 40GB),
+`A100:40GB:2` (two A100 GPUs of 40GB).
+
+??? info "Google Cloud TPU"
+    To use TPUs, specify the TPU architecture via the `gpu` property.
+
+    ```yaml
+    type: fleet
+    # The name is optional, if not specified, generated randomly
+    name: my-fleet
+
+    nodes: 1
+
+    resources:
+      gpu: v2-8
+    ```
+
+    Currently, only 8 TPU cores can be specified, supporting single TPU device workloads. Multi-TPU support is coming soon.
+
+#### Termination policy
+
+By default, fleet instances remain active until the fleet is explicitly deleted via `dstack fleet delete`.
+
+To automatically terminate `idle` instances after a certain period, configure `termination_idle_time`.
+
+<div class="termy">
    -
    + ```yaml + type: fleet + # The name is optional, if not specified, generated randomly + name: my-fleet + + nodes: 2 + + # Terminate instances idle for more than 1 hour + termination_idle_time: 1h + + resources: + gpu: 24GB + ``` - When you apply this configuration, `dstack` will create cloud instances using the configured backends according - to the specified parameters. +
+
+#### Spot policy
+
+By default, `dstack` uses on-demand instances. However, you can change that
+via the [`spot_policy`](../reference/dstack.yml/fleet.md#spot_policy) property. It accepts `spot`, `on-demand`, and `auto`.
+
+#### Retry policy
+
+By default, if `dstack` fails to provision an instance or an instance is interrupted, no retry is attempted.
+
+If you'd like `dstack` to retry automatically, configure the
+[retry](../reference/dstack.yml/fleet.md#retry) property accordingly:
+
+<div class="termy">
    - Currently, only one EFA interface is enabled regardless of the maximum number of interfaces supported by the instance type. - This limitation will be lifted once [this issue :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/issues/1804){:target="_blank"} is fixed. +```yaml +type: fleet +# The name is optional, if not specified, generated randomly +name: my-fleet - !!! info "Backends" - Cloud fleets are supported for all backends except `kubernetes`, `vastai`, and `runpod`. +nodes: 1 -=== "SSH fleets" +resources: + gpu: 24GB - !!! info "What is an SSH fleet?" - If you’d like to run dev environments, tasks, and services on arbitrary on-prem servers via `dstack`, you can - create an SSH fleet. +retry: + # Retry on specific events + on_events: [no-capacity, interruption] + # Retry for up to 1 hour + duration: 1h +``` + +
    - To create an SSH fleet, specify `ssh_config` to allow the `dstack` server to connect to these servers - via SSH. +> Cloud fleets are supported by all backends except `kubernetes`, `vastai`, and `runpod`. -
    +!!! info "Reference" + Cloud fleets support many more configuration options, + incl. [`backends`](../reference/dstack.yml/fleet.md#backends), + [`regions`](../reference/dstack.yml/fleet.md#regions), + [`max_price`](../reference/dstack.yml/fleet.md#max_price), and + among [others](../reference/dstack.yml/fleet.md). + +### Create or update a fleet + +To create or update the fleet, pass the fleet configuration to [`dstack apply`](../reference/cli/dstack/apply.md): + +
    + +```shell +$ dstack apply -f examples/misc/fleets/.dstack.yml +``` + +
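+
+In scripted or CI contexts, the confirmation prompt can be skipped. This sketch assumes the CLI's `-y` flag, which auto-approves the plan:
+
+<div class="termy">
+
+```shell
+$ dstack apply -y -f examples/misc/fleets/.dstack.yml
+```
+
+</div>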
    + +To ensure the fleet is created, you can use the `dstack fleet` command: + +
    + +```shell +$ dstack fleet + + FLEET INSTANCE BACKEND GPU PRICE STATUS CREATED + my-fleet 0 gcp (europe-west-1) L4:24GB (spot) $0.1624 idle 3 mins ago + 1 gcp (europe-west-1) L4:24GB (spot) $0.1624 idle 3 mins ago +``` + +
    + +Once the status of instances changes to `idle`, they can be used by dev environments, tasks, and services. + +## SSH fleets { #ssh } + +If you have a group of on-prem servers accessible via SSH, you can create an SSH fleet. + +### Define a configuration + +Define a fleet configuration as a YAML file in your project directory. The file must have a +`.dstack.yml` extension (e.g. `.dstack.yml` or `fleet.dstack.yml`). + +
    ```yaml type: fleet # The name is optional, if not specified, generated randomly - name: fleet-distrib-ssh + name: my-fleet - # Ensure instances are inter-connected - placement: cluster + # Uncomment if instances are interconnected + #placement: cluster - # The user, private SSH key, and hostnames of the on-prem servers + # SSH credentials for the on-prem servers ssh_config: user: ubuntu identity_file: ~/.ssh/id_rsa @@ -108,114 +226,101 @@ The filename must end with `.dstack.yml` (e.g. `.dstack.yml` or `fleet.dstack.ym - 3.255.177.52 ``` -
    - - When you apply this configuration, `dstack` will connect to the specified hosts using the provided SSH credentials, - install the dependencies, and configure these servers as a fleet. - - !!! info "Requirements" - Hosts should be pre-installed with Docker. - - === "NVIDIA" - Systems with NVIDIA GPUs should also be pre-installed with CUDA 12.1 and - [NVIDIA Container Toolkit :material-arrow-top-right-thin:{ .external }](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html). - - === "AMD" - Systems with AMD GPUs should also be pre-installed with AMDGPU-DKMS kernel driver (e.g. via - [native package manager :material-arrow-top-right-thin:{ .external }](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/native-install/index.html) - or [AMDGPU installer :material-arrow-top-right-thin:{ .external }](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/amdgpu-install.html).) - - The user should have passwordless `sudo` access. - - ??? info "Environment variables" - For SSH fleets, it's possible to pre-configure environment variables. - These variables will be used when installing the `dstack-shim` service on hosts - and running containers. - - For example, these variables can be used to configure a proxy: - - ```yaml - type: fleet - name: my-fleet - - placement: cluster - - env: - - HTTP_PROXY=http://proxy.example.com:80 - - HTTPS_PROXY=http://proxy.example.com:80 - - NO_PROXY=localhost,127.0.0.1 - - ssh_config: - user: ubuntu - identity_file: ~/.ssh/id_rsa - hosts: - - 3.255.177.51 - - 3.255.177.52 - ``` - - !!! info "Cluster placement" - Set `placement` to `cluster` if the hosts are interconnected - (e.g. if you'd like to use them for [multi-node tasks](../reference/dstack.yml/task.md#distributed-tasks)). - - !!! info "Network" - By default, `dstack` automatically detects the private network for the specified hosts. - However, it's possible to configure it explicitelly via - the [`network`](../reference/dstack.yml/fleet.md#network) property. - - !!! info "Backends" - To use SSH fleets, you don't need to configure any backends at all. +
    + +??? info "Requirements" + 1. Hosts should be pre-installed with Docker. + + === "NVIDIA" + 2. Hosts with NVIDIA GPUs should also be pre-installed with CUDA 12.1 and + [NVIDIA Container Toolkit :material-arrow-top-right-thin:{ .external }](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html). + + === "AMD" + 2. Hosts with AMD GPUs should also be pre-installed with AMDGPU-DKMS kernel driver (e.g. via + [native package manager :material-arrow-top-right-thin:{ .external }](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/native-install/index.html) + or [AMDGPU installer :material-arrow-top-right-thin:{ .external }](https://rocm.docs.amd.com/projects/install-on-linux/en/latest/install/amdgpu-install.html).) + + 3. The user specified should have passwordless `sudo` access. + +#### Placement + +If the hosts are interconnected (i.e. share the same network), set `placement` to `cluster`. +This is required if you'd like to use the fleet for [distributed tasks](tasks.md#distributed-tasks). + +##### Network + +By default, `dstack` automatically detects the network shared by the hosts. +However, it's possible to configure it explicitly via +the [`network`](../reference/dstack.yml/fleet.md#network) property. + +[//]: # (TODO: Provide an example and more detail) + +#### Environment variables + +If needed, you can specify environment variables that will be used by `dstack-shim` and passed to containers. + +[//]: # (TODO: Explain what dstack-shim is) + +For example, these variables can be used to configure a proxy: + +```yaml +type: fleet +name: my-fleet + +env: + - HTTP_PROXY=http://proxy.example.com:80 + - HTTPS_PROXY=http://proxy.example.com:80 + - NO_PROXY=localhost,127.0.0.1 + +ssh_config: + user: ubuntu + identity_file: ~/.ssh/id_rsa + hosts: + - 3.255.177.51 + - 3.255.177.52 +``` !!! info "Reference" - See [`.dstack.yml`](../reference/dstack.yml/fleet.md) for all the options supported by - the fleet configuration. + For all SSH fleet configuration options, refer to the [reference](../reference/dstack.yml/fleet.md). -## Create or update a fleet +### Create or update a fleet To create or update the fleet, pass the fleet configuration to [`dstack apply`](../reference/cli/dstack/apply.md):
    ```shell -$ dstack apply -f examples/misc/fleets/distrib.dstack.yml +$ dstack apply -f examples/misc/fleets/.dstack.yml ```
    -### Ensure the fleet is created - -To ensure the fleet is created, use the `dstack fleet` command: +To ensure the fleet is created, you can use the `dstack fleet` command:
    ```shell $ dstack fleet - FLEET INSTANCE BACKEND GPU PRICE STATUS CREATED - my-fleet 0 gcp (europe-west-1) L4:24GB (spot) $0.1624 idle 3 mins ago - 1 gcp (europe-west-1) L4:24GB (spot) $0.1624 idle 3 mins ago + FLEET INSTANCE GPU PRICE STATUS CREATED + my-fleet 0 L4:24GB (spot) $0 idle 3 mins ago + 1 L4:24GB (spot) $0 idle 3 mins ago ```
    Once the status of instances changes to `idle`, they can be used by dev environments, tasks, and services. -!!! info "Termination policy" - If you want a fleet to be automatically deleted after a certain idle time, - you can set the [`termination_idle_time`](../reference/dstack.yml/fleet.md#termination_idle_time) property. - -[//]: # (Add Idle time example to the reference page) - -### Troubleshooting SSH fleets +#### Troubleshooting !!! info "Resources" - If you're creating an SSH fleet, ensure that the GPU, memory, and disk size are detected properly. - If GPU isn't detected, ensure that the hosts meet the requirements (see above). + Once the fleet is created, double-check that the GPU, memory, and disk are detected correctly. -If the status doesn't change to `idle` after a few minutes, ensure that -the hosts meet the requirements (see above). +If the status does not change to `idle` after a few minutes or the resources are not displayed correctly, ensure that +all host requirements are satisfied. -If the requirements are met but the fleet still fails to be created, check `/root/.dstack/shim.log` for logs -on the hosts specified in `ssh_config`. +If the requirements are met but the fleet still fails to be created correctly, check the logs at +`/root/.dstack/shim.log` on the hosts for error details. ## Manage fleets @@ -252,12 +357,6 @@ Fleet my-gcp-fleet deleted Alternatively, you can delete a fleet by passing the fleet name to `dstack fleet delete`. To terminate and delete specific instances from a fleet, pass `-i INSTANCE_NUM`. -## What's next? - -1. Read about [dev environments](dev-environments.md), [tasks](tasks.md), and - [services](services.md) -2. Join the community via [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd) - -!!! info "Reference" - See [.dstack.yml](../reference/dstack.yml/fleet.md) for all the options supported by - fleets, along with multiple examples. +!!! info "What's next?" + 1. Read about [dev environments](dev-environments.md), [tasks](tasks.md), and + [services](services.md) \ No newline at end of file diff --git a/docs/docs/concepts/gateways.md b/docs/docs/concepts/gateways.md index 1e19feda0..c301a1f06 100644 --- a/docs/docs/concepts/gateways.md +++ b/docs/docs/concepts/gateways.md @@ -1,8 +1,8 @@ # Gateways Gateways manage the ingress traffic of running [services](services.md) -and provide them with an HTTPS endpoint mapped to your domain, -handling authentication, load distribution, and auto-scaling. +provide an HTTPS endpoint mapped to your domain, +and handling auto-scaling. > If you're using [dstack Sky :material-arrow-top-right-thin:{ .external }](https://sky.dstack.ai){:target="_blank"}, > the gateway is already set up for you. @@ -32,8 +32,7 @@ domain: example.com A domain name is required to create a gateway. !!! info "Reference" - See [.dstack.yml](../reference/dstack.yml/gateway.md) for all the options supported by - gateways, along with multiple examples. + For all gateway configuration options, refer to the [reference](../reference/dstack.yml/gateway.md). ## Create or update a gateway @@ -83,10 +82,5 @@ Alternatively, you can delete a gateway by passing the gateway name to `dstack [//]: # (TODO: ## Accessing endpoints) -## What's next? - -1. See [services](services.md) on how to run services - -!!! info "Reference" - See [.dstack.yml](../reference/dstack.yml/gateway.md) for all the options supported by - gateways, along with multiple examples. +!!! info "What's next?" + 1. 
\ No newline at end of file
diff --git a/docs/docs/concepts/services.md b/docs/docs/concepts/services.md
index 7bb194362..07971e7cd 100644
--- a/docs/docs/concepts/services.md
+++ b/docs/docs/concepts/services.md
@@ -1,8 +1,6 @@
 # Services

-Services allow you to deploy models or any web app as a secure and scalable endpoint.
-
-When running models, services provide access through the unified OpenAI-compatible endpoint.
+Services allow you to deploy models or web apps as secure and scalable endpoints.

 ## Define a configuration

@@ -27,7 +25,7 @@ commands:
   --max-model-len $MAX_MODEL_LEN
   --tensor-parallel-size $DSTACK_GPUS_NUM
 port: 8000
-# Register the model
+# (Optional) Register the model
 model: meta-llama/Meta-Llama-3.1-8B-Instruct

 # Uncomment to leverage spot instances
@@ -39,23 +37,297 @@ resources:
-Note, the `model` property is optional and not needed when deploying a non-OpenAI-compatible model or a regular web app.
+### Replicas and scaling
+
+By default, `dstack` runs a single replica of the service.
+You can configure the number of replicas as well as the auto-scaling rules.
+
+
+```yaml
+type: service
+# The name is optional, if not specified, generated randomly
+name: llama31-service
+
+python: "3.10"
+
+# Required environment variables
+env:
+  - HF_TOKEN
+commands:
+  - pip install vllm
+  - vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct --max-model-len 4096
+# Expose the port of the service
+port: 8000
+
+resources:
+  # Change to what is required
+  gpu: 24GB
+
+# Minimum and maximum number of replicas
+replicas: 1..4
+scaling:
+  # Requests per second
+  metric: rps
+  # Target metric value
+  target: 10
+```
+
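+For instance, a sketch of a scale-to-zero setup (the name and values are illustrative; auto-scaling currently
+requires a gateway, as noted below):
+
+```yaml
+type: service
+name: llama31-scale-to-zero
+
+python: "3.10"
+
+env:
+  - HF_TOKEN
+commands:
+  - pip install vllm
+  - vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct --max-model-len 4096
+port: 8000
+
+resources:
+  gpu: 24GB
+
+# A minimum of 0 lets the service scale down to zero when idle
+replicas: 0..4
+scaling:
+  metric: rps
+  target: 10
+```
+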
+
+The [`replicas`](#replicas) property can be a number or a range.
+
+The [`metric`](#metric) property of [`scaling`](#scaling) only supports the `rps` metric (requests per second). In this
+case, `dstack` automatically adjusts the number of replicas (scales up or down) based on the load.
+
+Setting the minimum number of replicas to `0` allows the service to scale down to zero when there are no requests.
+
+!!! info "Gateways"
+    The `scaling` property currently requires creating a [gateway](gateways.md).
+    This requirement is expected to be removed soon.
+
+### Authorization
+
+By default, the service enables authorization, meaning the service endpoint requires a `dstack` user token.
+This can be disabled by setting `auth` to `false`.
+
    + +```yaml +type: service +# The name is optional, if not specified, generated randomly +name: http-server-service + +# Disable authorization +auth: false + +python: "3.10" + +# Commands of the service +commands: + - python3 -m http.server +# The port of the service +port: 8000 +``` + +
+
+### Model
+
+If the service is running a chat model with an OpenAI-compatible interface,
+set the [`model`](#model) property to make the model accessible via `dstack`'s
+global OpenAI-compatible endpoint, as well as via the `dstack` UI.
+
+### Resources
+
+When you specify a resource value like `cpu` or `memory`,
+you can either use an exact value (e.g. `24GB`) or a
+range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`).
+
    + +```yaml +type: service +# The name is optional, if not specified, generated randomly +name: http-server-service + +python: "3.10" + +# Commands of the service +commands: + - pip install vllm + - python -m vllm.entrypoints.openai.api_server + --model mistralai/Mixtral-8X7B-Instruct-v0.1 + --host 0.0.0.0 + --tensor-parallel-size $DSTACK_GPUS_NUM +# Expose the port of the service +port: 8000 + +resources: + # 2 GPUs of 80GB + gpu: 80GB:2 + + # Minimum disk size + disk: 200GB +``` + +
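+For instance (a sketch with assumed values), a config could request two 40GB A100s; the full `gpu` syntax is
+described next:
+
+```yaml
+type: service
+name: http-server-service
+
+python: "3.10"
+
+commands:
+  - python3 -m http.server
+port: 8000
+
+resources:
+  # Two A100 GPUs of 40GB each
+  gpu: A100:40GB:2
+```
+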
    + +The `gpu` property allows specifying not only memory size but also GPU vendor, names +and their quantity. Examples: `nvidia` (one NVIDIA GPU), `A100` (one A100), `A10G,A100` (either A10G or A100), +`A100:80GB` (one A100 of 80GB), `A100:2` (two A100), `24GB..40GB:2` (two GPUs between 24GB and 40GB), +`A100:40GB:2` (two A100 GPUs of 40GB). + +??? info "Shared memory" + If you are using parallel communicating processes (e.g., dataloaders in PyTorch), you may need to configure + `shm_size`, e.g. set it to `16GB`. + +### Python version + +If you don't specify `image`, `dstack` uses its base Docker image pre-configured with +`python`, `pip`, `conda` (Miniforge), and essential CUDA drivers. +The `python` property determines which default Docker image is used. + +
    + +```yaml +type: service +# The name is optional, if not specified, generated randomly +name: http-server-service + +# If `image` is not specified, dstack uses its base image +python: "3.10" + +# Commands of the service +commands: + - python3 -m http.server +# The port of the service +port: 8000 +``` + +
+
+??? info "nvcc"
+    By default, the base Docker image doesn’t include `nvcc`, which is required for building custom CUDA kernels.
+    If you need `nvcc`, set the `nvcc` property to `true`.
+
    + + ```yaml + type: service + # The name is optional, if not specified, generated randomly + name: http-server-service + + # If `image` is not specified, dstack uses its base image + python: "3.10" + # Ensure nvcc is installed (req. for Flash Attention) + nvcc: true + + # Commands of the service + commands: + - python3 -m http.server + # The port of the service + port: 8000 + ``` + +
    + +### Docker + +If you want, you can specify your own Docker image via `image`. + +
+
+```yaml
+type: service
+# The name is optional, if not specified, generated randomly
+name: http-server-service
+
+# Any custom Docker image
+image: dstackai/base:py3.13-0.6-cuda-12.1
+
+# Commands of the service
+commands:
+  - python3 -m http.server
+# The port of the service
+port: 8000
+```
+
+
+??? info "Private registry"
+
+    Use the `registry_auth` property to provide credentials for a private Docker registry.
+
+    ```yaml
+    type: service
+    # The name is optional, if not specified, generated randomly
+    name: http-server-service
+
+    # Any private Docker image
+    image: dstackai/base:py3.13-0.6-cuda-12.1
+    # Credentials of the private registry
+    registry_auth:
+      username: peterschmidt85
+      password: ghp_e49HcZ9oYwBzUbcSk2080gXZOU2hiT9AeSR5
+
+    # Commands of the service
+    commands:
+      - python3 -m http.server
+    # The port of the service
+    port: 8000
+    ```
+
+??? info "Privileged mode"
+    All backends except `runpod`, `vastai`, and `kubernetes` support running containers in privileged mode.
+    This mode enables features like using [Docker and Docker Compose](../guides/protips.md#docker-and-docker-compose)
+    inside `dstack` runs.
+
+### Environment variables
+
    + +```yaml +type: service +# The name is optional, if not specified, generated randomly +name: llama-2-7b-service + +python: "3.10" + +# Environment variables +env: + - HF_TOKEN + - MODEL=NousResearch/Llama-2-7b-chat-hf +# Commands of the service +commands: + - pip install vllm + - python -m vllm.entrypoints.openai.api_server --model $MODEL --port 8000 +# The port of the service +port: 8000 + +resources: + # Required GPU vRAM + gpu: 24GB +``` + +
+
+> If you don't assign a value to an environment variable (see `HF_TOKEN` above),
+`dstack` will require the value to be passed via the CLI or set in the current process.
+
+??? info "System environment variables"
+    The following environment variables are available in any run by default:
+
+    | Name                    | Description                              |
+    |-------------------------|------------------------------------------|
+    | `DSTACK_RUN_NAME`       | The name of the run                      |
+    | `DSTACK_REPO_ID`        | The ID of the repo                       |
+    | `DSTACK_GPUS_NUM`       | The total number of GPUs in the run      |

-!!! info "Docker image"
-    If you don't specify your Docker image, `dstack` uses the [base](https://hub.docker.com/r/dstackai/base/tags) image
-    pre-configured with Python, Conda, and essential CUDA drivers.
+### Spot policy

-!!! info "Gateway"
-    To enable [auto-scaling](../reference/dstack.yml/service.md#auto-scaling), or use a custom domain with HTTPS,
-    set up a [gateway](gateways.md) before running the service.
-    If you're using [dstack Sky :material-arrow-top-right-thin:{ .external }](https://sky.dstack.ai){:target="_blank"},
-    a gateway is pre-configured for you.
+By default, `dstack` uses on-demand instances. However, you can change that
+via the [`spot_policy`](../reference/dstack.yml/service.md#spot_policy) property. It accepts `spot`, `on-demand`, and `auto`.

 !!! info "Reference"
-    See [.dstack.yml](../reference/dstack.yml/service.md) for all the options supported by
-    services, along with multiple examples.
+    Services support many more configuration options,
+    incl. [`backends`](../reference/dstack.yml/service.md#backends),
+    [`regions`](../reference/dstack.yml/service.md#regions), and
+    [`max_price`](../reference/dstack.yml/service.md#max_price),
+    among [others](../reference/dstack.yml/service.md).
+
+## (Optional) Set up a gateway
+
+Running services doesn't require [gateways](gateways.md) unless you need to enable auto-scaling or want the endpoint to
+use HTTPS and map it to your domain.
+
+!!! info "Websockets and base path"
+    A [gateway](gateways.md) may also be required if the service needs Websockets or cannot be used with
+    a base path.
+
+> If you're using [dstack Sky :material-arrow-top-right-thin:{ .external }](https://sky.dstack.ai){:target="_blank"},
+> a gateway is pre-configured for you.

-## Run a service
+## Run a configuration

 To run a service, pass the configuration to [`dstack apply`](../reference/cli/dstack/apply.md):

@@ -86,14 +358,10 @@ Model meta-llama/Meta-Llama-3.1-8B-Instruct is published at:
 `dstack apply` automatically provisions instances, uploads the contents of the repo (incl. your local uncommitted changes),
 and runs the service.

-## Access the endpoint
-
-### Service
+### Service endpoint

 If a [gateway](gateways.md) is not configured, the service’s endpoint will be accessible at
 `<dstack server URL>/proxy/services/<project name>/<run name>/`.
-If a [gateway](gateways.md) is configured, the service endpoint will be accessible at
-`https://<run name>.<gateway domain>`.
    @@ -114,88 +382,25 @@ $ curl http://localhost:3000/proxy/services/main/llama31/v1/chat/completions \
    -!!! info "Auth" - By default, the service endpoint requires the `Authorization` header with `Bearer `. - Authorization can be disabled by setting [`auth`](../reference/dstack.yml/service.md#authorization) to `false` in the - service configuration file. - -### Model - If the service defines the `model` property, the model can be accessed with -the OpenAI-compatible endpoint at `/proxy/models//`, -or via the control plane UI's playground. - -When a [gateway](gateways.md) is configured, the OpenAI-compatible endpoint is available at `https://gateway./`. - -## Manage runs - -### List runs - -The [`dstack ps`](../reference/cli/dstack/ps.md) command lists all running jobs and their statuses. -Use `--watch` (or `-w`) to monitor the live status of runs. - -### Stop a run - -A service runs until you stop it or its lifetime exceeds [`max_duration`](../reference/dstack.yml/dev-environment.md#max_duration). -To gracefully stop a service, use [`dstack stop`](../reference/cli/dstack/stop.md). -Pass `--abort` or `-x` to stop without waiting for a graceful shutdown. - -### Attach to a run - -By default, `dstack apply` runs in attached mode – it establishes the SSH tunnel to the run, forwards ports, and shows real-time logs. -If you detached from a run, you can reattach to it using [`dstack attach`](../reference/cli/dstack/attach.md). - -### See run logs - -To see the logs of a run without attaching, use [`dstack logs`](../reference/cli/dstack/logs.md). -Pass `--diagnose`/`-d` to `dstack logs` to see the diagnostics logs. It may be useful if a run fails. -For more information on debugging failed runs, see the [troubleshooting](../guides/troubleshooting.md) guide. - -## Manage fleets - -Fleets are groups of cloud instances or SSH machines that you use to run dev environments, tasks, and services. -You can let `dstack apply` provision fleets or [create and manage them directly](fleets.md). - -### Creation policy - -By default, when you run `dstack apply` with a dev environment, task, or service, -`dstack` reuses `idle` instances from an existing [fleet](fleets.md). -If no `idle` instances match the requirements, it automatically creates a new fleet -using backends. - -To ensure `dstack apply` doesn't create a new fleet but reuses an existing one, -pass `-R` (or `--reuse`) to `dstack apply`. - -
    - -```shell -$ dstack apply -R -f examples/.dstack.yml -``` - -
-
-Alternatively, set [`creation_policy`](../reference/dstack.yml/dev-environment.md#creation_policy) to `reuse` in the run configuration.
-
-### Termination policy
-
-If a fleet is created automatically, it remains `idle` for 5 minutes and can be reused within that time.
-To change the default idle duration, set
-[`termination_idle_time`](../reference/dstack.yml/fleet.md#termination_idle_time) in the run configuration (e.g., to 0 or a
-longer duration).
-
-!!! info "Fleets"
-    For greater control over fleet provisioning, configuration, and lifecycle management, it is recommended to use
-    [fleets](fleets.md) directly.
-
-## What's next?
-
-1. Read about [dev environments](dev-environments.md), [tasks](tasks.md), and [repos](repos.md)
-2. Learn how to manage [fleets](fleets.md)
-3. See how to set up [gateways](gateways.md)
-4. Check the [TGI :material-arrow-top-right-thin:{ .external }](../../examples/deployment/tgi/index.md){:target="_blank"},
-   [vLLM :material-arrow-top-right-thin:{ .external }](../../examples/deployment/vllm/index.md){:target="_blank"}, and
-   [NIM :material-arrow-top-right-thin:{ .external }](../../examples/deployment/nim/index.md){:target="_blank"} examples
-
-!!! info "Reference"
-    See [.dstack.yml](../reference/dstack.yml/service.md) for all the options supported by
-    services, along with multiple examples.
+the global OpenAI-compatible endpoint at `<dstack server URL>/proxy/models/<project name>/`,
+or via the `dstack` UI.
+
+??? info "Gateway"
+    If a [gateway](gateways.md) is configured, the service endpoint will be accessible at
+    `https://<run name>.<gateway domain>/`.
+
+    If the service defines the `model` property, the model will be available via the global OpenAI-compatible endpoint
+    at `https://gateway.<gateway domain>/`.
+
+[//]: # (By default, the service endpoint requires the `Authorization` header with `Bearer <dstack token>`.)
+[//]: # (Authorization can be disabled by setting [`auth`](../reference/dstack.yml/service.md#authorization) to `false` in the)
+[//]: # (service configuration file.)
+
+!!! info "What's next?"
+    1. Read about [dev environments](dev-environments.md), [tasks](tasks.md), and [repos](repos.md)
+    2. Learn how to manage [fleets](fleets.md)
+    3. See how to set up [gateways](gateways.md)
+    4. Check the [TGI :material-arrow-top-right-thin:{ .external }](../../examples/deployment/tgi/index.md){:target="_blank"},
+       [vLLM :material-arrow-top-right-thin:{ .external }](../../examples/deployment/vllm/index.md){:target="_blank"}, and
+       [NIM :material-arrow-top-right-thin:{ .external }](../../examples/deployment/nim/index.md){:target="_blank"} examples
\ No newline at end of file
diff --git a/docs/docs/concepts/snippets/manage-fleets.ext b/docs/docs/concepts/snippets/manage-fleets.ext
new file mode 100644
index 000000000..df11d6be1
--- /dev/null
+++ b/docs/docs/concepts/snippets/manage-fleets.ext
@@ -0,0 +1,30 @@
+### Creation policy
+
+By default, when you run `dstack apply` with a dev environment, task, or service,
+if no `idle` instances from the available fleets meet the requirements, `dstack` creates a new fleet
+using configured backends.
+
+To ensure `dstack apply` doesn't create a new fleet but reuses an existing one,
+pass `-R` (or `--reuse`) to `dstack apply`.
+
    + +```shell +$ dstack apply -R -f examples/.dstack.yml +``` + +
+
+Or, set [`creation_policy`](../reference/dstack.yml/dev-environment.md#creation_policy) to `reuse` in the run configuration.
+
+### Termination policy
+
+If a fleet was created automatically, it remains `idle` for 5 minutes by default once the run is finished,
+and can be reused by other runs.
+To change the default idle duration, set
+[`termination_idle_time`](../reference/dstack.yml/fleet.md#termination_idle_time) in the run configuration (e.g., to `0` or a
+longer duration).
+
+!!! info "Fleets"
+    For greater control over fleet provisioning, it is recommended to create
+    [fleets](fleets.md) explicitly.
\ No newline at end of file
diff --git a/docs/docs/concepts/snippets/manage-runs.ext b/docs/docs/concepts/snippets/manage-runs.ext
new file mode 100644
index 000000000..13afd8403
--- /dev/null
+++ b/docs/docs/concepts/snippets/manage-runs.ext
@@ -0,0 +1,23 @@
+## Manage runs
+
+### List runs
+
+The [`dstack ps`](../reference/cli/dstack/ps.md) command lists all running jobs and their statuses.
+Use `--watch` (or `-w`) to monitor the live status of runs.
+
+### Stop a run
+
+A run continues until you stop it or its lifetime exceeds [`max_duration`](../reference/dstack.yml/dev-environment.md#max_duration).
+To gracefully stop a run, use [`dstack stop`](../reference/cli/dstack/stop.md).
+Pass `--abort` or `-x` to stop without waiting for a graceful shutdown.
+
+### Attach to a run
+
+By default, `dstack apply` runs in attached mode – it establishes the SSH tunnel to the run, forwards ports, and shows real-time logs.
+If you detached from a run, you can reattach to it using [`dstack attach`](../reference/cli/dstack/attach.md).
+
+### See run logs
+
+To see the logs of a run without attaching, use [`dstack logs`](../reference/cli/dstack/logs.md).
+Pass `--diagnose`/`-d` to `dstack logs` to see the diagnostics logs. It may be useful if a run fails.
+For more information on debugging failed runs, see the [troubleshooting](../guides/troubleshooting.md) guide.
\ No newline at end of file
diff --git a/docs/docs/concepts/tasks.md b/docs/docs/concepts/tasks.md
index cc7ef72cc..1e158fa7a 100644
--- a/docs/docs/concepts/tasks.md
+++ b/docs/docs/concepts/tasks.md
@@ -1,8 +1,7 @@
 # Tasks

 A task allows you to run arbitrary commands on one or more nodes.
-They are best suited for one-off jobs like training or batch processing,
-but can also be used for serving apps if features supported by [services](`services.md`) are not required.
+Tasks are best suited for jobs like training or batch processing.

 ## Define a configuration

@@ -39,18 +38,286 @@ resources:

-!!! info "Docker image"
-    If you don't specify your Docker image, `dstack` uses the [base](https://hub.docker.com/r/dstackai/base/tags) image
-    pre-configured with Python, Conda, and essential CUDA drivers.
+### Ports

-!!! info "Distributed tasks"
-    By default, tasks run on a single instance. However, you can specify
-    the [number of nodes](../reference/dstack.yml/task.md#distributed-tasks).
-    In this case, the task will run on a cluster of instances.
+A task can configure ports. If the task is running an application on a port, `dstack apply`
+securely forwards this port to your local machine.
+
    + +```yaml +type: task +# The name is optional, if not specified, generated randomly +name: streamlit-hello + +python: "3.10" + +# Commands of the task +commands: + - pip3 install streamlit + - streamlit hello +# Expose the port to access the web app +ports: + - 8501 +``` + +
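+A task can also expose several ports at once. A brief sketch (the app and ports are hypothetical):
+
+```yaml
+type: task
+name: multi-port-task
+
+python: "3.10"
+
+commands:
+  - python3 app.py
+# Expose both ports to access the web apps
+ports:
+  - 8501
+  - 8502
+```
+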
+
+When you run the Streamlit configuration above, `dstack apply` forwards port `8501` to `localhost:8501`, enabling
+secure access to the running application.
+
+### Distributed tasks
+
+By default, a task runs on a single node.
+However, you can run it on a cluster of nodes by specifying `nodes`.
+
    + +```yaml +type: task +# The name is optional, if not specified, generated randomly +name: train-distrib + +# The size of the cluster +nodes: 2 + +python: "3.10" + +# Commands of the task +commands: + - pip install -r requirements.txt + - torchrun + --nproc_per_node=$DSTACK_GPUS_PER_NODE + --node_rank=$DSTACK_NODE_RANK + --nnodes=$DSTACK_NODES_NUM + --master_addr=$DSTACK_MASTER_NODE_IP + --master_port=8008 resnet_ddp.py + --num_epochs 20 + +resources: + gpu: 24GB +``` + +
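+The same variables can be mapped to other launchers. For example, a sketch using `accelerate` (assumes a `train.py`
+script and a `requirements.txt` that installs `accelerate`):
+
+```yaml
+type: task
+name: train-distrib-accelerate
+
+# The size of the cluster
+nodes: 2
+
+python: "3.10"
+
+commands:
+  - pip install -r requirements.txt
+  - accelerate launch
+    --num_processes=$DSTACK_GPUS_NUM
+    --num_machines=$DSTACK_NODES_NUM
+    --machine_rank=$DSTACK_NODE_RANK
+    --main_process_ip=$DSTACK_MASTER_NODE_IP
+    --main_process_port=8008
+    train.py
+
+resources:
+  gpu: 24GB
+```
+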
+
+All you need to do is pass the corresponding environment variables such as
+`DSTACK_GPUS_PER_NODE`, `DSTACK_NODE_RANK`, `DSTACK_NODES_NUM`,
+`DSTACK_MASTER_NODE_IP`, and `DSTACK_GPUS_NUM` (see [System environment variables](#system-environment-variables)).
+
+!!! info "Fleets"
+    To ensure all nodes are provisioned into a cluster placement group and to enable the highest level of inter-node
+    connectivity (incl. support for [EFA :material-arrow-top-right-thin:{ .external }](https://aws.amazon.com/hpc/efa/){:target="_blank"}),
+    create a [fleet](fleets.md) via a configuration before running a distributed task.
+
+`dstack` is easy to use with `accelerate`, `torchrun`, Ray, Spark, and other distributed frameworks.
+
+### Resources
+
+When you specify a resource value like `cpu` or `memory`,
+you can either use an exact value (e.g. `24GB`) or a
+range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`).
+
+
+```yaml
+type: task
+# The name is optional, if not specified, generated randomly
+name: train
+
+# Commands of the task
+commands:
+  - pip install -r fine-tuning/qlora/requirements.txt
+  - python fine-tuning/qlora/train.py
+
+resources:
+  # 200GB or more RAM
+  memory: 200GB..
+  # 4 GPUs from 40GB to 80GB
+  gpu: 40GB..80GB:4
+  # Shared memory (required for multi-GPU)
+  shm_size: 16GB
+  # Disk size
+  disk: 500GB
+```
+
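+Ranges work for `cpu` the same way. A brief sketch with assumed values (`preprocess.py` is hypothetical):
+
+```yaml
+type: task
+name: preprocess
+
+commands:
+  - python preprocess.py
+
+resources:
+  # Between 8 and 16 vCPUs
+  cpu: 8..16
+  # 64GB or more RAM
+  memory: 64GB..
+```
+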
+
+The `gpu` property allows specifying not only memory size but also GPU vendor, names
+and their quantity. Examples: `nvidia` (one NVIDIA GPU), `A100` (one A100), `A10G,A100` (either A10G or A100),
+`A100:80GB` (one A100 of 80GB), `A100:2` (two A100), `24GB..40GB:2` (two GPUs between 24GB and 40GB),
+`A100:40GB:2` (two A100 GPUs of 40GB).
+
+??? info "Google Cloud TPU"
+    To use TPUs, specify the TPU architecture via the `gpu` property.
+
+    ```yaml
+    type: task
+    # The name is optional, if not specified, generated randomly
+    name: train
+
+    python: "3.10"
+
+    # Commands of the task
+    commands:
+      - pip install -r fine-tuning/qlora/requirements.txt
+      - python fine-tuning/qlora/train.py
+
+    resources:
+      gpu: v2-8
+    ```
+
+    Currently, only 8 TPU cores can be specified, supporting single TPU device workloads. Multi-TPU support is coming soon.
+
+??? info "Shared memory"
+    If you are using parallel communicating processes (e.g., dataloaders in PyTorch), you may need to configure
+    `shm_size`, e.g. set it to `16GB`.
+
+### Python version
+
+If you don't specify `image`, `dstack` uses its base Docker image pre-configured with
+`python`, `pip`, `conda` (Miniforge), and essential CUDA drivers.
+The `python` property determines which default Docker image is used.
+
    + +```yaml +type: task +# The name is optional, if not specified, generated randomly +name: train + +# If `image` is not specified, dstack uses its base image +python: "3.10" + +# Commands of the task +commands: + - pip install -r fine-tuning/qlora/requirements.txt + - python fine-tuning/qlora/train.py +``` + +
+
+??? info "nvcc"
+    By default, the base Docker image doesn’t include `nvcc`, which is required for building custom CUDA kernels.
+    If you need `nvcc`, set the `nvcc` property to `true`.
+
+    ```yaml
+    type: task
+    # The name is optional, if not specified, generated randomly
+    name: train
+
+    # If `image` is not specified, dstack uses its base image
+    python: "3.10"
+    # Ensure nvcc is installed (req. for Flash Attention)
+    nvcc: true
+
+    commands:
+      - pip install -r fine-tuning/qlora/requirements.txt
+      - python fine-tuning/qlora/train.py
+    ```
+
+### Docker
+
+If you want, you can specify your own Docker image via `image`.
+
    + +```yaml +type: task +# The name is optional, if not specified, generated randomly +name: train + +# Any custom Docker image +image: dstackai/base:py3.13-0.6-cuda-12.1 + +# Commands of the task +commands: + - pip install -r fine-tuning/qlora/requirements.txt + - python fine-tuning/qlora/train.py +``` + +
+
+??? info "Private registry"
+    Use the `registry_auth` property to provide credentials for a private Docker registry.
+
+    ```yaml
+    type: task
+    # The name is optional, if not specified, generated randomly
+    name: train
+
+    # Any private Docker image
+    image: dstackai/base:py3.13-0.6-cuda-12.1
+    # Credentials of the private Docker registry
+    registry_auth:
+      username: peterschmidt85
+      password: ghp_e49HcZ9oYwBzUbcSk2080gXZOU2hiT9AeSR5
+
+    # Commands of the task
+    commands:
+      - pip install -r fine-tuning/qlora/requirements.txt
+      - python fine-tuning/qlora/train.py
+    ```
+
+??? info "Privileged mode"
+    All backends except `runpod`, `vastai`, and `kubernetes` support running containers in privileged mode.
+    This mode enables features like using [Docker and Docker Compose](../guides/protips.md#docker-and-docker-compose)
+    inside `dstack` runs.
+
+### Environment variables
+
    + +```yaml +type: task +# The name is optional, if not specified, generated randomly +name: train + +python: "3.10" + +# Environment variables +env: + - HF_TOKEN + - HF_HUB_ENABLE_HF_TRANSFER=1 + +# Commands of the task +commands: + - pip install -r fine-tuning/qlora/requirements.txt + - python fine-tuning/qlora/train.py +``` + +
+
+If you don't assign a value to an environment variable (see `HF_TOKEN` above),
+`dstack` will require the value to be passed via the CLI or set in the current process.
+
+??? info "System environment variables"
+    The following environment variables are available in any run by default:
+
+    | Name                    | Description                                                      |
+    |-------------------------|------------------------------------------------------------------|
+    | `DSTACK_RUN_NAME`       | The name of the run                                              |
+    | `DSTACK_REPO_ID`        | The ID of the repo                                               |
+    | `DSTACK_GPUS_NUM`       | The total number of GPUs in the run                              |
+    | `DSTACK_NODES_NUM`      | The number of nodes in the run                                   |
+    | `DSTACK_GPUS_PER_NODE`  | The number of GPUs per node                                      |
+    | `DSTACK_NODE_RANK`      | The rank of the node                                             |
+    | `DSTACK_MASTER_NODE_IP` | The internal IP address of the master node                       |
+    | `DSTACK_NODES_IPS`      | The list of internal IP addresses of all nodes delimited by "\n" |
+
+### Spot policy
+
+By default, `dstack` uses on-demand instances. However, you can change that
+via the [`spot_policy`](../reference/dstack.yml/task.md#spot_policy) property. It accepts `spot`, `on-demand`, and `auto`.

 !!! info "Reference"
-    See [.dstack.yml](../reference/dstack.yml/task.md) for all the options supported by
-    tasks, along with multiple examples.
+    Tasks support many more configuration options,
+    incl. [`backends`](../reference/dstack.yml/task.md#backends),
+    [`regions`](../reference/dstack.yml/task.md#regions),
+    [`max_price`](../reference/dstack.yml/task.md#max_price), and
+    [`max_duration`](../reference/dstack.yml/task.md#max_duration),
+    among [others](../reference/dstack.yml/task.md).

 ## Run a configuration

@@ -83,81 +350,43 @@ Launching `axolotl-train`...
 `dstack apply` automatically provisions instances, uploads the contents of the repo (incl. your local uncommitted changes),
 and runs the commands.

-!!! info "Ports"
-    If the task specifies [`ports`](../reference/dstack.yml/task.md#_ports), `dstack apply` automatically forwards them to your
-    local machine for convenient and secure access.
-
-!!! info "Queueing tasks"
-    By default, if `dstack apply` cannot find capacity, the task fails.
-    To queue the task and wait for capacity, specify the [`retry`](../reference/dstack.yml/task.md#queueing-tasks)
-    property in the task configuration.
-
-## Manage runs
-
-### List runs
-
-The [`dstack ps`](../reference/cli/dstack/ps.md) command lists all running jobs and their statuses.
-Use `--watch` (or `-w`) to monitor the live status of runs.
-
-### Stop a run
-
-A task runs until it's completed or its lifetime exceeds [`max_duration`](../reference/dstack.yml/dev-environment.md#max_duration).
-You can also gracefully stop a task using [`dstack stop`](../reference/cli/dstack/stop.md).
-Pass `--abort` or `-x` to stop without waiting for a graceful shutdown.
-
-### Attach to a run
-
-By default, `dstack apply` runs in attached mode – it establishes the SSH tunnel to the run, forwards ports, and shows real-time logs.
-If you detached from a run, you can reattach to it using [`dstack attach`](../reference/cli/dstack/attach.md).
+### Retry policy

-### See run logs
+By default, the run fails if `dstack` can't find capacity, if the task exits with an error, or if the instance
+is interrupted.

-To see the logs of a run without attaching, use [`dstack logs`](../reference/cli/dstack/logs.md).
-Pass `--diagnose`/`-d` to `dstack logs` to see the diagnostics logs. It may be useful if a run fails.
-For more information on debugging failed runs, see the [troubleshooting](../guides/troubleshooting.md) guide. +If you'd like `dstack` to automatically retry, configure the +[retry](../reference/dstack.yml/task.md#retry) property accordingly: -## Manage fleets +
    -Fleets are groups of cloud instances or SSH machines that you use to run dev environments, tasks, and services. -You can let `dstack apply` provision fleets or [create and manage them directly](fleets.md). - -### Creation policy - -By default, when you run `dstack apply` with a dev environment, task, or service, -`dstack` reuses `idle` instances from an existing [fleet](fleets.md). -If no `idle` instances match the requirements, `dstack` automatically creates a new fleet -using configured backends. - -To ensure `dstack apply` doesn't create a new fleet but reuses an existing one, -pass `-R` (or `--reuse`) to `dstack apply`. +```yaml +type: task +# The name is optional, if not specified, generated randomly +name: train -
    +python: "3.10" -```shell -$ dstack apply -R -f examples/.dstack.yml +# Commands of the task +commands: + - pip install -r fine-tuning/qlora/requirements.txt + - python fine-tuning/qlora/train.py + +retry: + # Retry on specific events + on_events: [no-capacity, error, interruption] + # Retry for up to 1 hour + duration: 1h ```
    -Alternatively, set [`creation_policy`](../reference/dstack.yml/dev-environment.md#creation_policy) to `reuse` in the run configuration. +--8<-- "docs/concepts/snippets/manage-fleets.ext" -### Termination policy +--8<-- "docs/concepts/snippets/manage-runs.ext" -If a fleet is created automatically, it remains `idle` for 5 minutes and can be reused within that time. -To change the default idle duration, set -[`termination_idle_time`](../reference/dstack.yml/fleet.md#termination_idle_time) in the run configuration (e.g., to 0 or a -longer duration). - -!!! info "Fleets" - For greater control over fleet provisioning, configuration, and lifecycle management, it is recommended to use - [fleets](fleets.md) directly. - -## What's next? +!!! info "What's next?" 1. Read about [dev environments](dev-environments.md), [services](services.md), and [repos](repos.md) 2. Learn how to manage [fleets](fleets.md) 3. Check the [Axolotl](/examples/fine-tuning/axolotl) example - -!!! info "Reference" - See [.dstack.yml](../reference/dstack.yml/task.md) for all the options supported by - tasks, along with multiple examples. diff --git a/docs/docs/concepts/volumes.md b/docs/docs/concepts/volumes.md index 1dfb2fb17..a79fbe72e 100644 --- a/docs/docs/concepts/volumes.md +++ b/docs/docs/concepts/volumes.md @@ -1,27 +1,29 @@ # Volumes -Volumes allow you to persist data between runs. `dstack` supports two kinds of volumes: [network volumes](#network-volumes) -and [instance volumes](#instance-volumes). +Volumes enable data persistence between runs of dev environments, tasks, and services. -## Network volumes +`dstack` supports two kinds of volumes: + +* [Network volumes](#network-volumes) — provisioned via backends and mounted to specific container directories. + Ideal for persistent storage. +* [Instance volumes](#instance-volumes) — bind directories on the host instance to container directories. +Useful as a cache for cloud fleets or for persistent storage with SSH fleets. -`dstack` allows to create and attach network volumes to dev environments, tasks, and services. +## Network volumes -!!! info "Backends" - Network volumes are currently supported for the `aws`, `gcp`, and `runpod` backends. - Support for other backends is on the roadmap. +Network volumes are currently supported for the `aws`, `gcp`, and `runpod` backends. ### Define a configuration First, define a volume configuration as a YAML file in your project folder. -The filename must end with `.dstack.yml` (e.g. `.dstack.yml` or `vol.dstack.yml` are both acceptable). +The filename must end with `.dstack.yml` (e.g. `.dstack.yml` or `volume.dstack.yml` are both acceptable). -
    +
    ```yaml type: volume # A name of the volume -name: my-new-volume +name: my-volume # Volumes are bound to a specific backend and region backend: aws @@ -35,16 +37,32 @@ size: 100GB If you use this configuration, `dstack` will create a new volume based on the specified options. -!!! info "Registering existing volumes" +??? info "Register existing volumes" If you prefer not to create a new volume but to reuse an existing one (e.g., created manually), you can [specify its ID via `volume_id`](../reference/dstack.yml/volume.md#existing-volume). In this case, `dstack` will register the specified volume so that you can use it with dev environments, tasks, and services. +
    + + ```yaml + type: volume + # The name of the volume + name: my-volume + + # Volumes are bound to a specific backend and region + backend: aws + region: eu-central-1 + + # The ID of the volume in AWS + volume_id: vol1235 + ``` + +
    + !!! info "Filesystem" If you register an existing volume, you must ensure the volume already has a filesystem. !!! info "Reference" - See [.dstack.yml](../reference/dstack.yml/volume.md) for all the options supported by - volumes, along with multiple examples. + For all volume configuration options, refer to the [reference](../reference/dstack.yml/volume.md). ### Create, register, or update a volume @@ -54,10 +72,10 @@ To create or register the volume, pass the volume configuration to `dstack apply ```shell $ dstack apply -f volume.dstack.yml -Volume my-new-volume does not exist yet. Create the volume? [y/n]: y +Volume my-volume does not exist yet. Create the volume? [y/n]: y NAME BACKEND REGION STATUS CREATED - my-new-volume aws eu-central-1 submitted now + my-volume aws eu-central-1 submitted now ``` @@ -66,8 +84,7 @@ Volume my-new-volume does not exist yet. Create the volume? [y/n]: y Once created, the volume can be attached to dev environments, tasks, and services. -!!! info "Filesystem" - When creating a network volume, `dstack` automatically creates an `ext4` filesystem on it. +> When creating a network volume, `dstack` automatically creates an `ext4` filesystem on it. ### Attach a volume { #attach-network-volume } @@ -86,12 +103,12 @@ ide: vscode # Map the name of the volume to any path volumes: - - name: my-new-volume + - name: my-volume path: /volume_data # You can also use the short syntax in the `name:path` form # volumes: -# - my-new-volume:/volume_data +# - my-volume:/volume_data ```
    @@ -99,7 +116,7 @@ volumes: Once you run this configuration, the contents of the volume will be attached to `/volume_data` inside the dev environment, and its contents will persist across runs. -!!! info "Attaching volumes across regions and backends" +!!! info "Attach volumes across regions and backends" If you're unsure in advance which region or backend you'd like to use (or which is available), you can specify multiple volumes for the same path. @@ -115,7 +132,7 @@ and its contents will persist across runs. `dstack` will attach one of the volumes based on the region and backend of the run. -??? info "Limitations" +??? info "Container path" When you're running a dev environment, task, or service with `dstack`, it automatically mounts the project folder contents to `/workflow` (and sets that as the current working directory). Right now, `dstack` doesn't allow you to attach volumes to `/workflow` or any of its subdirectories. @@ -126,49 +143,62 @@ and its contents will persist across runs. The [`dstack volume list`](../reference/cli/dstack/volume.md#dstack-volume-list) command lists created and registered volumes: -``` +
    + +```shell $ dstack volume list -NAME BACKEND REGION STATUS CREATED - my-new-volume aws eu-central-1 active 3 weeks ago +NAME BACKEND REGION STATUS CREATED + my-volume aws eu-central-1 active 3 weeks ago ``` +
    + #### Delete volumes When the volume isn't attached to any active dev environment, task, or service, you can delete it by passing the volume configuration to `dstack delete`: +
    + ```shell $ dstack delete -f vol.dstack.yaml ``` +
 Alternatively, you can delete a volume by passing the volume name to `dstack volume delete`.

 If the volume was created using `dstack`, it will be physically destroyed along with the data.
 If you've registered an existing volume, it will be de-registered with `dstack` but will keep the data.

+### FAQs

-## Instance volumes
+??? info "Can I use network volumes across backends?"

-Unlike [network volumes](#network-volumes), which are persistent external resources mounted over network,
-instance volumes are part of the instance storage. Basically, the instance volume is a filesystem path
-(a directory or a file) mounted inside the run container.
+    Since volumes are backed by cloud network disks, you can only use them within the same cloud. If you need to access
+    data across different backends, you should either use object storage or replicate the data across multiple volumes.

-As a consequence, the contents of the instance volume are specific to the instance
-where the run is executed, and data persistence, integrity, and even existence are guaranteed only if the subsequent run
-is executed on the same exact instance, and there is no other runs in between.
+??? info "Can I use network volumes across regions?"

-!!! info "Backends"
-    Instance volumes are currently supported for all backends except `runpod`, `vastai` and `kubernetes`.
+    Typically, network volumes are associated with specific regions, so you can't use them in other regions. Often,
+    volumes are also linked to availability zones, but some providers support volumes that can be used across different
+    availability zones within the same region.
+
+    If you don't want to limit a run to one particular region, you can create different volumes for different regions
+    and specify them for the same mount point as [documented above](#attach-network-volume).

-### Manage volumes { #manage-instance-volumes }
+??? info "Can I attach network volumes to multiple runs or instances?"
+    You can mount a volume in multiple runs. This feature is currently supported only by the `runpod` backend.

-You don't need to create or delete instance volumes, and they are not displayed in the
-[`dstack volume list`](../reference/cli/dstack/volume.md#dstack-volume-list) command output.
+## Instance volumes

-### Attach a volume { #attach-instance-volume }
+Instance volumes allow mapping any directory on the instance where the run is executed to any path inside the container.
+This means that the data in instance volumes is persisted only if the run is executed on the same instance.

-Dev environments, tasks, and services let you attach any number of instance volumes.
-To attach an instance volume, specify the `instance_path` and `path` in the `volumes` property:
+### Attach a volume
+
+A run can configure any number of instance volumes. To attach an instance volume,
+specify the `instance_path` and `path` in the `volumes` property:
    @@ -191,59 +221,44 @@ volumes:
    -### Use cases { #instance-volumes-use-cases } - -Despite the limitations, instance volumes can still be useful in some cases: - -=== "Cache" - - For example, if runs regularly install packages with `pip install`, include the instance volume in the run configuration - to reuse pip cache between runs: +Since persistence isn't guaranteed (instances may be interrupted or runs may occur on different instances), use instance +volumes only for caching or with directories manually mounted to network storage. -
    - - ```yaml - type: task - - volumes: - - /dstack-cache/pip:/root/.cache/pip - ``` +> Instance volumes are currently supported for all backends except `runpod`, `vastai` and `kubernetes`, +> and can also be used with [SSH fleets](fleets.md#ssh). -
    - -=== "Network storage with SSH fleet" - - If you manage your own instances, you can mount network storages (e.g., NFS or SMB) to the hosts and access them in the runs. - Imagine you mounted the same network storage to all the fleet instances using the same path `/mnt/nfs-storage`, - then you can treat the instance volume as a shared persistent storage: +### Use instance volumes for caching -
    - - ```yaml - type: task +For example, if a run regularly installs packages with `pip install`, +you can mount the `/root/.cache/pip` folder inside the container to a folder on the instance for +reuse. - volumes: - - /mnt/nfs-storage:/storage - ``` +
    -
    +```yaml +type: task -## FAQ +volumes: + - /dstack-cache/pip:/root/.cache/pip +``` -##### Can I use network volumes across backends? +
    -Since volumes are backed up by cloud network disks, you can only use them within the same cloud. If you need to access -data across different backends, you should either use object storage or replicate the data across multiple volumes. +### Use instance volumes with SSH fleets + +If you control the instances (e.g. they are on-prem servers configured via [SSH fleets](fleets.md#ssh)), +you can mount network storage (e.g., NFS or SMB) and use the mount points as instance volumes. -##### Can I use network volumes across regions? +For example, if you mount a network storage to `/mnt/nfs-storage` on all hosts of your SSH fleet, +you can map this directory via instance volumes and be sure the data is persisted. -Typically, network volumes are associated with specific regions, so you can't use them in other regions. Often, -volumes are also linked to availability zones, but some providers support volumes that can be used across different -availability zones within the same region. +
    -If you don't want to limit a run to one particular region, you can create different volumes for different regions -and specify them for the same mount point as [documented above](#attach-network-volume). +```yaml +type: task -##### Can I attach network volumes to multiple runs or instances? +volumes: + - /mnt/nfs-storage:/storage +``` -You can mount a volume in multiple runs. This feature is currently supported only by the `runpod` backend. +
    \ No newline at end of file diff --git a/docs/docs/guides/protips.md b/docs/docs/guides/protips.md index d01ad4fad..daf47004e 100644 --- a/docs/docs/guides/protips.md +++ b/docs/docs/guides/protips.md @@ -40,7 +40,7 @@ To persist data across runs, it is recommended to use volumes. (for persisting data even if the instance is interrupted) and [instance](../concepts/volumes.md#instance-volumes) (useful for persisting cached data across runs while the instance remains active). -> If you use [SSH fleets](../concepts/fleets.md#ssh-fleets), you can mount network storage (e.g., NFS or SMB) to the hosts and access it in runs via instance volumes. +> If you use [SSH fleets](../concepts/fleets.md#ssh), you can mount network storage (e.g., NFS or SMB) to the hosts and access it in runs via instance volumes. ## Dev environments diff --git a/docs/docs/guides/server-deployment.md b/docs/docs/guides/server-deployment.md index 67d3f24e7..f8e1722e3 100644 --- a/docs/docs/guides/server-deployment.md +++ b/docs/docs/guides/server-deployment.md @@ -64,7 +64,7 @@ The server loads this file on startup. Alternatively, you can configure backends on the [project settings page](../guides/projects/#project-backends) via the control plane's UI. > For using `dstack` with on-prem servers, no backend configuration is required. -> See [SSH fleets](../concepts/fleets.md#ssh-fleets) for more details. +> See [SSH fleets](../concepts/fleets.md#ssh) for more details. ## State persistence diff --git a/docs/docs/index.md b/docs/docs/index.md index e8ce7eaea..ccbe19a88 100644 --- a/docs/docs/index.md +++ b/docs/docs/index.md @@ -48,28 +48,16 @@ cloud platforms or on-premise servers. ## How does it compare to other tools? -??? info "Kubernetes" - #### How does dstack compare to Kubernetes? - - `dstack` and Kubernetes are both container orchestrators for cloud and on-premises environments. - - However, `dstack` is more lightweight, and is designed specifically for AI, enabling AI engineers to handle development, training, and +??? info "How does dstack compare to Kubernetes?" + `dstack` is more lightweight, and is designed specifically for AI, enabling AI engineers to handle development, training, and deployment without needing extra tools or Ops support. With `dstack`, you don't need Kubeflow or other ML platforms on top—everything is available out of the box. - Additionally, `dstack` is much easier to use for on-premises servers—just provide hostnames and SSH credentials, + Additionally, `dstack` is much easier to use with on-prem servers—just provide hostnames and SSH credentials, and `dstack` will automatically create a fleet ready for use with development environments, tasks, and services. - #### How does dstack compare to KubeFlow? - `dstack` can be used entirely instead of Kubeflow. It covers everything that Kubeflow does, and much more on top, - including development environments, services, and additional features. - - `dstack` is easier to set up with on-premises servers, doesn't require Kubernetes, and works with multiple cloud - providers out of the box. - - #### Can dstack and Kubernetes be used together? - +??? info "Can dstack and Kubernetes be used together?" For AI development, it’s more efficient to use `dstack` directly with your cloud accounts or on-prem servers—without Kubernetes. However, if you prefer, you can set up the `dstack` server with a Kubernetes backend to provision through Kubernetes. @@ -77,17 +65,22 @@ cloud platforms or on-premise servers. 
    Does your Ops team insist on using Kubernetes for production-grade deployment? You can use `dstack` and Kubernetes
    side by side; `dstack` for development and Kubernetes for production-grade deployment.

-??? info "Slurm"
-    #### How does dstack compare to Slurm?
+??? info "How does dstack compare to KubeFlow?"
+    `dstack` can be used entirely instead of Kubeflow. It covers everything that Kubeflow does, and much more on top,
+    including development environments, services, and additional features.
+
+    `dstack` is easier to set up with on-premises servers, doesn't require Kubernetes, and works with multiple cloud
+    providers out of the box.
+
+??? info "How does dstack compare to Slurm?"
     `dstack` can be used entirely instead of Slurm. It covers everything that Slurm does, and a lot more on top,
     including dev environments, services, out-of-the-box cloud support, easier setup with on-premises servers, and much
     more.

 [//]: # (??? info "Cloud platforms")
 [//]: # ( TBA)

-## Where do I start?
-
-1. Proceed to [installation](installation/index.md)
-2. See [quickstart](quickstart.md)
-3. Browse [examples](/examples)
-4. Join [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd){:target="_blank"}
\ No newline at end of file
+!!! info "Where do I start?"
+    1. Proceed to [installation](installation/index.md)
+    2. See [quickstart](quickstart.md)
+    3. Browse [examples](/examples)
+    4. Join [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd){:target="_blank"}
\ No newline at end of file
diff --git a/docs/docs/installation/index.md b/docs/docs/installation/index.md
index 680c99ef7..c02a6df3f 100644
--- a/docs/docs/installation/index.md
+++ b/docs/docs/installation/index.md
@@ -11,10 +11,9 @@ To use the open-source version of `dstack` with your own cloud accounts or on-pr

 ### (Optional) Configure backends

-To use `dstack` with specific providers, configure [backends](../concepts/backends.md).
+To use `dstack` with cloud providers, configure the appropriate [backends](../concepts/backends.md).

-> To use `dstack` with on-prem servers,
-no backend configuration is needed. Use [SSH fleets](../concepts/fleets.md#ssh-fleets) for that.
+> To use `dstack` with on-prem servers, create [SSH fleets](../concepts/fleets.md#ssh) instead.

 ## Start the server

@@ -95,10 +94,9 @@ Configuration is updated at ~/.dstack/config.yml

 This configuration is stored in `~/.dstack/config.yml`.

-## What's next?
-
-1. Check the [server/config.yml reference](../reference/server/config.yml.md) on how to configure backends
-2. Check [SSH fleets](../concepts/fleets.md#ssh-fleets) to learn about running on your on-prem servers
-3. Follow [quickstart](../quickstart.md)
-4. Browse [examples](/examples)
-5. Join the community via [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd)
\ No newline at end of file
+!!! info "What's next?"
+    1. Check the [server/config.yml reference](../reference/server/config.yml.md) on how to configure backends
+    2. Check [SSH fleets](../concepts/fleets.md#ssh) to learn about running on your on-prem servers
+    3. Follow [quickstart](../quickstart.md)
+    4. Browse [examples](/examples)
+    5. Join the community via [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd)
\ No newline at end of file
diff --git a/docs/docs/quickstart.md b/docs/docs/quickstart.md
index 33b7b0058..b4fb106ef 100644
--- a/docs/docs/quickstart.md
+++ b/docs/docs/quickstart.md
@@ -204,8 +204,7 @@ and runs the configuration.

 Something not working? See the [troubleshooting](guides/troubleshooting.md) guide.

-## What's next?
-
-1. Read about [backends](concepts/backends.md), [dev environments](concepts/dev-environments.md), [tasks](concepts/tasks.md), and [services](concepts/services.md)
-2. Join [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd)
-3. Browse [examples](https://dstack.ai/examples)
+!!! info "What's next?"
+    1. Read about [backends](concepts/backends.md), [dev environments](concepts/dev-environments.md), [tasks](concepts/tasks.md), and [services](concepts/services.md)
+    2. Join [Discord :material-arrow-top-right-thin:{ .external }](https://discord.gg/u8SmfwPpMd)
+    3. Browse [examples](https://dstack.ai/examples)
diff --git a/docs/docs/reference/dstack.yml/dev-environment.md b/docs/docs/reference/dstack.yml/dev-environment.md
index eca573edb..2bcd9794c 100644
--- a/docs/docs/reference/dstack.yml/dev-environment.md
+++ b/docs/docs/reference/dstack.yml/dev-environment.md
@@ -75,264 +75,3 @@ The `dev-environment` configuration type allows running [dev environments](../..
     * `volume-name:/container/path` for network volumes
     * `/instance/path:/container/path` for instance volumes
-
-## Examples
-
-### Python version
-
-If you don't specify `image`, `dstack` uses its base Docker image pre-configured with
-`python`, `pip`, `conda` (Miniforge), and essential CUDA drivers.
-The `python` property determines which default Docker image is used.
-
    - -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -# If `image` is not specified, dstack uses its base image -python: "3.10" - -ide: vscode -``` - -
    - -??? info "nvcc" - By default, the base Docker image doesn’t include `nvcc`, which is required for building custom CUDA kernels. - If you need `nvcc`, set the corresponding property to true. - - ```yaml - type: dev-environment - # The name is optional, if not specified, generated randomly - name: vscode - - # If `image` is not specified, dstack uses its base image - python: "3.10" - # Ensure nvcc is installed (req. for Flash Attention) - nvcc: true - - ide: vscode - ``` - -### Docker - -If you want, you can specify your own Docker image via `image`. - -
    - -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -# Any custom Docker image -image: ghcr.io/huggingface/text-generation-inference:latest - -ide: vscode -``` - -
    - -??? info "Private registry" - - Use the `registry_auth` property to provide credentials for a private Docker registry. - - ```yaml - type: dev-environment - # The name is optional, if not specified, generated randomly - name: vscode - - # Any private Docker image - image: ghcr.io/huggingface/text-generation-inference:latest - # Credentials of the private Docker registry - registry_auth: - username: peterschmidt85 - password: ghp_e49HcZ9oYwBzUbcSk2080gXZOU2hiT9AeSR5 - - ide: vscode - ``` - -!!! info "Docker and Docker Compose" - All backends except `runpod`, `vastai`, and `kubernetes` also allow using [Docker and Docker Compose](../../guides/protips.md#docker-and-docker-compose) inside `dstack` runs. - -### Resources { #resources_ } - -When you specify a resource value like `cpu` or `memory`, -you can either use an exact value (e.g. `24GB`) or a -range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`). - -
    - -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -ide: vscode - -resources: - # 200GB or more RAM - memory: 200GB.. - # 4 GPUs from 40GB to 80GB - gpu: 40GB..80GB:4 - # Shared memory (required by multi-gpu) - shm_size: 16GB - # Disk size - disk: 500GB -``` - -
    - -The `gpu` property allows specifying not only memory size but also GPU vendor, names -and their quantity. Examples: `nvidia` (one NVIDIA GPU), `A100` (one A100), `A10G,A100` (either A10G or A100), -`A100:80GB` (one A100 of 80GB), `A100:2` (two A100), `24GB..40GB:2` (two GPUs between 24GB and 40GB), -`A100:40GB:2` (two A100 GPUs of 40GB). - -??? info "Google Cloud TPU" - To use TPUs, specify its architecture via the `gpu` property. - - ```yaml - type: dev-environment - # The name is optional, if not specified, generated randomly - name: vscode - - ide: vscode - - resources: - gpu: v2-8 - ``` - - Currently, only 8 TPU cores can be specified, supporting single TPU device workloads. Multi-TPU support is coming soon. - -??? info "Shared memory" - If you are using parallel communicating processes (e.g., dataloaders in PyTorch), you may need to configure - `shm_size`, e.g. set it to `16GB`. - -### Environment variables - -
    - -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -# Environment variables -env: - - HF_TOKEN - - HF_HUB_ENABLE_HF_TRANSFER=1 - -ide: vscode -``` - -
    - -If you don't assign a value to an environment variable (see `HF_TOKEN` above), -`dstack` will require the value to be passed via the CLI or set in the current process. -For instance, you can define environment variables in a `.envrc` file and utilize tools like `direnv`. - -??? info "System environment variables" - The following environment variables are available in any run by default: - - | Name | Description | - |-------------------------|-----------------------------------------| - | `DSTACK_RUN_NAME` | The name of the run | - | `DSTACK_REPO_ID` | The ID of the repo | - | `DSTACK_GPUS_NUM` | The total number of GPUs in the run | - -### Spot policy - -You can choose whether to use spot instances, on-demand instances, or any available type. - -
    - -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -ide: vscode - -# Uncomment to leverage spot instances -#spot_policy: auto -``` - -
    - -The `spot_policy` accepts `spot`, `on-demand`, and `auto`. The default for dev environments is `on-demand`. - -### Backends - -By default, `dstack` provisions instances in all configured backends. However, you can specify the list of backends: - -
    - -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -ide: vscode - -# Use only listed backends -backends: [aws, gcp] -``` - -
    - -### Regions - -By default, `dstack` uses all configured regions. However, you can specify the list of regions: - -
    - -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -ide: vscode - -# Use only listed regions -regions: [eu-west-1, eu-west-2] -``` - -
    - -### Volumes - -Volumes allow you to persist data between runs. -To attach a volume, simply specify its name using the `volumes` property and specify where to mount its contents: - -
    - -```yaml -type: dev-environment -# The name is optional, if not specified, generated randomly -name: vscode - -ide: vscode - -# Map the name of the volume to any path -volumes: - - name: my-new-volume - path: /volume_data -``` - -
    - -Once you run this configuration, the contents of the volume will be attached to `/volume_data` inside the dev -environment, and its contents will persist across runs. - -??? Info "Instance volumes" - If data persistence is not a strict requirement, use can also use - ephemeral [instance volumes](../../concepts/volumes.md#instance-volumes). - -!!! info "Limitations" - When you're running a dev environment, task, or service with `dstack`, it automatically mounts the project folder contents - to `/workflow` (and sets that as the current working directory). Right now, `dstack` doesn't allow you to - attach volumes to `/workflow` or any of its subdirectories. - -The `dev-environment` configuration type supports many other options. See below. diff --git a/docs/docs/reference/dstack.yml/gateway.md b/docs/docs/reference/dstack.yml/gateway.md index 61723ef71..4d81d5d50 100644 --- a/docs/docs/reference/dstack.yml/gateway.md +++ b/docs/docs/reference/dstack.yml/gateway.md @@ -27,26 +27,3 @@ The `gateway` configuration type allows creating and updating [gateways](../../c show_root_heading: false type: required: true - -## Examples - -### Creating a new gateway { #new-gateway } - -
    - -```yaml -type: gateway -# A name of the gateway -name: example-gateway - -# Gateways are bound to a specific backend and region -backend: aws -region: eu-west-1 - -# This domain will be used to access the endpoint -domain: example.com -``` - -
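To provision the gateway, pass the configuration to `dstack apply` — a sketch (the file name is an assumption):

```shell
# Submit the gateway configuration; dstack prompts before creating cloud resources
$ dstack apply -f gateway.dstack.yml
```

Once the gateway is created, add a wildcard DNS record (e.g. an `A` record for `*.example.com`) pointing at the gateway's public IP so that service subdomains resolve to it.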
    - -[//]: # (TODO: other examples, e.g. private \ No newline at end of file diff --git a/docs/docs/reference/dstack.yml/service.md b/docs/docs/reference/dstack.yml/service.md index 683290991..54e1f57f6 100644 --- a/docs/docs/reference/dstack.yml/service.md +++ b/docs/docs/reference/dstack.yml/service.md @@ -141,105 +141,6 @@ The `service` configuration type allows running [services](../../concepts/servic ## Examples -### Python version - -If you don't specify `image`, `dstack` uses its base Docker image pre-configured with -`python`, `pip`, `conda` (Miniforge), and essential CUDA drivers. -The `python` property determines which default Docker image is used. - -
    - -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: http-server-service - -# If `image` is not specified, dstack uses its base image -python: "3.10" - -# Commands of the service -commands: - - python3 -m http.server -# The port of the service -port: 8000 -``` - -
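Once the run is up, you can sanity-check the endpoint with `curl` — a sketch, assuming no gateway, the default proxy path layout, and a hypothetical `main` project:

```shell
# Service endpoints require a dstack user token by default
$ curl http://localhost:3000/proxy/services/main/http-server-service/ \
    -H 'Authorization: Bearer <dstack token>'
```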
    - -??? info "nvcc" - By default, the base Docker image doesn’t include `nvcc`, which is required for building custom CUDA kernels. - If you need `nvcc`, set the corresponding property to true. - -
    - - ```yaml - type: service - # The name is optional, if not specified, generated randomly - name: http-server-service - - # If `image` is not specified, dstack uses its base image - python: "3.10" - # Ensure nvcc is installed (req. for Flash Attention) - nvcc: true - - # Commands of the service - commands: - - python3 -m http.server - # The port of the service - port: 8000 - ``` - -
    - -### Docker - -If you want, you can specify your own Docker image via `image`. - -
    - - ```yaml - type: service - # The name is optional, if not specified, generated randomly - name: http-server-service - - # Any custom Docker image - image: dstackai/base:py3.13-0.6-cuda-12.1 - - # Commands of the service - commands: - - python3 -m http.server - # The port of the service - port: 8000 - ``` - -
-
-??? info "Private Docker registry"
-
-    Use the `registry_auth` property to provide credentials for a private Docker registry.
-
-    ```yaml
-    type: service
-    # The name is optional, if not specified, generated randomly
-    name: http-server-service
-
-    # Any private Docker image
-    image: dstackai/base:py3.13-0.6-cuda-12.1
-    # Credentials of the private registry
-    registry_auth:
-      username: peterschmidt85
-      password: ghp_e49HcZ9oYwBzUbcSk2080gXZOU2hiT9AeSR5
-
-    # Commands of the service
-    commands:
-      - python3 -m http.server
-    # The port of the service
-    port: 8000
-    ```
-
-!!! info "Docker and Docker Compose"
-    All backends except `runpod`, `vastai`, and `kubernetes` also allow using [Docker and Docker Compose](../../guides/protips.md#docker-and-docker-compose) inside `dstack` runs.
-
 ### Models { #model-mapping }
 
 If you are running a chat model with an OpenAI-compatible interface,
diff --git a/docs/docs/reference/dstack.yml/task.md b/docs/docs/reference/dstack.yml/task.md
index 0f4d809a6..84cc54c0a 100644
--- a/docs/docs/reference/dstack.yml/task.md
+++ b/docs/docs/reference/dstack.yml/task.md
@@ -78,226 +78,10 @@ The `task` configuration type allows running [tasks](../../concepts/tasks.md).
 
 ## Examples
 
-### Python version
-
-If you don't specify `image`, `dstack` uses its base Docker image pre-configured with
-`python`, `pip`, `conda` (Miniforge), and essential CUDA drivers.
-The `python` property determines which default Docker image is used.
-
-<div>
    - -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train - -# If `image` is not specified, dstack uses its base image -python: "3.10" - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py -``` - -
    - -??? info "nvcc" - By default, the base Docker image doesn’t include `nvcc`, which is required for building custom CUDA kernels. - If you need `nvcc`, set the corresponding property to true. - - - ```yaml - type: task - # The name is optional, if not specified, generated randomly - name: train - - # If `image` is not specified, dstack uses its base image - python: "3.10" - # Ensure nvcc is installed (req. for Flash Attention) - nvcc: true - - commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py - ``` - -### Ports { #_ports } - -A task can configure ports. In this case, if the task is running an application on a port, `dstack run` -will securely allow you to access this port from your local machine through port forwarding. - -
    - -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train - -python: "3.10" - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - tensorboard --logdir results/runs & - - python fine-tuning/qlora/train.py -# Expose the port to access TensorBoard -ports: - - 6000 -``` - -
    - -When running it, `dstack run` forwards `6000` port to `localhost:6000`, enabling secure access. [//]: # (See [tasks](../../tasks.md#configure-ports) for more detail.) -### Docker - -If you want, you can specify your own Docker image via `image`. - -
-
-```yaml
-type: task
-# The name is optional, if not specified, generated randomly
-name: train
-
-# Any custom Docker image
-image: dstackai/base:py3.13-0.6-cuda-12.1
-
-# Commands of the task
-commands:
-  - pip install -r fine-tuning/qlora/requirements.txt
-  - python fine-tuning/qlora/train.py
-```
-
-</div>
-
-??? info "Private registry"
-    Use the `registry_auth` property to provide credentials for a private Docker registry.
-
-    ```yaml
-    type: task
-    # The name is optional, if not specified, generated randomly
-    name: train
-
-    # Any private Docker image
-    image: dstackai/base:py3.13-0.6-cuda-12.1
-    # Credentials of the private Docker registry
-    registry_auth:
-      username: peterschmidt85
-      password: ghp_e49HcZ9oYwBzUbcSk2080gXZOU2hiT9AeSR5
-
-    # Commands of the task
-    commands:
-      - pip install -r fine-tuning/qlora/requirements.txt
-      - python fine-tuning/qlora/train.py
-    ```
-
-!!! info "Docker and Docker Compose"
-    All backends except `runpod`, `vastai`, and `kubernetes` also allow using [Docker and Docker Compose](../../guides/protips.md#docker-and-docker-compose) inside `dstack` runs.
-
-### Resources { #resources_ }
-
-If you specify memory size, you can either specify an explicit size (e.g. `24GB`) or a
-range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`).
-
-<div>
    - -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py - -resources: - # 200GB or more RAM - memory: 200GB.. - # 4 GPUs from 40GB to 80GB - gpu: 40GB..80GB:4 - # Shared memory (required by multi-gpu) - shm_size: 16GB - # Disk size - disk: 500GB -``` - -
-
-The `gpu` property allows specifying not only memory size but also GPU vendor, names
-and their quantity. Examples: `nvidia` (one NVIDIA GPU), `A100` (one A100), `A10G,A100` (either A10G or A100),
-`A100:80GB` (one A100 of 80GB), `A100:2` (two A100), `24GB..40GB:2` (two GPUs between 24GB and 40GB),
-`A100:40GB:2` (two A100 GPUs of 40GB).
-
-??? info "Google Cloud TPU"
-    To use TPUs, specify the TPU architecture via the `gpu` property.
-
-    ```yaml
-    type: task
-    # The name is optional, if not specified, generated randomly
-    name: train
-
-    python: "3.10"
-
-    # Commands of the task
-    commands:
-      - pip install torch~=2.3.0 torch_xla[tpu]~=2.3.0 torchvision -f https://storage.googleapis.com/libtpu-releases/index.html
-      - git clone --recursive https://github.com/pytorch/xla.git
-      - python3 xla/test/test_train_mp_imagenet.py --fake_data --model=resnet50 --num_epochs=1
-
-    resources:
-      gpu: v2-8
-    ```
-
-    Currently, only 8 TPU cores can be specified, supporting single-host workloads. Multi-host support is coming soon.
-
-??? info "Shared memory"
-    If you are using parallel communicating processes (e.g., dataloaders in PyTorch), you may need to configure
-    `shm_size`, e.g. set it to `16GB`.
-
-### Environment variables
-
-<div>
    - -```yaml -type: task - -python: "3.10" - -# Environment variables -env: - - HF_TOKEN - - HF_HUB_ENABLE_HF_TRANSFER=1 - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py -``` - -
-
-If you don't assign a value to an environment variable (see `HF_TOKEN` above),
-`dstack` will require the value to be passed via the CLI or set in the current process.
-For instance, you can define environment variables in a `.envrc` file and utilize tools like `direnv`.
-
-??? info "System environment variables"
-    The following environment variables are available in any run by default:
-
-    | Name                    | Description                                                      |
-    |-------------------------|------------------------------------------------------------------|
-    | `DSTACK_RUN_NAME`       | The name of the run                                              |
-    | `DSTACK_REPO_ID`        | The ID of the repo                                               |
-    | `DSTACK_GPUS_NUM`       | The total number of GPUs in the run                              |
-    | `DSTACK_NODES_NUM`      | The number of nodes in the run                                   |
-    | `DSTACK_GPUS_PER_NODE`  | The number of GPUs per node                                      |
-    | `DSTACK_NODE_RANK`      | The rank of the node                                             |
-    | `DSTACK_MASTER_NODE_IP` | The internal IP address of the master node                       |
-    | `DSTACK_NODES_IPS`      | The list of internal IP addresses of all nodes delimited by "\n" |
 
 ### Distributed tasks
 
 By default, a task runs on a single node. However, you can run it on a cluster of nodes by specifying `nodes`:
 
 <div>
 
 ```yaml
 type: task
 # The name is optional, if not specified, generated randomly
 name: train-distrib
 
 # The size of the cluster
 nodes: 2
 
 python: "3.10"
 
 # Commands of the task
 commands:
   - pip install -r requirements.txt
   - torchrun
     --nproc_per_node=$DSTACK_GPUS_PER_NODE
     --node_rank=$DSTACK_NODE_RANK
     --nnodes=$DSTACK_NODES_NUM
     --master_addr=$DSTACK_MASTER_NODE_IP
     --master_port=8008 resnet_ddp.py
     --num_epochs 20
 
 resources:
   gpu: 24GB
 ```
 
 </div>
    -If you run the task, `dstack` first provisions the master node and then runs the other nodes of the cluster. - -??? info "Network" - To ensure all nodes are provisioned into a cluster placement group and to enable the highest level of inter-node - connectivity, it is recommended to manually create a [fleet](../../concepts/fleets.md) before running a task. - This won’t be needed once [this issue :material-arrow-top-right-thin:{ .external }](https://github.com/dstackai/dstack/issues/1805){:target="_blank"} - is fixed. - -> `dstack` is easy to use with `accelerate`, `torchrun`, and other distributed frameworks. All you need to do -is pass the corresponding environment variables such as `DSTACK_GPUS_PER_NODE`, `DSTACK_NODE_RANK`, `DSTACK_NODES_NUM`, -`DSTACK_MASTER_NODE_IP`, and `DSTACK_GPUS_NUM` (see [System environment variables](#system-environment-variables)). - -??? info "Backends" - Running on multiple nodes is supported only with the `aws`, `gcp`, `azure`, `oci` backends, or - [SSH fleets](../../concepts/fleets.md#ssh-fleets). - - Additionally, the `aws` backend supports [Elastic Fabric Adapter :material-arrow-top-right-thin:{ .external }](https://aws.amazon.com/hpc/efa/){:target="_blank"}. - For a list of instance types with EFA support see [Fleets](../../concepts/fleets.md#cloud-fleets). - -### Web applications - -Here's an example of using `ports` to run web apps with `tasks`. - -
    - -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: streamlit-hello - -python: "3.10" - -# Commands of the task -commands: - - pip3 install streamlit - - streamlit hello -# Expose the port to access the web app -ports: - - 8501 - -``` - -
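While `dstack apply` stays attached to the run, the exposed port is forwarded to your machine — a sketch:

```shell
$ dstack apply -f .dstack.yml
# With the run attached, the Streamlit app is reachable locally
$ open http://localhost:8501
```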
    -### Spot policy - -You can choose whether to use spot instances, on-demand instances, or any available type. - -
    - -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py - -# Uncomment to leverage spot instances -#spot_policy: auto -``` - -
    - -The `spot_policy` accepts `spot`, `on-demand`, and `auto`. The default for tasks is `on-demand`. - -### Queueing tasks { #queueing-tasks } - -By default, if `dstack apply` cannot find capacity, the task fails. - -To queue the task and wait for capacity, specify the [`retry`](#retry) -property: - -
    - -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py - -retry: - # Retry on no-capacity errors - on_events: [no-capacity] - # Retry within 1 day - duration: 1d -``` - -
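`retry` can also react to events other than `no-capacity` — a sketch (the extra event names are an assumption and depend on your `dstack` version):

```yaml
retry:
  # Also retry if a spot instance is interrupted or the job errors
  on_events: [no-capacity, interruption, error]
  # Keep retrying for up to one day
  duration: 1d
```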
    ### Backends diff --git a/docs/docs/reference/dstack.yml/volume.md b/docs/docs/reference/dstack.yml/volume.md index cfd330242..af34a166a 100644 --- a/docs/docs/reference/dstack.yml/volume.md +++ b/docs/docs/reference/dstack.yml/volume.md @@ -9,43 +9,3 @@ The `volume` configuration type allows creating, registering, and updating [volu show_root_heading: false type: required: true - -## Examples - -### Creating a new volume { #new-volume } - -
    - -```yaml -type: volume -# The name of the volume -name: my-new-volume - -# Volumes are bound to a specific backend and region -backend: aws -region: eu-central-1 - -# The size of the volume -size: 100GB -``` - -
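As with other configurations, the volume is provisioned by passing the file to `dstack apply` — a sketch (the file name is an assumption):

```shell
$ dstack apply -f volume.dstack.yml
```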
    - -### Registering an existing volume { #existing-volume } - -
    - -```yaml -type: volume -# The name of the volume -name: my-existing-volume - -# Volumes are bound to a specific backend and region -backend: aws -region: eu-central-1 - -# The ID of the volume in AWS -volume_id: vol1235 -``` - -
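If you don't know the ID of the volume, you can look it up with the AWS CLI — for example:

```shell
# List EBS volume IDs in the region where the volume lives
$ aws ec2 describe-volumes --region eu-central-1 --query 'Volumes[].VolumeId'
```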
    \ No newline at end of file diff --git a/docs/overrides/home.html b/docs/overrides/home.html index 0ca4f1b69..4a73df03b 100644 --- a/docs/overrides/home.html +++ b/docs/overrides/home.html @@ -395,7 +395,7 @@

    Get started in under a minute

    - +
    SSH fleets
    diff --git a/examples/accelerators/amd/README.md b/examples/accelerators/amd/README.md index bb0dd67db..7abad7f19 100644 --- a/examples/accelerators/amd/README.md +++ b/examples/accelerators/amd/README.md @@ -1,7 +1,7 @@ # AMD `dstack` supports running dev environments, tasks, and services on AMD GPUs. -You can do that by setting up an [SSH fleet](https://dstack.ai/docs/concepts/fleets#ssh-fleets) +You can do that by setting up an [SSH fleet](https://dstack.ai/docs/concepts/fleets#ssh) with on-prem AMD GPUs or configuring a backend that offers AMD GPUs such as the `runpod` backend. ## Deployment diff --git a/mkdocs.yml b/mkdocs.yml index ebbb76c0c..162e3868d 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -157,6 +157,8 @@ markdown_extensions: - pymdownx.highlight - pymdownx.details - pymdownx.superfences + - pymdownx.snippets: + base_path: ["docs"] - pymdownx.tabbed: alternate_style: true slugify: !!python/object/apply:pymdownx.slugs.slugify @@ -213,15 +215,15 @@ nav: - Dev environments: docs/concepts/dev-environments.md - Tasks: docs/concepts/tasks.md - Services: docs/concepts/services.md - - Repos: docs/concepts/repos.md - Fleets: docs/concepts/fleets.md - Volumes: docs/concepts/volumes.md - Gateways: docs/concepts/gateways.md + - Repos: docs/concepts/repos.md - Guides: - Protips: docs/guides/protips.md - - Administration: docs/guides/administration.md - Server deployment: docs/guides/server-deployment.md - Troubleshooting: docs/guides/troubleshooting.md + - Administration: docs/guides/administration.md - Reference: - .dstack.yml: - dev-environment: docs/reference/dstack.yml/dev-environment.md From 19c58b19713fb54b801e29ecfb91fbb14aac13cd Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Sun, 5 Jan 2025 12:21:29 +0100 Subject: [PATCH 03/13] [Docs] Many docs improvements #2170 --- docs/docs/concepts/backends.md | 16 +- docs/docs/guides/administration.md | 6 +- docs/docs/guides/server-deployment.md | 21 +- docs/docs/installation/index.md | 2 +- docs/docs/reference/dstack.yml/fleet.md | 56 ---- docs/docs/reference/dstack.yml/service.md | 318 ---------------------- docs/docs/reference/dstack.yml/task.md | 127 --------- docs/docs/reference/server/config.yml.md | 3 +- 8 files changed, 27 insertions(+), 522 deletions(-) diff --git a/docs/docs/concepts/backends.md b/docs/docs/concepts/backends.md index 52bb1bab5..c2df1c34e 100644 --- a/docs/docs/concepts/backends.md +++ b/docs/docs/concepts/backends.md @@ -1,9 +1,15 @@ # Backends -To use `dstack` with cloud providers, configure the appropriate backends. -This can be done either through `~/.dstack/server/config.yml` before starting the server, or via UI after the server is up. +To use `dstack` with cloud providers, configure [backends](../concepts/backends.md) +via the `~/.dstack/server/config.yml` file. +The server loads this file on startup. -Below are examples of how to configure them via `~/.dstack/server/config.yml`. +Alternatively, you can configure backends on the [project settings page](../guides/administration.md#backends) via UI. + +> For using `dstack` with on-prem servers, no backend configuration is required. +> Use [SSH fleets](../concepts/fleets.md#ssh) instead. + +Below are examples of how to configure backends via `~/.dstack/server/config.yml`. 
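For orientation, a minimal `~/.dstack/server/config.yml` looks roughly like this — a sketch with a single AWS backend using default credentials (the project name is an assumption):

```yaml
projects:
  - name: main
    backends:
      - type: aws
        creds:
          type: default
```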
## Cloud providers @@ -841,4 +847,6 @@ In case of a self-managed cluster, also specify the IP address of any node in th If you're using [dstack Sky :material-arrow-top-right-thin:{ .external }](https://sky.dstack.ai){:target="_blank"}, backends are pre-configured to use compute from `dstack`'s marketplace. -You can reconfigure backends via the UI, to use your own cloud accounts instead. \ No newline at end of file +You can reconfigure backends via the UI, to use your own cloud accounts instead. + +[//]: # (TODO: Add link to the server config reference page) \ No newline at end of file diff --git a/docs/docs/guides/administration.md b/docs/docs/guides/administration.md index 394d15de2..c2f2ce433 100644 --- a/docs/docs/guides/administration.md +++ b/docs/docs/guides/administration.md @@ -3,10 +3,10 @@ Projects enable the isolation of different teams and their resources. Each project can configure its own backends and control which users have access to it. -> While project backends can be configured via [`~/.dstack/server/config.yml`](../reference/server/config.yml.md), use the control plane UI to fully manage -> projects, users, and user permissions. +> While project backends can be configured via [`~/.dstack/server/config.yml`](../reference/server/config.yml.md), +> use the UI to fully manage projects, users, and user permissions. -## Project backends +## Project backends { #backends } In addition to [`~/.dstack/server/config.yml`](../reference/server/config.yml.md), a global admin or a project admin can configure backends on the project settings page. diff --git a/docs/docs/guides/server-deployment.md b/docs/docs/guides/server-deployment.md index f8e1722e3..9472ee688 100644 --- a/docs/docs/guides/server-deployment.md +++ b/docs/docs/guides/server-deployment.md @@ -38,8 +38,7 @@ You can run the server either through `pip` or using Docker.
    -=== "AWS CloudFormation" - +??? info "AWS CloudFormation" If you'd like to deploy the server to a private AWS VPC, you can use our CloudFormation [template :material-arrow-top-right-thin:{ .external }](https://console.aws.amazon.com/cloudformation/home#/stacks/quickcreate?templateURL=https://get-dstack.s3.eu-west-1.amazonaws.com/cloudformation/template.yaml){:target="_blank"}. @@ -57,14 +56,14 @@ You can run the server either through `pip` or using Docker. ## Backend configuration -To use `dstack` with your own cloud accounts, create the `~/.dstack/server/config.yml` file and -[configure backends](../reference/server/config.yml.md). +To use `dstack` with cloud providers, configure [backends](../concepts/backends.md) +via the `~/.dstack/server/config.yml` file. The server loads this file on startup. -Alternatively, you can configure backends on the [project settings page](../guides/projects/#project-backends) via the control plane's UI. +Alternatively, you can configure backends on the [project settings page](../guides/administration.md#backends) via UI. > For using `dstack` with on-prem servers, no backend configuration is required. -> See [SSH fleets](../concepts/fleets.md#ssh) for more details. +> Use [SSH fleets](../concepts/fleets.md#ssh) instead. ## State persistence @@ -183,12 +182,12 @@ If you want backend credentials and user tokens to be encrypted, set up encrypti === "Client" The client backward compatibility is maintained across patch releases. A new minor release indicates that the release breaks client backward compatibility. This means you don't need to update the server when you update the client to a new patch release. Still, upgrading a client to a new minor version requires upgrading the server too. -## FAQ +## FAQs -##### Can I run multiple replicas of the dstack server? +??? info "Can I run multiple replicas of dstack server?" -Yes, you can if you configure `dstack` to use [PostgreSQL](#postgresql) and [AWS CloudWatch](#aws-cloudwatch). + Yes, you can if you configure `dstack` to use [PostgreSQL](#postgresql) and [AWS CloudWatch](#aws-cloudwatch). -##### Does the dstack server support blue-green or rolling deployments? +??? info "Does dstack server support blue-green or rolling deployments?" -Yes, it does if you configure `dstack` to use [PostgreSQL](#postgresql) and [AWS CloudWatch](#aws-cloudwatch). + Yes, it does if you configure `dstack` to use [PostgreSQL](#postgresql) and [AWS CloudWatch](#aws-cloudwatch). diff --git a/docs/docs/installation/index.md b/docs/docs/installation/index.md index c02a6df3f..dac5d6951 100644 --- a/docs/docs/installation/index.md +++ b/docs/docs/installation/index.md @@ -11,7 +11,7 @@ To use the open-source version of `dstack` with your own cloud accounts or on-pr ### (Optional) Configure backends -To use `dstack` with cloud providers, configure the appropriate [backends](../concepts/backends.md). +To use `dstack` with cloud providers, configure [backends](../concepts/backends.md). > For using `dstack` with on-prem servers, create [SSH fleets](../concepts/fleets.md#ssh) instead. diff --git a/docs/docs/reference/dstack.yml/fleet.md b/docs/docs/reference/dstack.yml/fleet.md index c421efa7c..537ddb109 100644 --- a/docs/docs/reference/dstack.yml/fleet.md +++ b/docs/docs/reference/dstack.yml/fleet.md @@ -53,59 +53,3 @@ The `fleet` configuration type allows creating and updating fleets. #SCHEMA# dstack._internal.core.models.profiles.ProfileRetry overrides: show_root_heading: false - -## Examples - -### Cloud fleet - -
    - -```yaml -type: fleet -# The name is optional, if not specified, generated randomly -name: my-fleet - -# The number of instances -nodes: 4 -# Ensure the instances are interconnected -placement: cluster - -# Uncomment to leverage spot instances -#spot_policy: auto - -resources: - gpu: - # 24GB or more vRAM - memory: 24GB.. - # One or more GPU - count: 1.. -``` - -
    - -### SSH fleet - -
    - -```yaml -type: fleet -# The name is optional, if not specified, generated randomly -name: my-ssh-fleet - -# Ensure instances are interconnected -placement: cluster - -# The user, private SSH key, and hostnames of the on-prem servers -ssh_config: - user: ubuntu - identity_file: ~/.ssh/id_rsa - hosts: - - 3.255.177.51 - - 3.255.177.52 -``` - -
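Applying either configuration provisions (or, for SSH fleets, connects) the instances, after which the fleet can be inspected — a sketch (the file name is an assumption):

```shell
$ dstack apply -f fleet.dstack.yml
# List fleets and their instances
$ dstack fleet
```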
    - -[//]: # (TODO: a cluster, individual user and identity file, etc) - -[//]: # (TODO: other examples, for all properties like in dev-environment/task/service) \ No newline at end of file diff --git a/docs/docs/reference/dstack.yml/service.md b/docs/docs/reference/dstack.yml/service.md index 54e1f57f6..8d661743c 100644 --- a/docs/docs/reference/dstack.yml/service.md +++ b/docs/docs/reference/dstack.yml/service.md @@ -138,321 +138,3 @@ The `service` configuration type allows running [services](../../concepts/servic * `volume-name:/container/path` for network volumes * `/instance/path:/container/path` for instance volumes - -## Examples - -### Models { #model-mapping } - -If you are running a chat model with an OpenAI-compatible interface, -set the [`model`](#model) property to make the model accessible via -the OpenAI-compatible endpoint provided by `dstack`. - -
    - -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: llama31-service - -python: "3.10" - -# Required environment variables -env: - - HF_TOKEN -commands: - - pip install vllm - - vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct --max-model-len 4096 -# Expose the port of the service -port: 8000 - -resources: - # Change to what is required - gpu: 24GB - -# Register the model -model: meta-llama/Meta-Llama-3.1-8B-Instruct - -# Alternatively, use this syntax to set more model settings: -# model: -# type: chat -# name: meta-llama/Meta-Llama-3.1-8B-Instruct -# format: openai -# prefix: /v1 -``` - -
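As a quick check, the registered model can be queried with a standard OpenAI-style request — a sketch, assuming a gateway at `gateway.example.com` and a valid `dstack` token:

```shell
$ curl https://gateway.example.com/v1/chat/completions \
    -H 'Authorization: Bearer <dstack token>' \
    -H 'Content-Type: application/json' \
    -d '{"model": "meta-llama/Meta-Llama-3.1-8B-Instruct",
         "messages": [{"role": "user", "content": "Hello!"}]}'
```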
-
-Once the service is up, the model will be available via the OpenAI-compatible endpoint
-at `<dstack server URL>/proxy/models/<project name>`
-or at `https://gateway.<gateway domain>` if your project has a gateway.
-
-### Auto-scaling
-
-By default, `dstack` runs a single replica of the service.
-You can configure the number of replicas as well as the auto-scaling rules.
-
-<div>
-
-```yaml
-type: service
-# The name is optional, if not specified, generated randomly
-name: llama31-service
-
-python: "3.10"
-
-# Required environment variables
-env:
-  - HF_TOKEN
-commands:
-  - pip install vllm
-  - vllm serve meta-llama/Meta-Llama-3.1-8B-Instruct --max-model-len 4096
-# Expose the port of the service
-port: 8000
-
-resources:
-  # Change to what is required
-  gpu: 24GB
-
-# Minimum and maximum number of replicas
-replicas: 1..4
-scaling:
-  # Requests per second
-  metric: rps
-  # Target metric value
-  target: 10
-```
-
-</div>
    - -The [`replicas`](#replicas) property can be a number or a range. - -> The [`metric`](#metric) property of [`scaling`](#scaling) only supports the `rps` metric (requests per second). In this -> case `dstack` adjusts the number of replicas (scales up or down) automatically based on the load. - -Setting the minimum number of replicas to `0` allows the service to scale down to zero when there are no requests. - -!!! info "Gateway" - Services with a fixed number of replicas are supported both with and without a - [gateway](../../concepts/gateways.md). - Auto-scaling is currently only supported for services running with a gateway. - -### Resources { #resources_ } - -If you specify memory size, you can either specify an explicit size (e.g. `24GB`) or a -range (e.g. `24GB..`, or `24GB..80GB`, or `..80GB`). - -
    - -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: http-server-service - -python: "3.10" - -# Commands of the service -commands: - - pip install vllm - - python -m vllm.entrypoints.openai.api_server - --model mistralai/Mixtral-8X7B-Instruct-v0.1 - --host 0.0.0.0 - --tensor-parallel-size $DSTACK_GPUS_NUM -# Expose the port of the service -port: 8000 - -resources: - # 2 GPUs of 80GB - gpu: 80GB:2 - - # Minimum disk size - disk: 200GB -``` - -
    - -The `gpu` property allows specifying not only memory size but also GPU vendor, names -and their quantity. Examples: `nvidia` (one NVIDIA GPU), `A100` (one A100), `A10G,A100` (either A10G or A100), -`A100:80GB` (one A100 of 80GB), `A100:2` (two A100), `24GB..40GB:2` (two GPUs between 24GB and 40GB), -`A100:40GB:2` (two A100 GPUs of 40GB). - -??? info "Shared memory" - If you are using parallel communicating processes (e.g., dataloaders in PyTorch), you may need to configure - `shm_size`, e.g. set it to `16GB`. - -### Authorization - -By default, the service endpoint requires the `Authorization` header with `"Bearer "`. -Authorization can be disabled by setting `auth` to `false`. - -
    - -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: http-server-service - -# Disable authorization -auth: false - -python: "3.10" - -# Commands of the service -commands: - - python3 -m http.server -# The port of the service -port: 8000 -``` - -
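With `auth` disabled, the endpoint can be called without a token — a sketch, assuming no gateway, the default proxy path layout, and a hypothetical `main` project:

```shell
# No Authorization header is needed when auth is false
$ curl http://localhost:3000/proxy/services/main/http-server-service/
```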
    - -### Environment variables - -
    - -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: llama-2-7b-service - -python: "3.10" - -# Environment variables -env: - - HF_TOKEN - - MODEL=NousResearch/Llama-2-7b-chat-hf -# Commands of the service -commands: - - pip install vllm - - python -m vllm.entrypoints.openai.api_server --model $MODEL --port 8000 -# The port of the service -port: 8000 - -resources: - # Required GPU vRAM - gpu: 24GB -``` - -
    - -> If you don't assign a value to an environment variable (see `HF_TOKEN` above), -`dstack` will require the value to be passed via the CLI or set in the current process. -For instance, you can define environment variables in a `.envrc` file and utilize tools like `direnv`. - -??? info "System environment variables" - The following environment variables are available in any run by default: - - | Name | Description | - |-------------------------|-----------------------------------------| - | `DSTACK_RUN_NAME` | The name of the run | - | `DSTACK_REPO_ID` | The ID of the repo | - | `DSTACK_GPUS_NUM` | The total number of GPUs in the run | - -### Spot policy - -You can choose whether to use spot instances, on-demand instances, or any available type. - -
    - -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: http-server-service - -commands: - - python3 -m http.server -# The port of the service -port: 8000 - -# Uncomment to leverage spot instances -#spot_policy: auto -``` - -
    - -The `spot_policy` accepts `spot`, `on-demand`, and `auto`. The default for services is `on-demand`. - -### Backends - -By default, `dstack` provisions instances in all configured backends. However, you can specify the list of backends: - -
    - -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: http-server-service - -# Commands of the service -commands: - - python3 -m http.server -# The port of the service -port: 8000 - -# Use only listed backends -backends: [aws, gcp] -``` - -
    - -### Regions - -By default, `dstack` uses all configured regions. However, you can specify the list of regions: - -
    - -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: http-server-service - -# Commands of the service -commands: - - python3 -m http.server -# The port of the service -port: 8000 - -# Use only listed regions -regions: [eu-west-1, eu-west-2] -``` - -
    - -### Volumes - -Volumes allow you to persist data between runs. -To attach a volume, simply specify its name using the `volumes` property and specify where to mount its contents: - -
    - -```yaml -type: service -# The name is optional, if not specified, generated randomly -name: http-server-service - -# Commands of the service -commands: - - python3 -m http.server -# The port of the service -port: 8000 - -# Map the name of the volume to any path -volumes: - - name: my-new-volume - path: /volume_data -``` - -
-
-Once you run this configuration, the contents of the volume will be attached to `/volume_data` inside the service,
-and its contents will persist across runs.
-
-??? Info "Instance volumes"
-    If data persistence is not a strict requirement, you can also use
-    ephemeral [instance volumes](../../concepts/volumes.md#instance-volumes).
-
-!!! info "Limitations"
-    When you're running a dev environment, task, or service with `dstack`, it automatically mounts the project folder contents
-    to `/workflow` (and sets that as the current working directory). Right now, `dstack` doesn't allow you to
-    attach volumes to `/workflow` or any of its subdirectories.
-
-The `service` configuration type supports many other options. See below.
\ No newline at end of file
diff --git a/docs/docs/reference/dstack.yml/task.md b/docs/docs/reference/dstack.yml/task.md
index 84cc54c0a..f08cf06cb 100644
--- a/docs/docs/reference/dstack.yml/task.md
+++ b/docs/docs/reference/dstack.yml/task.md
@@ -75,130 +75,3 @@ The `task` configuration type allows running [tasks](../../concepts/tasks.md).
 
   * `volume-name:/container/path` for network volumes
   * `/instance/path:/container/path` for instance volumes
-
-## Examples
-
-
-[//]: # (See [tasks](../../tasks.md#configure-ports) for more detail.)
-
-
-
-### Distributed tasks
-
-By default, a task runs on a single node. However, you can run it on a cluster of nodes by specifying `nodes`:
-
-<div>
    - -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train-distrib - -# The size of the cluster -nodes: 2 - -python: "3.10" - -# Commands of the task -commands: - - pip install -r requirements.txt - - torchrun - --nproc_per_node=$DSTACK_GPUS_PER_NODE - --node_rank=$DSTACK_NODE_RANK - --nnodes=$DSTACK_NODES_NUM - --master_addr=$DSTACK_MASTER_NODE_IP - --master_port=8008 resnet_ddp.py - --num_epochs 20 - -resources: - gpu: 24GB -``` - -
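For intuition, here is roughly what the `torchrun` invocation above expands to on the first node of this two-node run — the values are illustrative, not real output:

```shell
# Hypothetical values on node 0 (rank 0) of a 2-node run with one GPU per node
torchrun --nproc_per_node=1 --node_rank=0 --nnodes=2 \
  --master_addr=10.0.0.4 --master_port=8008 \
  resnet_ddp.py --num_epochs 20
```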
    - - - -### Backends - -By default, `dstack` provisions instances in all configured backends. However, you can specify the list of backends: - -
    - -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py - -# Use only listed backends -backends: [aws, gcp] -``` - -
    - -### Regions - -By default, `dstack` uses all configured regions. However, you can specify the list of regions: - -
    - -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: train - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py - -# Use only listed regions -regions: [eu-west-1, eu-west-2] -``` - -
    - -### Volumes - -Volumes allow you to persist data between runs. -To attach a volume, simply specify its name using the `volumes` property and specify where to mount its contents: - -
    - -```yaml -type: task -# The name is optional, if not specified, generated randomly -name: vscode - -python: "3.10" - -# Commands of the task -commands: - - pip install -r fine-tuning/qlora/requirements.txt - - python fine-tuning/qlora/train.py - -# Map the name of the volume to any path -volumes: - - name: my-new-volume - path: /volume_data -``` - -
-
-Once you run this configuration, the contents of the volume will be attached to `/volume_data` inside the task,
-and its contents will persist across runs.
-
-??? Info "Instance volumes"
-    If data persistence is not a strict requirement, you can also use
-    ephemeral [instance volumes](../../concepts/volumes.md#instance-volumes).
-
-!!! info "Limitations"
-    When you're running a dev environment, task, or service with `dstack`, it automatically mounts the project folder contents
-    to `/workflow` (and sets that as the current working directory). Right now, `dstack` doesn't allow you to
-    attach volumes to `/workflow` or any of its subdirectories.
-
-The `task` configuration type supports many other options. See below.
\ No newline at end of file
diff --git a/docs/docs/reference/server/config.yml.md b/docs/docs/reference/server/config.yml.md
index 208bb9e30..b329dd843 100644
--- a/docs/docs/reference/server/config.yml.md
+++ b/docs/docs/reference/server/config.yml.md
@@ -1,8 +1,7 @@
 # ~/.dstack/server/config.yml
 
 The `~/.dstack/server/config.yml` file is used
-to [configure](../../installation/index.md#1-configure-backends) the `dstack` server cloud accounts
-and other sever-level settings such as encryption.
+to configure [backends](../../concepts/backends.md) and other [server-level settings](../../guides/server-deployment.md).
 
 ## Root reference
 
From fcedfec25bbc1c0684efeceb618bb2ecb71fabe9 Mon Sep 17 00:00:00 2001
From: peterschmidt85
Date: Mon, 6 Jan 2025 14:43:01 +0100
Subject: [PATCH 04/13] Merge branch 'master' into 2170-docs-many-docs-improvements

---
 docs/docs/concepts/fleets.md                          |  6 +++---
 docs/docs/concepts/snippets/manage-fleets.ext         | 10 +++++-----
 docs/docs/dev-environments.md                         |  0
 docs/docs/services.md                                 |  0
 docs/docs/tasks.md                                    |  0
 .../alignment-handbook/fleet-distrib.dstack.yml       |  2 +-
 .../fine-tuning/alignment-handbook/fleet.dstack.yml   |  2 +-
 7 files changed, 10 insertions(+), 10 deletions(-)
 delete mode 100644 docs/docs/dev-environments.md
 delete mode 100644 docs/docs/services.md
 delete mode 100644 docs/docs/tasks.md

diff --git a/docs/docs/concepts/fleets.md b/docs/docs/concepts/fleets.md
index abe17221f..6b6e45cf4 100644
--- a/docs/docs/concepts/fleets.md
+++ b/docs/docs/concepts/fleets.md
@@ -104,11 +104,11 @@ and their quantity. Examples: `nvidia` (one NVIDIA GPU), `A100` (one A100), `A10
 
 Currently, only 8 TPU cores can be specified, supporting single TPU device workloads. Multi-TPU support is coming soon.
 
-#### Termination policy
+#### Idle duration
 
 By default, fleet instances remain active until the fleet is explicitly deleted via `dstack fleet delete`.
 
-To automatically terminate `idle` instances after a certain period, configure `termination_idle_time`.
+To automatically terminate `idle` instances after a certain period, configure `idle_duration`.
 
 <div>
    @@ -120,7 +120,7 @@ To automatically terminate `idle` instances after a certain period, configure `t nodes: 2 # Terminate instances idle for more than 1 hour - termination_idle_time: 1h + idle_duration: 1h resources: gpu: 24GB diff --git a/docs/docs/concepts/snippets/manage-fleets.ext b/docs/docs/concepts/snippets/manage-fleets.ext index df11d6be1..d9e76c159 100644 --- a/docs/docs/concepts/snippets/manage-fleets.ext +++ b/docs/docs/concepts/snippets/manage-fleets.ext @@ -17,13 +17,13 @@ $ dstack apply -R -f examples/.dstack.yml Or, set [`creation_policy`](../reference/dstack.yml/dev-environment.md#creation_policy) to `reuse` in the run configuration. -### Termination policy +### Idle duration -If a fleet is created automatically, once the run is finished, by default, it remains `idle` for 5 minutes -and can be reused by other runs. +If a fleet is created automatically, it stays `idle` for 5 minutes by default and can be reused within that time. +If the fleet is not reused within this period, it is automatically terminated. To change the default idle duration, set -[`termination_idle_time`](../reference/dstack.yml/fleet.md#termination_idle_time) in the run configuration (e.g., to `0` or a -longer duration). +[`idle_duration`](../reference/dstack.yml/fleet.md#idle_duration) in the run configuration (e.g., `0s`, `1m`, or `off` for +unlimited). !!! info "Fleets" For greater control over fleet provisioning, it is recommended to create diff --git a/docs/docs/dev-environments.md b/docs/docs/dev-environments.md deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/docs/services.md b/docs/docs/services.md deleted file mode 100644 index e69de29bb..000000000 diff --git a/docs/docs/tasks.md b/docs/docs/tasks.md deleted file mode 100644 index e69de29bb..000000000 diff --git a/examples/fine-tuning/alignment-handbook/fleet-distrib.dstack.yml b/examples/fine-tuning/alignment-handbook/fleet-distrib.dstack.yml index 94ac80bbc..6cc965fe7 100644 --- a/examples/fine-tuning/alignment-handbook/fleet-distrib.dstack.yml +++ b/examples/fine-tuning/alignment-handbook/fleet-distrib.dstack.yml @@ -10,7 +10,7 @@ placement: cluster # Uncomment to leverage spot instances #spot_policy: auto # Terminate instances if not used for one hour -termination_idle_time: 1h +idle_duration: 1h resources: gpu: diff --git a/examples/fine-tuning/alignment-handbook/fleet.dstack.yml b/examples/fine-tuning/alignment-handbook/fleet.dstack.yml index 7caad34ee..d7480b86b 100644 --- a/examples/fine-tuning/alignment-handbook/fleet.dstack.yml +++ b/examples/fine-tuning/alignment-handbook/fleet.dstack.yml @@ -8,7 +8,7 @@ nodes: 1 # Uncomment to leverage spot instances #spot_policy: auto # Terminate the instance if not used for one hour -termination_idle_time: 1h +idle_duration: 1h resources: gpu: From 965f0c00459c1c7e823fd92bec211f9f527cda85 Mon Sep 17 00:00:00 2001 From: Andrey Cheptsov <54148038+peterschmidt85@users.noreply.github.com> Date: Wed, 8 Jan 2025 17:12:20 +0100 Subject: [PATCH 05/13] Update docs/docs/concepts/gateways.md Co-authored-by: jvstme <36324149+jvstme@users.noreply.github.com> --- docs/docs/concepts/gateways.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/concepts/gateways.md b/docs/docs/concepts/gateways.md index c301a1f06..723bfdb86 100644 --- a/docs/docs/concepts/gateways.md +++ b/docs/docs/concepts/gateways.md @@ -2,7 +2,7 @@ Gateways manage the ingress traffic of running [services](services.md) provide an HTTPS endpoint mapped to your domain, -and handling auto-scaling. 
+and handle auto-scaling. > If you're using [dstack Sky :material-arrow-top-right-thin:{ .external }](https://sky.dstack.ai){:target="_blank"}, > the gateway is already set up for you. From 4778e907544767da181b152e7291a521f3dc6125 Mon Sep 17 00:00:00 2001 From: Andrey Cheptsov <54148038+peterschmidt85@users.noreply.github.com> Date: Wed, 8 Jan 2025 17:12:29 +0100 Subject: [PATCH 06/13] Update docs/docs/concepts/gateways.md Co-authored-by: jvstme <36324149+jvstme@users.noreply.github.com> --- docs/docs/concepts/gateways.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/concepts/gateways.md b/docs/docs/concepts/gateways.md index 723bfdb86..b531a3a80 100644 --- a/docs/docs/concepts/gateways.md +++ b/docs/docs/concepts/gateways.md @@ -1,6 +1,6 @@ # Gateways -Gateways manage the ingress traffic of running [services](services.md) +Gateways manage the ingress traffic of running [services](services.md), provide an HTTPS endpoint mapped to your domain, and handle auto-scaling. From 9bc0d9b0fbddb5148c07e565ebfeab5cfc6c1bb6 Mon Sep 17 00:00:00 2001 From: Andrey Cheptsov <54148038+peterschmidt85@users.noreply.github.com> Date: Wed, 8 Jan 2025 17:12:46 +0100 Subject: [PATCH 07/13] Update docs/docs/concepts/backends.md Co-authored-by: jvstme <36324149+jvstme@users.noreply.github.com> --- docs/docs/concepts/backends.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/concepts/backends.md b/docs/docs/concepts/backends.md index c2df1c34e..978a0c665 100644 --- a/docs/docs/concepts/backends.md +++ b/docs/docs/concepts/backends.md @@ -1,6 +1,6 @@ # Backends -To use `dstack` with cloud providers, configure [backends](../concepts/backends.md) +To use `dstack` with cloud providers, configure backends via the `~/.dstack/server/config.yml` file. The server loads this file on startup. From 732cf2bc5435c8ee58e961097527a799c56f7ece Mon Sep 17 00:00:00 2001 From: Andrey Cheptsov <54148038+peterschmidt85@users.noreply.github.com> Date: Wed, 8 Jan 2025 17:13:04 +0100 Subject: [PATCH 08/13] Update docs/docs/concepts/volumes.md Co-authored-by: jvstme <36324149+jvstme@users.noreply.github.com> --- docs/docs/concepts/volumes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/concepts/volumes.md b/docs/docs/concepts/volumes.md index a79fbe72e..ab3746167 100644 --- a/docs/docs/concepts/volumes.md +++ b/docs/docs/concepts/volumes.md @@ -74,7 +74,7 @@ To create or register the volume, pass the volume configuration to `dstack apply $ dstack apply -f volume.dstack.yml Volume my-volume does not exist yet. Create the volume? [y/n]: y - NAME BACKEND REGION STATUS CREATED + NAME BACKEND REGION STATUS CREATED my-volume aws eu-central-1 submitted now ``` From 325e5f527303df5c8297aeedbf6927628f9fdbe7 Mon Sep 17 00:00:00 2001 From: Andrey Cheptsov <54148038+peterschmidt85@users.noreply.github.com> Date: Wed, 8 Jan 2025 17:14:59 +0100 Subject: [PATCH 09/13] Update docs/docs/concepts/services.md Co-authored-by: jvstme <36324149+jvstme@users.noreply.github.com> --- docs/docs/concepts/services.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/docs/concepts/services.md b/docs/docs/concepts/services.md index 07971e7cd..0e8e38331 100644 --- a/docs/docs/concepts/services.md +++ b/docs/docs/concepts/services.md @@ -75,9 +75,9 @@ scaling:
    -The [`replicas`](#replicas) property can be a number or a range. +The [`replicas`](../reference/dstack.yml/service.md#replicas) property can be a number or a range. -The [`metric`](#metric) property of [`scaling`](#scaling) only supports the `rps` metric (requests per second). In this +The [`metric`](../reference/dstack.yml/service.md#metric) property of [`scaling`](../reference/dstack.yml/service.md#scaling) only supports the `rps` metric (requests per second). In this case `dstack` adjusts the number of replicas (scales up or down) automatically based on the load. Setting the minimum number of replicas to `0` allows the service to scale down to zero when there are no requests. From f6a5a178b094d4d961cff00735606c14bd49eb93 Mon Sep 17 00:00:00 2001 From: Andrey Cheptsov <54148038+peterschmidt85@users.noreply.github.com> Date: Wed, 8 Jan 2025 17:15:47 +0100 Subject: [PATCH 10/13] Update docs/docs/concepts/services.md Co-authored-by: jvstme <36324149+jvstme@users.noreply.github.com> --- docs/docs/concepts/services.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/concepts/services.md b/docs/docs/concepts/services.md index 0e8e38331..3a46a2a8b 100644 --- a/docs/docs/concepts/services.md +++ b/docs/docs/concepts/services.md @@ -83,7 +83,7 @@ case `dstack` adjusts the number of replicas (scales up or down) automatically b Setting the minimum number of replicas to `0` allows the service to scale down to zero when there are no requests. !!! info "Gateways" - The `scaling` property currently requires creating a [gateway](#manage-gateways). + The `scaling` property currently requires creating a [gateway](gateways.md). This requirement is expected to be removed soon. ### Authorization From 3952d726951e59e5f45d76d3f1f9aa6d6ba664a3 Mon Sep 17 00:00:00 2001 From: Andrey Cheptsov <54148038+peterschmidt85@users.noreply.github.com> Date: Wed, 8 Jan 2025 17:16:15 +0100 Subject: [PATCH 11/13] Update docs/docs/concepts/tasks.md Co-authored-by: jvstme <36324149+jvstme@users.noreply.github.com> --- docs/docs/concepts/tasks.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/concepts/tasks.md b/docs/docs/concepts/tasks.md index 1e158fa7a..aaad78dce 100644 --- a/docs/docs/concepts/tasks.md +++ b/docs/docs/concepts/tasks.md @@ -309,7 +309,7 @@ If you don't assign a value to an environment variable (see `HF_TOKEN` above), ### Spot policy By default, `dstack` uses on-demand instances. However, you can change that -via the [`spot_policy`](../reference/dstack.yml/dev-environment.md#spot_policy) property. It accepts `spot`, `on-demand`, and `auto`. +via the [`spot_policy`](../reference/dstack.yml/task.md#spot_policy) property. It accepts `spot`, `on-demand`, and `auto`. !!! info "Reference" Tasks support many more configuration options, From 246aa2595f2c6cd6c2cff224ce9b90c517d27a91 Mon Sep 17 00:00:00 2001 From: peterschmidt85 Date: Wed, 8 Jan 2025 17:18:19 +0100 Subject: [PATCH 12/13] [Docs] Many docs improvements #2170 --- docs/docs/guides/troubleshooting.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/guides/troubleshooting.md b/docs/docs/guides/troubleshooting.md index 11eb77369..28f463ade 100644 --- a/docs/docs/guides/troubleshooting.md +++ b/docs/docs/guides/troubleshooting.md @@ -94,7 +94,7 @@ pointing to the gateway's hostname is configured. 
#### Cause 1: Bad Authorization -If the service endpoint returns a 403 error, it is likely because the [`Authorization`](../concepts/services.md#access-the-endpoint) +If the service endpoint returns a 403 error, it is likely because the [`Authorization`](../concepts/services.md#service-endpoint) header with the correct `dstack` token was not provided. [//]: # (#### Other) From 9ac9c115537bb3be726076cafb43682e27537b02 Mon Sep 17 00:00:00 2001 From: Andrey Cheptsov <54148038+peterschmidt85@users.noreply.github.com> Date: Wed, 8 Jan 2025 17:18:34 +0100 Subject: [PATCH 13/13] Update docs/docs/concepts/services.md Co-authored-by: jvstme <36324149+jvstme@users.noreply.github.com> --- docs/docs/concepts/services.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/concepts/services.md b/docs/docs/concepts/services.md index 3a46a2a8b..6fdb7170b 100644 --- a/docs/docs/concepts/services.md +++ b/docs/docs/concepts/services.md @@ -306,7 +306,7 @@ resources: ### Spot policy By default, `dstack` uses on-demand instances. However, you can change that -via the [`spot_policy`](../reference/dstack.yml/task.md#spot_policy) property. It accepts `spot`, `on-demand`, and `auto`. +via the [`spot_policy`](../reference/dstack.yml/service.md#spot_policy) property. It accepts `spot`, `on-demand`, and `auto`. !!! info "Reference" Services support many more configuration options,