diff --git a/.github/workflows/lint-test.yaml b/.github/workflows/lint-test.yaml new file mode 100644 index 0000000000000..a6a295198c48d --- /dev/null +++ b/.github/workflows/lint-test.yaml @@ -0,0 +1,87 @@ +name: Lint and Test Charts + +on: pull_request + +jobs: + lint-test: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v3 + with: + fetch-depth: 0 + + - name: Set up Helm + uses: azure/setup-helm@v4.2.0 + with: + version: v3.14.4 + + #Python is required because ct lint runs Yamale and yamllint which require Python. + - uses: actions/setup-python@v3 + with: + python-version: 3.7 + + - name: Set up chart-testing + uses: helm/chart-testing-action@v2.6.1 + with: + version: v3.10.1 + + - name: Run chart-testing (lint) + run: ct lint --target-branch ${{ github.event.repository.default_branch }} --chart-dirs examples/chart-helm --charts examples/chart-helm + + - name: Setup minio + run: | + docker network create vllm-net + docker run -d -p 9000:9000 --name minio --net vllm-net \ + -e "MINIO_ACCESS_KEY=minioadmin" \ + -e "MINIO_SECRET_KEY=minioadmin" \ + -v /tmp/data:/data \ + -v /tmp/config:/root/.minio \ + minio/minio server /data + export AWS_ACCESS_KEY_ID=minioadmin + export AWS_SECRET_ACCESS_KEY=minioadmin + export AWS_EC2_METADATA_DISABLED=true + mkdir opt-125m + cd opt-125m && curl -O -Ls "https://huggingface.co/facebook/opt-125m/resolve/main/{pytorch_model.bin,config.json,generation_config.json,merges.txt,special_tokens_map.json,tokenizer_config.json,vocab.json}" && cd .. + aws --endpoint-url http://127.0.0.1:9000/ s3 mb s3://testbucket + aws --endpoint-url http://127.0.0.1:9000/ s3 cp opt-125m/ s3://testbucket/opt-125m --recursive + + - name: Create kind cluster + uses: helm/kind-action@v1.10.0 + + - name: Configuration of docker images, network and namespace for the kind cluster + run: | + docker pull adsai/vllm-cpu-env:latest + docker pull amazon/aws-cli:2.6.4 + kind load docker-image amazon/aws-cli:2.6.4 --name chart-testing + kind load docker-image adsai/vllm-cpu-env:latest --name chart-testing + docker network connect vllm-net "$(docker ps -aqf "name=chart-testing-control-plane")" + kubectl create ns ns-vllm + + - name: Run chart-testing (install) + run: | + export AWS_ACCESS_KEY_ID=minioadmin + export AWS_SECRET_ACCESS_KEY=minioadmin + helm install --wait --wait-for-jobs --timeout 5m0s --debug --create-namespace --namespace=ns-vllm test-vllm examples/chart-helm -f examples/chart-helm/values.yaml --set secrets.s3endpoint=http://minio:9000 --set secrets.s3bucketname=testbucket --set secrets.s3accesskeyid=$AWS_ACCESS_KEY_ID --set secrets.s3accesskey=$AWS_SECRET_ACCESS_KEY --set image.env[0].name=VLLM_CPU_KVCACHE_SPACE --set resources.requests.cpu=1 --set resources.requests.memory=4Gi --set resources.limits.cpu=2 --set resources.limits.memory=5Gi --set-string image.env[0].value="1" --set-string extraInit.s3modelpath="opt-125m/" --set-string 'resources.limits.nvidia\.com/gpu=0' --set-string 'resources.requests.nvidia\.com/gpu=0' --set-string image.repository="adsai/vllm-cpu-env" + + - name: curl test + run: | + kubectl -n ns-vllm port-forward service/test-vllm-service 8001:80 & + sleep 10 + curl -f --location http://localhost:8001/v1/completions \ + --header "Content-Type: application/json" \ + --data '{ + "model": "opt-125m", + "prompt": "San Francisco is a", + "max_tokens": 7, + "temperature": 0 + }' + CODE="$(curl -v -f --location http://localhost:8001/v1/completions \ + --header "Content-Type: application/json" \ + --data '{ + "model": "opt-125m", + "prompt": "San Francisco is a", + "max_tokens": 7, + "temperature": 0 + }'):$CODE" + echo "$CODE" \ No newline at end of file diff --git a/examples/chart-helm/.helmignore b/examples/chart-helm/.helmignore new file mode 100644 index 0000000000000..2d1303b784cb8 --- /dev/null +++ b/examples/chart-helm/.helmignore @@ -0,0 +1,6 @@ +*.png +.git/ +ct.yaml +lintconf.yaml +values.schema.json +/workflows \ No newline at end of file diff --git a/examples/chart-helm/Chart.yaml b/examples/chart-helm/Chart.yaml new file mode 100644 index 0000000000000..fb0f06f6d2701 --- /dev/null +++ b/examples/chart-helm/Chart.yaml @@ -0,0 +1,21 @@ +apiVersion: v2 +name: chart-vllm +description: Chart vllm + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.0.1 + +maintainers: + - name: mfournioux diff --git a/examples/chart-helm/README.md b/examples/chart-helm/README.md new file mode 100644 index 0000000000000..bebb769660277 --- /dev/null +++ b/examples/chart-helm/README.md @@ -0,0 +1,83 @@ +# chart-vllm + +![Version: 0.0.1](https://img.shields.io/badge/Version-0.0.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) + +A Helm chart to deploy vllm for Kubernetes + +## Installing the chart + +To install the chart with the release name `test-vllm` + +```console +helm upgrade --install --create-namespace --namespace=ns-vllm test-vllm . -f values.yaml --set secrets.s3endpoint=$ACCESS_POINT --set secrets.s3buckername=$BUCKET --set secrets.s3accesskeyid=$ACCESS_KEY --set secrets.s3accesskey=$SECRET_KEY +``` +## ➖ Uninstalling the Chart + +To uninstall the `test-vllm` deployment + +```console +helm uninstall test-vllm --namespace=ns-vllm +``` + +The command removes all the Kubernetes components associated with the chart **including persistent volumes** and deletes the release. + +## Architecture + +![Architecture](architecture.excalidraw.png) + +## Values + +| Key | Type | Default | Description | +|-----|------|---------|-------------| +| autoscaling | object | `{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80}` | Autoscaling configuration | +| autoscaling.enabled | bool | `false` | Enable autoscaling | +| autoscaling.maxReplicas | int | `100` | Maximum replicas | +| autoscaling.minReplicas | int | `1` | Minimum replicas | +| autoscaling.targetCPUUtilizationPercentage | int | `80` | Target CPU utilization for autoscaling | +| configs | object | `{}` | Configmap | +| containerPort | int | `8000` | Container port | +| customObjects | list | `[]` | Custom Objects configuration | +| deploymentStrategy | object | `{}` | Deployment strategy configuration | +| externalConfigs | list | `[]` | External configuration | +| extraContainers | list | `[]` | Additional containers configuration | +| extraInit | object | `{"pvcStorage":"1Gi","s3modelpath":"relative_s3_model_path/opt-125m", "awsEc2MetadataDisabled": true}` | Additional configuration for the init container | +| extraInit.pvcStorage | string | `"50Gi"` | Storage size of the s3 | +| extraInit.s3modelpath | string | `"relative_s3_model_path/opt-125m"` | Path of the model on the s3 which hosts model weights and config files | +| extraInit.awsEc2MetadataDisabled | boolean | `true` | Disables the use of the Amazon EC2 instance metadata service | +| extraPorts | list | `[]` | Additional ports configuration | +| gpuModels | list | `["TYPE_GPU_USED"]` | Type of gpu used | +| image | object | `{"command":["vllm","serve","/data/","--served-model-name","opt-125m","--host","0.0.0.0","--port","8000"],"repository":"vllm/vllm-openai","tag":"latest"}` | Image configuration | +| image.command | list | `["vllm","serve","/data/","--served-model-name","opt-125m","--host","0.0.0.0","--port","8000"]` | Container launch command | +| image.repository | string | `"vllm/vllm-openai"` | Image repository | +| image.tag | string | `"latest"` | Image tag | +| livenessProbe | object | `{"failureThreshold":3,"httpGet":{"path":"/health","port":8000},"initialDelaySeconds":15,"periodSeconds":10}` | Liveness probe configuration | +| livenessProbe.failureThreshold | int | `3` | Number of times after which if a probe fails in a row, Kubernetes considers that the overall check has failed: the container is not alive | +| livenessProbe.httpGet | object | `{"path":"/health","port":8000}` | Configuration of the Kubelet http request on the server | +| livenessProbe.httpGet.path | string | `"/health"` | Path to access on the HTTP server | +| livenessProbe.httpGet.port | int | `8000` | Name or number of the port to access on the container, on which the server is listening | +| livenessProbe.initialDelaySeconds | int | `15` | Number of seconds after the container has started before liveness probe is initiated | +| livenessProbe.periodSeconds | int | `10` | How often (in seconds) to perform the liveness probe | +| maxUnavailablePodDisruptionBudget | string | `""` | Disruption Budget Configuration | +| readinessProbe | object | `{"failureThreshold":3,"httpGet":{"path":"/health","port":8000},"initialDelaySeconds":5,"periodSeconds":5}` | Readiness probe configuration | +| readinessProbe.failureThreshold | int | `3` | Number of times after which if a probe fails in a row, Kubernetes considers that the overall check has failed: the container is not ready | +| readinessProbe.httpGet | object | `{"path":"/health","port":8000}` | Configuration of the Kubelet http request on the server | +| readinessProbe.httpGet.path | string | `"/health"` | Path to access on the HTTP server | +| readinessProbe.httpGet.port | int | `8000` | Name or number of the port to access on the container, on which the server is listening | +| readinessProbe.initialDelaySeconds | int | `5` | Number of seconds after the container has started before readiness probe is initiated | +| readinessProbe.periodSeconds | int | `5` | How often (in seconds) to perform the readiness probe | +| replicaCount | int | `1` | Number of replicas | +| resources | object | `{"limits":{"cpu":4,"memory":"16Gi","nvidia.com/gpu":1},"requests":{"cpu":4,"memory":"16Gi","nvidia.com/gpu":1}}` | Resource configuration | +| resources.limits."nvidia.com/gpu" | int | `1` | Number of gpus used | +| resources.limits.cpu | int | `4` | Number of CPUs | +| resources.limits.memory | string | `"16Gi"` | CPU memory configuration | +| resources.requests."nvidia.com/gpu" | int | `1` | Number of gpus used | +| resources.requests.cpu | int | `4` | Number of CPUs | +| resources.requests.memory | string | `"16Gi"` | CPU memory configuration | +| secrets | object | `{}` | Secrets configuration | +| serviceName | string | | Service name | +| servicePort | int | `80` | Service port | +| labels.environment | string | `test` | Environment name | +| labels.release | string | `test` | Release name | + +---------------------------------------------- +Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2) diff --git a/examples/chart-helm/architecture.excalidraw.png b/examples/chart-helm/architecture.excalidraw.png new file mode 100644 index 0000000000000..2ba2f38f3add5 Binary files /dev/null and b/examples/chart-helm/architecture.excalidraw.png differ diff --git a/examples/chart-helm/ct.yaml b/examples/chart-helm/ct.yaml new file mode 100644 index 0000000000000..d273e118203ad --- /dev/null +++ b/examples/chart-helm/ct.yaml @@ -0,0 +1,3 @@ +chart-dirs: + - charts +validate-maintainers: false \ No newline at end of file diff --git a/examples/chart-helm/lintconf.yaml b/examples/chart-helm/lintconf.yaml new file mode 100644 index 0000000000000..c8e8c5d7d9767 --- /dev/null +++ b/examples/chart-helm/lintconf.yaml @@ -0,0 +1,42 @@ +--- +rules: + braces: + min-spaces-inside: 0 + max-spaces-inside: 0 + min-spaces-inside-empty: -1 + max-spaces-inside-empty: -1 + brackets: + min-spaces-inside: 0 + max-spaces-inside: 0 + min-spaces-inside-empty: -1 + max-spaces-inside-empty: -1 + colons: + max-spaces-before: 0 + max-spaces-after: 1 + commas: + max-spaces-before: 0 + min-spaces-after: 1 + max-spaces-after: 1 + comments: + require-starting-space: true + min-spaces-from-content: 2 + document-end: disable + document-start: disable # No --- to start a file + empty-lines: + max: 2 + max-start: 0 + max-end: 0 + hyphens: + max-spaces-after: 1 + indentation: + spaces: consistent + indent-sequences: whatever # - list indentation will handle both indentation and without + check-multi-line-strings: false + key-duplicates: enable + line-length: disable # Lines can be any length + new-line-at-end-of-file: disable + new-lines: + type: unix + trailing-spaces: enable + truthy: + level: warning \ No newline at end of file diff --git a/examples/chart-helm/templates/_helpers.tpl b/examples/chart-helm/templates/_helpers.tpl new file mode 100644 index 0000000000000..a9690bad3c945 --- /dev/null +++ b/examples/chart-helm/templates/_helpers.tpl @@ -0,0 +1,164 @@ +{{/* +Define ports for the pods +*/}} +{{- define "chart.container-port" -}} +{{- default "8000" .Values.containerPort }} +{{- end }} + +{{/* +Define service name +*/}} +{{- define "chart.service-name" -}} +{{- if .Values.serviceName }} +{{- .Values.serviceName | lower | trim }} +{{- else }} +"{{ .Release.Name }}-service" +{{- end }} +{{- end }} + +{{/* +Define service port +*/}} +{{- define "chart.service-port" -}} +{{- if .Values.servicePort }} +{{- .Values.servicePort }} +{{- else }} +{{- include "chart.container-port" . }} +{{- end }} +{{- end }} + +{{/* +Define service port name +*/}} +{{- define "chart.service-port-name" -}} +"service-port" +{{- end }} + +{{/* +Define container port name +*/}} +{{- define "chart.container-port-name" -}} +"container-port" +{{- end }} + +{{/* +Define deployment strategy +*/}} +{{- define "chart.strategy" -}} +strategy: +{{- if not .Values.deploymentStrategy }} + rollingUpdate: + maxSurge: 100% + maxUnavailable: 0 +{{- else }} +{{ toYaml .Values.deploymentStrategy | indent 2 }} +{{- end }} +{{- end }} + +{{/* +Define additional ports +*/}} +{{- define "chart.extraPorts" }} +{{- with .Values.extraPorts }} +{{ toYaml . }} +{{- end }} +{{- end }} + +{{/* +Define chart external ConfigMaps and Secrets +*/}} +{{- define "chart.externalConfigs" -}} +{{- with .Values.externalConfigs -}} +{{ toYaml . }} +{{- end }} +{{- end }} + + +{{/* +Define liveness et readiness probes +*/}} +{{- define "chart.probes" -}} +{{- if .Values.readinessProbe }} +readinessProbe: +{{- with .Values.readinessProbe }} +{{- toYaml . | nindent 2 }} +{{- end }} +{{- end }} +{{- if .Values.livenessProbe }} +livenessProbe: +{{- with .Values.livenessProbe }} +{{- toYaml . | nindent 2 }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Define resources +*/}} +{{- define "chart.resources" -}} +requests: + memory: {{ required "Value 'resources.requests.memory' must be defined !" .Values.resources.requests.memory | quote }} + cpu: {{ required "Value 'resources.requests.cpu' must be defined !" .Values.resources.requests.cpu | quote }} + {{- if and (gt (int (index .Values.resources.requests "nvidia.com/gpu")) 0) (gt (int (index .Values.resources.limits "nvidia.com/gpu")) 0) }} + nvidia.com/gpu: {{ required "Value 'resources.requests.nvidia.com/gpu' must be defined !" (index .Values.resources.requests "nvidia.com/gpu") | quote }} + {{- end }} +limits: + memory: {{ required "Value 'resources.limits.memory' must be defined !" .Values.resources.limits.memory | quote }} + cpu: {{ required "Value 'resources.limits.cpu' must be defined !" .Values.resources.limits.cpu | quote }} + {{- if and (gt (int (index .Values.resources.requests "nvidia.com/gpu")) 0) (gt (int (index .Values.resources.limits "nvidia.com/gpu")) 0) }} + nvidia.com/gpu: {{ required "Value 'resources.limits.nvidia.com/gpu' must be defined !" (index .Values.resources.limits "nvidia.com/gpu") | quote }} + {{- end }} +{{- end }} + + +{{/* +Define User used for the main container +*/}} +{{- define "chart.user" }} +{{- if .Values.image.runAsUser }} +runAsUser: +{{- with .Values.runAsUser }} +{{- toYaml . | nindent 2 }} +{{- end }} +{{- end }} +{{- end }} + +{{- define "chart.extraInitImage" -}} +"amazon/aws-cli:2.6.4" +{{- end }} + +{{- define "chart.extraInitEnv" -}} +- name: S3_ENDPOINT_URL + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-secrets + key: s3endpoint +- name: S3_BUCKET_NAME + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-secrets + key: s3bucketname +- name: AWS_ACCESS_KEY_ID + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-secrets + key: s3accesskeyid +- name: AWS_SECRET_ACCESS_KEY + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-secrets + key: s3accesskey +- name: S3_PATH + value: "{{ .Values.extraInit.s3modelpath }}" +- name: AWS_EC2_METADATA_DISABLED + value: "{{ .Values.extraInit.awsEc2MetadataDisabled }}" +{{- end }} + +{{/* + Define chart labels +*/}} +{{- define "chart.labels" -}} +{{- with .Values.labels -}} +{{ toYaml . }} +{{- end }} +{{- end }} \ No newline at end of file diff --git a/examples/chart-helm/templates/configmap.yaml b/examples/chart-helm/templates/configmap.yaml new file mode 100644 index 0000000000000..cc5d03782f878 --- /dev/null +++ b/examples/chart-helm/templates/configmap.yaml @@ -0,0 +1,11 @@ +{{- if .Values.configs -}} +apiVersion: v1 +kind: ConfigMap +metadata: + name: "{{ .Release.Name }}-configs" + namespace: {{ .Release.Namespace }} +data: + {{- with .Values.configs }} + {{- toYaml . | nindent 2 }} + {{- end }} +{{- end -}} \ No newline at end of file diff --git a/examples/chart-helm/templates/custom-objects.yaml b/examples/chart-helm/templates/custom-objects.yaml new file mode 100644 index 0000000000000..8a65ffd0e552d --- /dev/null +++ b/examples/chart-helm/templates/custom-objects.yaml @@ -0,0 +1,6 @@ +{{- if .Values.customObjects }} +{{- range .Values.customObjects }} +{{- tpl (. | toYaml) $ }} +--- +{{- end }} +{{- end }} \ No newline at end of file diff --git a/examples/chart-helm/templates/deployment.yaml b/examples/chart-helm/templates/deployment.yaml new file mode 100644 index 0000000000000..536983b587be2 --- /dev/null +++ b/examples/chart-helm/templates/deployment.yaml @@ -0,0 +1,122 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: "{{ .Release.Name }}-deployment-vllm" + namespace: {{ .Release.Namespace }} + labels: + {{- include "chart.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.replicaCount }} + {{- include "chart.strategy" . | nindent 2 }} + selector: + matchLabels: + environment: "test" + release: "test" + progressDeadlineSeconds: 1200 + template: + metadata: + labels: + environment: "test" + release: "test" + spec: + containers: + - name: "vllm" + image: "{{ required "Required value 'image.repository' must be defined !" .Values.image.repository }}:{{ required "Required value 'image.tag' must be defined !" .Values.image.tag }}" + {{- if .Values.image.command }} + command : + {{- with .Values.image.command }} + {{- toYaml . | nindent 10 }} + {{- end }} + {{- end }} + securityContext: + {{- if .Values.image.securityContext }} + {{- with .Values.image.securityContext }} + {{- toYaml . | nindent 12 }} + {{- end }} + {{- else }} + runAsNonRoot: false + {{- include "chart.user" . | indent 12 }} + {{- end }} + imagePullPolicy: IfNotPresent + {{- if .Values.image.env }} + env : + {{- with .Values.image.env }} + {{- toYaml . | nindent 10 }} + {{- end }} + {{- else }} + env: [] + {{- end }} + {{- if or .Values.externalConfigs .Values.configs .Values.secrets }} + envFrom: + {{- if .Values.configs }} + - configMapRef: + name: "{{ .Release.Name }}-configs" + {{- end }} + {{- if .Values.secrets}} + - secretRef: + name: "{{ .Release.Name }}-secrets" + {{- end }} + {{- include "chart.externalConfigs" . | nindent 12 }} + {{- end }} + ports: + - name: {{ include "chart.container-port-name" . }} + containerPort: {{ include "chart.container-port" . }} + {{- include "chart.extraPorts" . | nindent 12 }} + {{- include "chart.probes" . | indent 10 }} + resources: {{- include "chart.resources" . | nindent 12 }} + volumeMounts: + - name: {{ .Release.Name }}-storage + mountPath: /data + + {{- with .Values.extraContainers }} + {{ toYaml . | nindent 8 }} + {{- end }} + + {{- if .Values.extraInit }} + initContainers: + - name: wait-download-model + image: {{ include "chart.extraInitImage" . }} + command: + - /bin/bash + args: + - -eucx + - while aws --endpoint-url $S3_ENDPOINT_URL s3 sync --dryrun s3://$S3_BUCKET_NAME/$S3_PATH /data | grep -q download; do sleep 10; done + env: {{- include "chart.extraInitEnv" . | nindent 10 }} + resources: + requests: + cpu: 200m + memory: 1Gi + limits: + cpu: 500m + memory: 2Gi + volumeMounts: + - name: {{ .Release.Name }}-storage + mountPath: /data + {{- end }} + volumes: + - name: {{ .Release.Name }}-storage + persistentVolumeClaim: + claimName: {{ .Release.Name }}-storage-claim + + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- if and (gt (int (index .Values.resources.requests "nvidia.com/gpu")) 0) (gt (int (index .Values.resources.limits "nvidia.com/gpu")) 0) }} + runtimeClassName: nvidia + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: nvidia.com/gpu.product + operator: In + {{- with .Values.gpuModels }} + values: + {{- toYaml . | nindent 20 }} + {{- end }} + {{- end }} \ No newline at end of file diff --git a/examples/chart-helm/templates/hpa.yaml b/examples/chart-helm/templates/hpa.yaml new file mode 100644 index 0000000000000..5ca94c8213541 --- /dev/null +++ b/examples/chart-helm/templates/hpa.yaml @@ -0,0 +1,31 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: "{{ .Release.Name }}-hpa" + namespace: {{ .Release.Namespace }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: vllm + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/examples/chart-helm/templates/job.yaml b/examples/chart-helm/templates/job.yaml new file mode 100644 index 0000000000000..f9ea3541e78d2 --- /dev/null +++ b/examples/chart-helm/templates/job.yaml @@ -0,0 +1,37 @@ +{{- if .Values.extraInit }} +apiVersion: batch/v1 +kind: Job +metadata: + name: "{{ .Release.Name }}-init-vllm" + namespace: {{ .Release.Namespace }} +spec: + ttlSecondsAfterFinished: 100 + template: + metadata: + name: init-vllm + spec: + containers: + - name: job-download-model + image: {{ include "chart.extraInitImage" . }} + command: + - /bin/bash + args: + - -eucx + - aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data + env: {{- include "chart.extraInitEnv" . | nindent 8 }} + volumeMounts: + - name: {{ .Release.Name }}-storage + mountPath: /data + resources: + requests: + cpu: 200m + memory: 1Gi + limits: + cpu: 500m + memory: 2Gi + restartPolicy: OnFailure + volumes: + - name: {{ .Release.Name }}-storage + persistentVolumeClaim: + claimName: "{{ .Release.Name }}-storage-claim" +{{- end }} \ No newline at end of file diff --git a/examples/chart-helm/templates/poddisruptionbudget.yaml b/examples/chart-helm/templates/poddisruptionbudget.yaml new file mode 100644 index 0000000000000..512bac727da87 --- /dev/null +++ b/examples/chart-helm/templates/poddisruptionbudget.yaml @@ -0,0 +1,7 @@ +apiVersion: policy/v1 +kind: PodDisruptionBudget +metadata: + name: "{{ .Release.Name }}-pdb" + namespace: {{ .Release.Namespace }} +spec: + maxUnavailable: {{ default 1 .Values.maxUnavailablePodDisruptionBudget }} \ No newline at end of file diff --git a/examples/chart-helm/templates/pvc.yaml b/examples/chart-helm/templates/pvc.yaml new file mode 100644 index 0000000000000..e8d203a7a5ace --- /dev/null +++ b/examples/chart-helm/templates/pvc.yaml @@ -0,0 +1,13 @@ +{{- if .Values.extraInit }} +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: "{{ .Release.Name }}-storage-claim" + namespace: {{ .Release.Namespace }} +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{ .Values.extraInit.pvcStorage }} +{{- end }} \ No newline at end of file diff --git a/examples/chart-helm/templates/secrets.yaml b/examples/chart-helm/templates/secrets.yaml new file mode 100644 index 0000000000000..4e88e747b616a --- /dev/null +++ b/examples/chart-helm/templates/secrets.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +kind: Secret +metadata: + name: "{{ .Release.Name }}-secrets" + namespace: {{ .Release.Namespace }} +type: Opaque +data: + {{- range $key, $val := .Values.secrets }} + {{ $key }}: {{ $val | b64enc | quote }} + {{- end }} \ No newline at end of file diff --git a/examples/chart-helm/templates/service.yaml b/examples/chart-helm/templates/service.yaml new file mode 100644 index 0000000000000..fe3838c0a8214 --- /dev/null +++ b/examples/chart-helm/templates/service.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: "{{ .Release.Name }}-service" + namespace: {{ .Release.Namespace }} +spec: + type: ClusterIP + ports: + - name: {{ include "chart.service-port-name" . }} + port: {{ include "chart.service-port" . }} + targetPort: {{ include "chart.container-port-name" . }} + protocol: TCP + selector: + environment: test + release: test \ No newline at end of file diff --git a/examples/chart-helm/values.schema.json b/examples/chart-helm/values.schema.json new file mode 100644 index 0000000000000..812d54bde1397 --- /dev/null +++ b/examples/chart-helm/values.schema.json @@ -0,0 +1,265 @@ +{ + "$schema": "http://json-schema.org/schema#", + "type": "object", + "properties": { + "image": { + "type": "object", + "properties": { + "repository": { + "type": "string" + }, + "tag": { + "type": "string" + }, + "command": { + "type": "array", + "items": { + "type": "string" + } + } + }, + "required": [ + "command", + "repository", + "tag" + ] + }, + "containerPort": { + "type": "integer" + }, + "serviceName": { + "type": "null" + }, + "servicePort": { + "type": "integer" + }, + "extraPorts": { + "type": "array" + }, + "replicaCount": { + "type": "integer" + }, + "deploymentStrategy": { + "type": "object" + }, + "resources": { + "type": "object", + "properties": { + "requests": { + "type": "object", + "properties": { + "cpu": { + "type": "integer" + }, + "memory": { + "type": "string" + }, + "nvidia.com/gpu": { + "type": "integer" + } + }, + "required": [ + "cpu", + "memory", + "nvidia.com/gpu" + ] + }, + "limits": { + "type": "object", + "properties": { + "cpu": { + "type": "integer" + }, + "memory": { + "type": "string" + }, + "nvidia.com/gpu": { + "type": "integer" + } + }, + "required": [ + "cpu", + "memory", + "nvidia.com/gpu" + ] + } + }, + "required": [ + "limits", + "requests" + ] + }, + "gpuModels": { + "type": "array", + "items": { + "type": "string" + } + }, + "autoscaling": { + "type": "object", + "properties": { + "enabled": { + "type": "boolean" + }, + "minReplicas": { + "type": "integer" + }, + "maxReplicas": { + "type": "integer" + }, + "targetCPUUtilizationPercentage": { + "type": "integer" + } + }, + "required": [ + "enabled", + "maxReplicas", + "minReplicas", + "targetCPUUtilizationPercentage" + ] + }, + "configs": { + "type": "object" + }, + "secrets": { + "type": "object" + }, + "externalConfigs": { + "type": "array" + }, + "customObjects": { + "type": "array" + }, + "maxUnavailablePodDisruptionBudget": { + "type": "string" + }, + "extraInit": { + "type": "object", + "properties": { + "s3modelpath": { + "type": "string" + }, + "pvcStorage": { + "type": "string" + }, + "awsEc2MetadataDisabled": { + "type": "boolean" + } + }, + "required": [ + "pvcStorage", + "s3modelpath", + "awsEc2MetadataDisabled" + ] + }, + "extraContainers": { + "type": "array" + }, + "readinessProbe": { + "type": "object", + "properties": { + "initialDelaySeconds": { + "type": "integer" + }, + "periodSeconds": { + "type": "integer" + }, + "failureThreshold": { + "type": "integer" + }, + "httpGet": { + "type": "object", + "properties": { + "path": { + "type": "string" + }, + "port": { + "type": "integer" + } + }, + "required": [ + "path", + "port" + ] + } + }, + "required": [ + "failureThreshold", + "httpGet", + "initialDelaySeconds", + "periodSeconds" + ] + }, + "livenessProbe": { + "type": "object", + "properties": { + "initialDelaySeconds": { + "type": "integer" + }, + "failureThreshold": { + "type": "integer" + }, + "periodSeconds": { + "type": "integer" + }, + "httpGet": { + "type": "object", + "properties": { + "path": { + "type": "string" + }, + "port": { + "type": "integer" + } + }, + "required": [ + "path", + "port" + ] + } + }, + "required": [ + "failureThreshold", + "httpGet", + "initialDelaySeconds", + "periodSeconds" + ] + }, + "labels": { + "type": "object", + "properties": { + "environment": { + "type": "string" + }, + "release": { + "type": "string" + } + }, + "required": [ + "environment", + "release" + ] + } + }, + "required": [ + "autoscaling", + "configs", + "containerPort", + "customObjects", + "deploymentStrategy", + "externalConfigs", + "extraContainers", + "extraInit", + "extraPorts", + "gpuModels", + "image", + "labels", + "livenessProbe", + "maxUnavailablePodDisruptionBudget", + "readinessProbe", + "replicaCount", + "resources", + "secrets", + "servicePort" + ] +} \ No newline at end of file diff --git a/examples/chart-helm/values.yaml b/examples/chart-helm/values.yaml new file mode 100644 index 0000000000000..cf6dd15116e9c --- /dev/null +++ b/examples/chart-helm/values.yaml @@ -0,0 +1,119 @@ +# -- Default values for chart vllm +# -- Declare variables to be passed into your templates. + +# -- Image configuration +image: + # -- Image repository + repository: "vllm/vllm-openai" + # -- Image tag + tag: "latest" + # -- Container launch command + command: ["vllm", "serve", "/data/", "--served-model-name", "opt-125m", "--host", "0.0.0.0", "--port", "8000"] + +# -- Container port +containerPort: 8000 +# -- Service name +serviceName: +# -- Service port +servicePort: 80 +# -- Additional ports configuration +extraPorts: [] + +# -- Number of replicas +replicaCount: 1 + +# -- Deployment strategy configuration +deploymentStrategy: {} + +# -- Resource configuration +resources: + requests: + # -- Number of CPUs + cpu: 4 + # -- CPU memory configuration + memory: 16Gi + # -- Number of gpus used + nvidia.com/gpu: 1 + limits: + # -- Number of CPUs + cpu: 4 + # -- CPU memory configuration + memory: 16Gi + # -- Number of gpus used + nvidia.com/gpu: 1 + +# -- Type of gpu used +gpuModels: + - "TYPE_GPU_USED" + +# -- Autoscaling configuration +autoscaling: + # -- Enable autoscaling + enabled: false + # -- Minimum replicas + minReplicas: 1 + # -- Maximum replicas + maxReplicas: 100 + # -- Target CPU utilization for autoscaling + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# -- Configmap +configs: {} + +# -- Secrets configuration +secrets: {} + +# -- External configuration +externalConfigs: [] + +# -- Custom Objects configuration +customObjects: [] + +# -- Disruption Budget Configuration +maxUnavailablePodDisruptionBudget: "" + +# -- Additional configuration for the init container +extraInit: + # -- Path of the model on the s3 which hosts model weights and config files + s3modelpath: "relative_s3_model_path/opt-125m" + # -- Storage size of the s3 + pvcStorage: "1Gi" + awsEc2MetadataDisabled: true + +# -- Additional containers configuration +extraContainers: [] + +# -- Readiness probe configuration +readinessProbe: + # -- Number of seconds after the container has started before readiness probe is initiated + initialDelaySeconds: 5 + # -- How often (in seconds) to perform the readiness probe + periodSeconds: 5 + # -- Number of times after which if a probe fails in a row, Kubernetes considers that the overall check has failed: the container is not ready + failureThreshold: 3 + # -- Configuration of the Kubelet http request on the server + httpGet: + # -- Path to access on the HTTP server + path: /health + # -- Name or number of the port to access on the container, on which the server is listening + port: 8000 + +# -- Liveness probe configuration +livenessProbe: + # -- Number of seconds after the container has started before liveness probe is initiated + initialDelaySeconds: 15 + # -- Number of times after which if a probe fails in a row, Kubernetes considers that the overall check has failed: the container is not alive + failureThreshold: 3 + # -- How often (in seconds) to perform the liveness probe + periodSeconds: 10 + # -- Configuration of the Kubelet http request on the server + httpGet: + # -- Path to access on the HTTP server + path: /health + # -- Name or number of the port to access on the container, on which the server is listening + port: 8000 + +labels: + environment: "test" + release: "test"