# Workflow file for the run of pull request #20:
# "Add example of helm chart for vllm deployment on k8s"

# Lints the example Helm chart, then installs it into a throwaway kind
# cluster backed by a local MinIO bucket and sends one completion request
# to the deployed service.
name: Lint and Test Charts

on: pull_request

jobs:
  lint-test:
    runs-on: ubuntu-latest
    env:
      # Credentials for the local MinIO server. They are declared at job
      # level because variables exported inside one `run` step are not
      # visible to later steps.
      AWS_ACCESS_KEY_ID: minioadmin
      AWS_SECRET_ACCESS_KEY: minioadmin
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          # Fetch full history; ct compares against the target branch.
          fetch-depth: 0

      # NOTE(review): the version tags of setup-helm, chart-testing-action
      # and kind-action were unreadable in the source text; the tags used
      # in this file are the most likely originals -- confirm before merging.
      - name: Set up Helm
        uses: azure/setup-helm@v4.2.0
        with:
          version: v3.14.4

      # Python is required because ct lint runs Yamale and yamllint, which
      # require Python.
      # NOTE(review): confirm Python 3.7 is still available on the
      # ubuntu-latest runner image.
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          # Quoted so the version stays a string instead of a YAML float.
          python-version: "3.7"

      - name: Set up chart-testing
        uses: helm/chart-testing-action@v2.6.1
        with:
          version: v3.10.1

      - name: Run chart-testing (lint)
        run: >-
          ct lint
          --target-branch ${{ github.event.repository.default_branch }}
          --chart-dirs examples/chart-helm
          --charts examples/chart-helm

      # Starts MinIO on the `vllm-net` Docker network and uploads the
      # opt-125m model files to s3://testbucket/opt-125m.
      - name: Setup minio
        env:
          AWS_EC2_METADATA_DISABLED: "true"
        run: |
          docker network create vllm-net
          docker run -d -p 9000:9000 --name minio --net vllm-net \
            -e "MINIO_ACCESS_KEY=minioadmin" \
            -e "MINIO_SECRET_KEY=minioadmin" \
            -v /tmp/data:/data \
            -v /tmp/config:/root/.minio \
            minio/minio server /data
          mkdir opt-125m
          # --fail/--fail-early: stop on an HTTP error instead of saving the
          # error page as if it were a model file.
          cd opt-125m && curl --fail --fail-early -O -Ls "https://huggingface.co/facebook/opt-125m/resolve/main/{pytorch_model.bin,config.json,generation_config.json,merges.txt,tokenizer_config.json,vocab.json}" && cd ..
          aws --endpoint-url http://127.0.0.1:9000/ s3 mb s3://testbucket
          aws --endpoint-url http://127.0.0.1:9000/ s3 cp opt-125m/ s3://testbucket/opt-125m --recursive

      - name: Create kind cluster
        uses: helm/kind-action@v1.10.0

      - name: Configuration of docker images, network and namespace for the kind cluster
        run: |
          docker pull adsai/vllm-cpu-env:latest
          docker pull amazon/aws-cli:2.6.4
          kind load docker-image amazon/aws-cli:2.6.4 --name chart-testing
          kind load docker-image adsai/vllm-cpu-env:latest --name chart-testing
          # Attach the kind node to the network the MinIO container is on;
          # the chart is pointed at http://minio:9000, which is only
          # resolvable from that network.
          docker network connect vllm-net "$(docker ps -aqf "name=chart-testing-control-plane")"
          kubectl create ns ns-vllm

      # The whole --helm-extra-set-args value is one double-quoted shell
      # word, so no inner quotes are used; AWS_* come from the job-level env.
      # NOTE(review): the GPU resource key is passed as `nvidia/gpu`;
      # confirm it matches the chart's values (the usual Kubernetes resource
      # name is `nvidia.com/gpu`).
      - name: Run chart-testing (install)
        run: >-
          ct install
          --target-branch ${{ github.event.repository.default_branch }}
          --chart-dirs examples/chart-helm
          --charts examples/chart-helm
          --namespace=ns-vllm
          --helm-extra-set-args "--set secrets.s3endpoint=http://minio:9000
          --set secrets.s3bucketname=testbucket
          --set secrets.s3accesskeyid=$AWS_ACCESS_KEY_ID
          --set secrets.s3accesskey=$AWS_SECRET_ACCESS_KEY
          --set image.env[0].name=VLLM_CPU_KVCACHE_SPACE
          --set-string image.env[0].value=1
          --set-string resources.requests.cpu=1
          --set resources.requests.memory=4Gi
          --set-string resources.requests.nvidia/gpu=0
          --set-string resources.limits.cpu=2
          --set resources.limits.memory=5Gi
          --set-string resources.limits.nvidia/gpu=0"

      # NOTE(review): `ct install` normally removes the release when its
      # tests finish unless --skip-clean-up is passed, and it chooses the
      # release name itself; confirm svc/test-vllm-service exists when this
      # step runs.
      - name: curl test
        run: |
          # Run the port-forward in the background; in the foreground it
          # never returns and the request below would not be sent.
          kubectl port-forward svc/test-vllm-service 8001:80 -n ns-vllm &
          PF_PID=$!
          trap 'kill "$PF_PID" 2>/dev/null || true' EXIT
          # Give the tunnel a moment to come up.
          sleep 10
          # Capture only the HTTP status code; the body goes to a file.
          # `|| true` keeps a curl error from aborting before the check.
          CODE="$(curl --silent --show-error --location \
            --output response.json --write-out '%{http_code}' \
            http://localhost:8001/v1/completions \
            --header "Content-Type: application/json" \
            --data '{
              "model": "opt-125m",
              "prompt": "San Francisco is a",
              "max_tokens": 7,
              "temperature": 0
            }' || true)"
          if [ "$CODE" != "200" ]; then
            echo "FAILURE (HTTP status: ${CODE:-none})"
            cat response.json 2>/dev/null || true
            # Fail the job instead of only printing a message.
            exit 1
          fi
          echo "SUCCESS"
          cat response.json