Add example of helm chart for vllm deployment on k8s #152
Workflow file for this run
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Lints the example Helm chart under examples/chart-helm, then deploys it to a
# throw-away kind cluster (CPU build of vLLM, model served from a local MinIO
# bucket) and checks that the completions endpoint answers.
name: Lint and Test Charts

on: pull_request

jobs:
  lint-test:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          # Full history so `ct lint --target-branch` can diff against the
          # default branch.
          fetch-depth: 0

      - name: Set up Helm
        # NOTE(review): action name/tag reconstructed from an obfuscated
        # page capture ("azure/[email protected]") - confirm before merging.
        uses: azure/setup-helm@v4.2.0
        with:
          version: v3.14.4

      # Python is required because ct lint runs Yamale and yamllint, which
      # require Python.
      - uses: actions/setup-python@v3
        with:
          # Quoted so YAML keeps it a string instead of parsing a float.
          # NOTE(review): Python 3.7 is end-of-life; confirm it is still
          # installable on ubuntu-latest or bump the version.
          python-version: "3.7"

      - name: Set up chart-testing
        # NOTE(review): action name/tag reconstructed from an obfuscated
        # page capture ("helm/[email protected]") - confirm before merging.
        uses: helm/chart-testing-action@v2.6.1
        with:
          version: v3.10.1

      - name: Run chart-testing (lint)
        run: |
          ct lint \
            --target-branch ${{ github.event.repository.default_branch }} \
            --chart-dirs examples/chart-helm \
            --charts examples/chart-helm

      # Starts a MinIO container on a dedicated Docker network, downloads the
      # facebook/opt-125m files from Hugging Face and uploads them to the
      # bucket `testbucket` under the prefix `opt-125m`.
      - name: Setup minio
        run: |
          docker network create vllm-net
          docker run -d -p 9000:9000 --name minio --net vllm-net \
            -e "MINIO_ACCESS_KEY=minioadmin" \
            -e "MINIO_SECRET_KEY=minioadmin" \
            -v /tmp/data:/data \
            -v /tmp/config:/root/.minio \
            minio/minio server /data
          export AWS_ACCESS_KEY_ID=minioadmin
          export AWS_SECRET_ACCESS_KEY=minioadmin
          export AWS_EC2_METADATA_DISABLED=true
          mkdir opt-125m
          # -f makes curl exit non-zero on an HTTP error instead of saving the
          # error page as if it were a model file.
          cd opt-125m && curl -f -O -Ls "https://huggingface.co/facebook/opt-125m/resolve/main/{pytorch_model.bin,config.json,generation_config.json,merges.txt,special_tokens_map.json,tokenizer_config.json,vocab.json}" && cd ..
          aws --endpoint-url http://127.0.0.1:9000/ s3 mb s3://testbucket
          aws --endpoint-url http://127.0.0.1:9000/ s3 cp opt-125m/ s3://testbucket/opt-125m --recursive

      - name: Create kind cluster
        # NOTE(review): action name/tag reconstructed from an obfuscated
        # page capture ("helm/[email protected]") - confirm before merging.
        uses: helm/kind-action@v1.10.0

      - name: Build the Docker image vllm cpu
        run: docker buildx build --file Dockerfile.cpu --tag vllm-cpu-env --shm-size=4g .

      - name: Configuration of docker images, network and namespace for the kind cluster
        run: |
          docker pull amazon/aws-cli:2.6.4
          kind load docker-image amazon/aws-cli:2.6.4 --name chart-testing
          kind load docker-image vllm-cpu-env:latest --name chart-testing
          # Attach the kind control-plane container to the MinIO network; the
          # chart is installed below with the endpoint http://minio:9000.
          docker network connect vllm-net "$(docker ps -aqf "name=chart-testing-control-plane")"
          kubectl create ns ns-vllm

      # Installs the chart with the locally built CPU image, the MinIO
      # credentials, and GPU requests/limits forced to 0.
      - name: Run chart-testing (install)
        run: |
          export AWS_ACCESS_KEY_ID=minioadmin
          export AWS_SECRET_ACCESS_KEY=minioadmin
          helm install --wait --wait-for-jobs --timeout 5m0s --debug \
            --create-namespace --namespace=ns-vllm \
            test-vllm examples/chart-helm \
            -f examples/chart-helm/values.yaml \
            --set secrets.s3endpoint=http://minio:9000 \
            --set secrets.s3bucketname=testbucket \
            --set "secrets.s3accesskeyid=$AWS_ACCESS_KEY_ID" \
            --set "secrets.s3accesskey=$AWS_SECRET_ACCESS_KEY" \
            --set 'image.env[0].name=VLLM_CPU_KVCACHE_SPACE' \
            --set resources.requests.cpu=1 \
            --set resources.requests.memory=4Gi \
            --set resources.limits.cpu=2 \
            --set resources.limits.memory=5Gi \
            --set-string 'image.env[0].value=1' \
            --set-string 'extraInit.s3modelpath=opt-125m/' \
            --set-string 'resources.limits.nvidia\.com/gpu=0' \
            --set-string 'resources.requests.nvidia\.com/gpu=0' \
            --set-string 'image.repository=vllm-cpu-env'

      # Forwards the service to localhost and sends one completion request.
      # `curl -f` returns non-zero on an HTTP error, which fails the step.
      - name: curl test
        run: |
          kubectl -n ns-vllm port-forward service/test-vllm-service 8001:80 &
          # Give the background port-forward time to start listening.
          sleep 10
          RESPONSE="$(curl -v -f --location http://localhost:8001/v1/completions \
            --header "Content-Type: application/json" \
            --data '{
              "model": "opt-125m",
              "prompt": "San Francisco is a",
              "max_tokens": 7,
              "temperature": 0
            }')"
          echo "$RESPONSE"