# Workflow file captured from the run "Add example of helm chart for vllm deployment on k8s" #4.
# The source page warned that this file contains bidirectional Unicode text that may be
# interpreted or compiled differently than it appears. To review, open the file in an editor
# that reveals hidden Unicode characters.
# Lints the Helm chart in examples/chart-helm, installs it on a kind cluster
# that pulls its model from a local MinIO bucket, then sends one request to
# the /v1/completions endpoint as a smoke test.
name: Lint and Test Charts

on: pull_request

jobs:
  lint-test:
    runs-on: ubuntu-latest
    # Declared at job level because variables exported inside one `run` script
    # are not visible to later steps; both the MinIO upload and the chart
    # install need these credentials.
    env:
      AWS_ACCESS_KEY_ID: minioadmin
      AWS_SECRET_ACCESS_KEY: minioadmin
      AWS_EC2_METADATA_DISABLED: 'true'
    steps:
      - name: Checkout
        uses: actions/checkout@v4
        with:
          # Full history so ct can diff against the target branch.
          fetch-depth: 0

      # NOTE(review): the action version tags for setup-helm, chart-testing-action
      # and kind-action were unreadable in the captured text and have been
      # reconstructed; confirm them against the intended releases.
      - name: Set up Helm
        uses: azure/setup-helm@v4.2.0
        with:
          version: v3.14.4

      # Python is required because ct lint runs Yamale and yamllint, which require Python.
      - uses: actions/setup-python@v5
        with:
          python-version: '3.x'
          check-latest: true

      - name: Set up chart-testing
        uses: helm/chart-testing-action@v2.6.1

      - name: Run chart-testing (lint)
        run: >-
          ct lint
          --target-branch ${{ github.event.repository.default_branch }}
          --chart-dirs examples/chart-helm
          --charts examples/chart-helm

      - name: Setup minio
        run: |
          docker network create vllm-net
          docker run -d -p 9000:9000 --name minio --net vllm-net \
            -e "MINIO_ACCESS_KEY=minioadmin" \
            -e "MINIO_SECRET_KEY=minioadmin" \
            -v /tmp/data:/data \
            -v /tmp/config:/root/.minio \
            minio/minio server /data
          mkdir opt-125m
          # Subshell + --fail: a failed download aborts the step (a failure in the
          # middle of an `a && b && c` list would be ignored by `bash -e`), and an
          # HTTP error page is never saved in place of a model file.
          (
            cd opt-125m
            curl --fail -O -Ls "https://huggingface.co/facebook/opt-125m/resolve/main/{pytorch_model.bin,config.json,generation_config.json,merges.txt,tokenizer_config.json,vocab.json}"
          )
          aws --endpoint-url http://127.0.0.1:9000/ s3 mb s3://testbucket
          aws --endpoint-url http://127.0.0.1:9000/ s3 cp opt-125m/ s3://testbucket/opt-125m --recursive

      - name: Create kind cluster
        uses: helm/kind-action@v1.10.0
        with:
          # The following step addresses the cluster as "kind" and its node
          # container as "kind-control-plane", so pin the name explicitly.
          cluster_name: kind

      - name: Configuration of docker images on the kind cluster and cluster network
        run: |
          # `kind load docker-image` only copies images already present locally.
          docker pull amazon/aws-cli:2.6.4
          docker pull adsai/vllm-cpu-env:latest
          kind load docker-image amazon/aws-cli:2.6.4 --name kind
          kind load docker-image adsai/vllm-cpu-env:latest --name kind
          # Join the node to the network created for MinIO so that the
          # http://minio:9000 endpoint passed to the chart is reachable.
          docker network connect vllm-net "$(docker ps -aqf "name=kind-control-plane")"
          # NOTE(review): created up front so the install below does not depend
          # on ct/helm creating the explicitly named namespace; confirm whether
          # this is needed with the ct version in use.
          kubectl create namespace ns-vllm

      # The whole --helm-extra-set-args value must reach ct as ONE shell word, so
      # it is a single double-quoted string with no nested quotes. The folded
      # scalar joins these lines with spaces into one command line.
      # NOTE(review): the GPU key is kept as "nvidia/gpu" as in the original
      # command; verify it matches the key used in the chart's values.
      - name: Run chart-testing (install)
        run: >-
          ct install
          --target-branch ${{ github.event.repository.default_branch }}
          --chart-dirs examples/chart-helm
          --charts examples/chart-helm
          --namespace=ns-vllm
          --helm-extra-set-args "--set secrets.s3endpoint=http://minio:9000
          --set secrets.s3bucketname=testbucket
          --set secrets.s3accesskeyid=$AWS_ACCESS_KEY_ID
          --set secrets.s3accesskey=$AWS_SECRET_ACCESS_KEY
          --set image.env[0].name=VLLM_CPU_KVCACHE_SPACE
          --set-string image.env[0].value=1
          --set-string resources.requests.cpu=1
          --set resources.requests.memory=4Gi
          --set-string resources.requests.nvidia/gpu=0
          --set-string resources.limits.cpu=2
          --set resources.limits.memory=5Gi
          --set-string resources.limits.nvidia/gpu=0"

      # NOTE(review): ct install normally removes the release once it finishes
      # and chooses the release name itself; confirm that svc/test-vllm-service
      # still exists in ns-vllm when this step runs.
      - name: curl test
        run: |
          # Run the port-forward in the background; in the foreground it would
          # block and the request below would never be sent.
          kubectl port-forward svc/test-vllm-service 8001:80 -n ns-vllm &
          PF_PID=$!
          trap 'kill "$PF_PID" 2>/dev/null || true' EXIT
          # Give the tunnel a moment to come up.
          sleep 10
          # Capture only the HTTP status code (the body is discarded). `|| true`
          # keeps `bash -e` from aborting before the check when curl cannot
          # connect; curl then reports the code as 000.
          CODE="$(curl --silent --output /dev/null --write-out '%{http_code}' \
            --location http://localhost:8001/v1/completions \
            --header "Content-Type: application/json" \
            --data '{
              "model": "opt-125m",
              "prompt": "San Francisco is a",
              "max_tokens": 7,
              "temperature": 0
            }' || true)"
          if [ "$CODE" != "200" ]
          then
            echo "FAILURE"
            # Fail the job so a broken deployment cannot pass CI.
            exit 1
          else
            echo "SUCCESS"
          fi