# Add example of helm chart for vllm deployment on k8s (#70)
# Workflow file for this run.
# Note from the GitHub viewer: this file may contain bidirectional Unicode text
# that can be interpreted or compiled differently than it appears. To review,
# open the file in an editor that reveals hidden Unicode characters.
# Lints the example Helm chart, then installs it into a throw-away kind
# cluster backed by a local MinIO bucket and smoke-tests the completions API.
name: Lint and Test Charts

on: pull_request

jobs:
  lint-test:
    runs-on: ubuntu-latest
    # Job-level env: variables exported inside one `run` step are NOT visible
    # to later steps, and the helm install step needs these credentials too.
    env:
      AWS_ACCESS_KEY_ID: minioadmin
      AWS_SECRET_ACCESS_KEY: minioadmin
      AWS_EC2_METADATA_DISABLED: "true"
    steps:
      - name: Checkout
        uses: actions/checkout@v3
        with:
          # Full history so `ct lint --target-branch` can diff against it.
          fetch-depth: 0

      # NOTE(review): the action refs for setup-helm, chart-testing-action and
      # kind-action were garbled in the source this file was recovered from;
      # the names/tags below are reconstructed -- confirm before merging.
      - name: Set up Helm
        uses: azure/setup-helm@v4.2.0
        with:
          version: v3.14.4

      # Python is required because ct lint runs Yamale and yamllint which
      # require Python.
      - uses: actions/setup-python@v2
        with:
          # Quoted so YAML keeps it a string instead of parsing a float.
          python-version: "3.7"

      - name: Set up chart-testing
        uses: helm/chart-testing-action@v2.6.1
        with:
          version: v3.10.1

      - name: Run chart-testing (lint)
        run: |
          ct lint \
            --target-branch ${{ github.event.repository.default_branch }} \
            --chart-dirs examples/chart-helm \
            --charts examples/chart-helm

      - name: Setup minio
        run: |
          docker network create vllm-net
          docker run -d -p 9000:9000 --name minio --net vllm-net \
            -e "MINIO_ACCESS_KEY=minioadmin" \
            -e "MINIO_SECRET_KEY=minioadmin" \
            -v /tmp/data:/data \
            -v /tmp/config:/root/.minio \
            minio/minio server /data
          mkdir -p opt-125m
          # The tokenizer file is `merges.txt` (not `merge.txt`); -f makes curl
          # fail on an HTTP error instead of saving the error page as a file.
          cd opt-125m && curl -O -fLs "https://huggingface.co/facebook/opt-125m/resolve/main/{pytorch_model.bin,config.json,generation_config.json,merges.txt,tokenizer_config.json,vocab.json}" && cd ..
          aws --endpoint-url http://127.0.0.1:9000/ s3 mb s3://testbucket
          aws --endpoint-url http://127.0.0.1:9000/ s3 cp opt-125m/ s3://testbucket/opt-125m --recursive

      - name: Create kind cluster
        uses: helm/kind-action@v1.10.0

      - name: Configuration of docker images, network and namespace for the kind cluster
        run: |
          docker pull adsai/vllm-cpu-env:latest
          docker pull amazon/aws-cli:2.6.4
          kind load docker-image amazon/aws-cli:2.6.4 --name chart-testing
          kind load docker-image adsai/vllm-cpu-env:latest --name chart-testing
          # Attach the kind node to the MinIO network so pods can reach it
          # at http://minio:9000.
          docker network connect vllm-net "$(docker ps -aqf "name=chart-testing-control-plane")"
          kubectl create ns ns-vllm

      - name: Run chart-testing (install)
        run: |
          helm install --wait --wait-for-jobs --timeout 5m0s --debug \
            --create-namespace --namespace=ns-vllm \
            test-vllm examples/chart-helm \
            -f examples/chart-helm/values.yaml \
            --set secrets.s3endpoint=http://minio:9000 \
            --set secrets.s3bucketname=testbucket \
            --set secrets.s3accesskeyid="$AWS_ACCESS_KEY_ID" \
            --set secrets.s3accesskey="$AWS_SECRET_ACCESS_KEY" \
            --set 'image.env[0].name=VLLM_CPU_KVCACHE_SPACE' \
            --set resources.requests.cpu=1 \
            --set resources.requests.memory=4Gi \
            --set resources.limits.cpu=2 \
            --set resources.limits.memory=5Gi \
            --set-string 'image.env[0].value=1' \
            --set-string extraInit.s3modelpath="opt-125m/" \
            --set-string 'resources.limits.nvidia\.com/gpu=0' \
            --set-string 'resources.requests.nvidia\.com/gpu=0' \
            --set-string image.repository="adsai/vllm-cpu-env"

      - name: curl test
        run: |
          kubectl -n ns-vllm get svc
          kubectl -n ns-vllm get pods
          # port-forward blocks while the tunnel is open, so run it in the
          # background and tear it down when the step exits.
          kubectl port-forward svc/test-vllm-service 8001:80 -n ns-vllm &
          PF_PID=$!
          trap 'kill "$PF_PID" 2>/dev/null || true' EXIT
          # Give the tunnel a moment to come up before the request.
          sleep 10
          # Capture only the HTTP status code; the response body is discarded.
          # `|| true` keeps a connection error from aborting before we report.
          CODE="$(curl --silent --show-error --location \
            --output /dev/null --write-out '%{http_code}' \
            http://localhost:8001/v1/completions \
            --header "Content-Type: application/json" \
            --data '{
              "model": "opt-125m",
              "prompt": "San Francisco is a",
              "max_tokens": 7,
              "temperature": 0
            }' || true)"
          if [ "$CODE" != "200" ]; then
            echo "FAILURE (HTTP status: $CODE)"
            # Fail the job; just echoing would leave the step green.
            exit 1
          fi
          echo "SUCCESS"