Add example of helm chart for vllm deployment on k8s #160

Summary
Jobs
- lint-test
Run details
- Usage
- Workflow file

Workflow file for this run

.github/workflows/lint-test.yaml at 0f48176

	# Lints the Helm chart in examples/chart-helm, then installs it into a kind
	# cluster and sends a completion request to the deployed service. The model
	# files (facebook/opt-125m) are served to the chart from a local MinIO bucket.
	name: Lint and Test Charts

	on: pull_request

	jobs:
	  lint-test:
	    runs-on: ubuntu-latest
	    steps:
	      - name: Checkout
	        uses: actions/checkout@v3
	        with:
	          # 0 = fetch full history; ct lint is given a --target-branch below.
	          fetch-depth: 0

	      # NOTE(review): this ref was garbled to "[email protected]" in the copy
	      # under review; the "setup-helm@v4.2.0" tag is a reconstruction - confirm.
	      - name: Set up Helm
	        uses: azure/setup-helm@v4.2.0
	        with:
	          version: v3.14.4

	      # Python is required because ct lint runs Yamale and yamllint, which
	      # require Python.
	      - uses: actions/setup-python@v3
	        with:
	          # Quoted so the version is read as a string, not a float.
	          python-version: '3.7'

	      # NOTE(review): this ref was garbled to "[email protected]" in the copy
	      # under review; "chart-testing-action@v2.6.1" is a reconstruction - confirm.
	      - name: Set up chart-testing
	        uses: helm/chart-testing-action@v2.6.1
	        with:
	          version: v3.10.1

	      # Folded scalar: the lines below are joined into one ct command line.
	      - name: Run chart-testing (lint)
	        run: >-
	          ct lint
	          --target-branch ${{ github.event.repository.default_branch }}
	          --chart-dirs examples/chart-helm
	          --charts examples/chart-helm

	      # Starts MinIO on the dedicated docker network "vllm-net", downloads the
	      # opt-125m files and uploads them to s3://testbucket/opt-125m.
	      - name: Setup minio
	        run: |
	          docker network create vllm-net
	          docker run -d -p 9000:9000 --name minio --net vllm-net \
	            -e "MINIO_ACCESS_KEY=minioadmin" \
	            -e "MINIO_SECRET_KEY=minioadmin" \
	            -v /tmp/data:/data \
	            -v /tmp/config:/root/.minio \
	            minio/minio server /data
	          export AWS_ACCESS_KEY_ID=minioadmin
	          export AWS_SECRET_ACCESS_KEY=minioadmin
	          export AWS_EC2_METADATA_DISABLED=true
	          mkdir opt-125m
	          cd opt-125m && curl -O -Ls "https://huggingface.co/facebook/opt-125m/resolve/main/{pytorch_model.bin,config.json,generation_config.json,merges.txt,special_tokens_map.json,tokenizer_config.json,vocab.json}" && cd ..
	          aws --endpoint-url http://127.0.0.1:9000/ s3 mb s3://testbucket
	          aws --endpoint-url http://127.0.0.1:9000/ s3 cp opt-125m/ s3://testbucket/opt-125m --recursive

	      # NOTE(review): this ref was garbled to "[email protected]" in the copy
	      # under review; the "kind-action@v1.10.0" tag is a reconstruction - confirm.
	      - name: Create kind cluster
	        uses: helm/kind-action@v1.10.0

	      - name: Build the Docker image vllm cpu
	        run: docker buildx build --file Dockerfile.cpu --tag vllm-cpu-env --shm-size=4g .

	      # Loads the images into the kind cluster named "chart-testing" and attaches
	      # its control-plane container to "vllm-net", the network MinIO runs on.
	      - name: Configuration of docker images, network and namespace for the kind cluster
	        run: |
	          docker pull amazon/aws-cli:2.6.4
	          kind load docker-image amazon/aws-cli:2.6.4 --name chart-testing
	          kind load docker-image vllm-cpu-env:latest --name chart-testing
	          docker network connect vllm-net "$(docker ps -aqf "name=chart-testing-control-plane")"
	          kubectl create ns ns-vllm

	      # helm install runs in the background so that the logs of the pod whose
	      # name matches /init/ can be followed while the release comes up.
	      - name: Run chart-testing (install)
	        run: |
	          export AWS_ACCESS_KEY_ID=minioadmin
	          export AWS_SECRET_ACCESS_KEY=minioadmin
	          helm install --wait --wait-for-jobs --timeout 10m0s --debug \
	            --create-namespace --namespace=ns-vllm \
	            test-vllm examples/chart-helm \
	            -f examples/chart-helm/values.yaml \
	            --set secrets.s3endpoint=http://minio:9000 \
	            --set secrets.s3bucketname=testbucket \
	            --set secrets.s3accesskeyid=$AWS_ACCESS_KEY_ID \
	            --set secrets.s3accesskey=$AWS_SECRET_ACCESS_KEY \
	            --set image.env[0].name=VLLM_CPU_KVCACHE_SPACE \
	            --set resources.requests.cpu=1 \
	            --set resources.requests.memory=4Gi \
	            --set resources.limits.cpu=2 \
	            --set resources.limits.memory=5Gi \
	            --set-string image.env[0].value="1" \
	            --set-string extraInit.s3modelpath="opt-125m/" \
	            --set-string 'resources.limits.nvidia\.com/gpu=0' \
	            --set-string 'resources.requests.nvidia\.com/gpu=0' \
	            --set-string image.repository="vllm-cpu-env" &
	          helm_pid=$!
	          (sleep 5; kubectl -n ns-vllm logs -f $(kubectl -n ns-vllm get pods -o name | awk '/init/ {print $1;exit}'))
	          # Collect the background install's exit status so that a failed or
	          # timed-out helm install fails this step instead of being ignored.
	          wait "$helm_pid"

	      # Port-forwards the chart's service to localhost:8001 and posts the same
	      # completion request twice; curl -f exits non-zero on an HTTP error.
	      # NOTE(review): $CODE is expanded inside its own first assignment, so the
	      # echoed value is the response body followed by ":" - confirm the intent.
	      - name: curl test
	        run: |
	          kubectl -n ns-vllm get pods
	          kubectl -n ns-vllm port-forward service/test-vllm-service 8001:80 &
	          sleep 10
	          curl -f --location http://localhost:8001/v1/completions \
	          --header "Content-Type: application/json" \
	          --data '{
	            "model": "opt-125m",
	            "prompt": "San Francisco is a",
	            "max_tokens": 7,
	            "temperature": 0
	          }'
	          CODE="$(curl -v -f --location http://localhost:8001/v1/completions \
	          --header "Content-Type: application/json" \
	          --data '{
	            "model": "opt-125m",
	            "prompt": "San Francisco is a",
	            "max_tokens": 7,
	            "temperature": 0
	          }'):$CODE"
	          echo "$CODE"

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add example of helm chart for vllm deployment on k8s #160

Workflow file

Add example of helm chart for vllm deployment on k8s #160

Jobs

Run details

Workflow file for this run