diff --git a/.github/workflows/lint-test.yaml b/.github/workflows/lint-test.yaml
new file mode 100644
index 0000000000000..a6a295198c48d
--- /dev/null
+++ b/.github/workflows/lint-test.yaml
@@ -0,0 +1,87 @@
+name: Lint and Test Charts
+
+on: pull_request
+
+jobs:
+  lint-test:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          fetch-depth: 0
+
+      - name: Set up Helm
+        uses: azure/setup-helm@v4.2.0
+        with:
+          version: v3.14.4
+
+      # Python is required because ct lint runs Yamale and yamllint, which both need Python.
+      - uses: actions/setup-python@v3
+        with:
+          python-version: '3.7'
+
+      - name: Set up chart-testing
+        uses: helm/chart-testing-action@v2.6.1
+        with:
+          version: v3.10.1
+
+      - name: Run chart-testing (lint)
+        run: ct lint --target-branch ${{ github.event.repository.default_branch }} --chart-dirs examples/chart-helm --charts examples/chart-helm
+
+      - name: Setup minio
+        run: |
+          docker network create vllm-net
+          docker run -d -p 9000:9000 --name minio --net vllm-net \
+                     -e "MINIO_ACCESS_KEY=minioadmin" \
+                     -e "MINIO_SECRET_KEY=minioadmin" \
+                     -v /tmp/data:/data \
+                     -v /tmp/config:/root/.minio \
+                     minio/minio server /data
+          export AWS_ACCESS_KEY_ID=minioadmin
+          export AWS_SECRET_ACCESS_KEY=minioadmin
+          export AWS_EC2_METADATA_DISABLED=true
+          mkdir opt-125m
+          cd opt-125m && curl -O -Ls "https://huggingface.co/facebook/opt-125m/resolve/main/{pytorch_model.bin,config.json,generation_config.json,merges.txt,special_tokens_map.json,tokenizer_config.json,vocab.json}" && cd ..
+          aws --endpoint-url http://127.0.0.1:9000/ s3 mb s3://testbucket
+          aws --endpoint-url http://127.0.0.1:9000/ s3 cp opt-125m/ s3://testbucket/opt-125m --recursive
+
+      - name: Create kind cluster
+        uses: helm/kind-action@v1.10.0
+
+      - name: Configuration of docker images, network and namespace for the kind cluster
+        run: |
+          docker pull adsai/vllm-cpu-env:latest
+          docker pull amazon/aws-cli:2.6.4
+          kind load docker-image  amazon/aws-cli:2.6.4 --name chart-testing
+          kind load docker-image adsai/vllm-cpu-env:latest --name chart-testing
+          docker network connect vllm-net "$(docker ps -aqf "name=chart-testing-control-plane")"
+          kubectl create ns ns-vllm
+
+      - name: Run chart-testing (install)
+        run: |
+          export AWS_ACCESS_KEY_ID=minioadmin
+          export AWS_SECRET_ACCESS_KEY=minioadmin
+          helm install --wait --wait-for-jobs --timeout 5m0s --debug --create-namespace --namespace=ns-vllm test-vllm examples/chart-helm -f examples/chart-helm/values.yaml --set secrets.s3endpoint=http://minio:9000 --set secrets.s3bucketname=testbucket --set secrets.s3accesskeyid=$AWS_ACCESS_KEY_ID --set secrets.s3accesskey=$AWS_SECRET_ACCESS_KEY --set image.env[0].name=VLLM_CPU_KVCACHE_SPACE --set resources.requests.cpu=1 --set resources.requests.memory=4Gi --set resources.limits.cpu=2 --set resources.limits.memory=5Gi --set-string image.env[0].value="1" --set-string extraInit.s3modelpath="opt-125m/" --set-string 'resources.limits.nvidia\.com/gpu=0' --set-string 'resources.requests.nvidia\.com/gpu=0' --set-string image.repository="adsai/vllm-cpu-env"
+    
+      - name: curl test
+        run: |
+          kubectl -n ns-vllm port-forward service/test-vllm-service 8001:80 &
+          sleep 10  # give the background port-forward time to start listening
+          curl -f --location http://localhost:8001/v1/completions \
+                  --header "Content-Type: application/json" \
+                  --data '{
+                          "model": "opt-125m",
+                          "prompt": "San Francisco is a",
+                          "max_tokens": 7,
+                          "temperature": 0
+                  }'
+          RESPONSE="$(curl -v -f --location http://localhost:8001/v1/completions \
+                  --header "Content-Type: application/json" \
+                  --data '{
+                          "model": "opt-125m",
+                          "prompt": "San Francisco is a",
+                          "max_tokens": 7,
+                          "temperature": 0
+                  }')"
+          echo "$RESPONSE"
\ No newline at end of file
diff --git a/examples/chart-helm/.helmignore b/examples/chart-helm/.helmignore
new file mode 100644
index 0000000000000..2d1303b784cb8
--- /dev/null
+++ b/examples/chart-helm/.helmignore
@@ -0,0 +1,6 @@
+*.png
+.git/
+ct.yaml
+lintconf.yaml
+values.schema.json
+/workflows
\ No newline at end of file
diff --git a/examples/chart-helm/Chart.yaml b/examples/chart-helm/Chart.yaml
new file mode 100644
index 0000000000000..fb0f06f6d2701
--- /dev/null
+++ b/examples/chart-helm/Chart.yaml
@@ -0,0 +1,21 @@
+apiVersion: v2
+name: chart-vllm
+description: Chart vllm
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.0.1
+
+maintainers:
+  - name: mfournioux
diff --git a/examples/chart-helm/README.md b/examples/chart-helm/README.md
new file mode 100644
index 0000000000000..bebb769660277
--- /dev/null
+++ b/examples/chart-helm/README.md
@@ -0,0 +1,83 @@
+# chart-vllm
+
+![Version: 0.0.1](https://img.shields.io/badge/Version-0.0.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square)
+
+A Helm chart to deploy vLLM on Kubernetes
+
+## Installing the chart
+
+To install the chart with the release name `test-vllm`
+
+```console
+helm upgrade --install --create-namespace --namespace=ns-vllm test-vllm . -f values.yaml --set secrets.s3endpoint=$ACCESS_POINT --set secrets.s3bucketname=$BUCKET --set secrets.s3accesskeyid=$ACCESS_KEY --set secrets.s3accesskey=$SECRET_KEY
+```
+## Uninstalling the chart
+
+To uninstall the `test-vllm` deployment
+
+```console
+helm uninstall test-vllm --namespace=ns-vllm
+```
+
+The command removes all the Kubernetes components associated with the chart **including persistent volumes** and deletes the release.
+
+## Architecture
+
+![Architecture](architecture.excalidraw.png)
+
+## Values
+
+| Key | Type | Default | Description |
+|-----|------|---------|-------------|
+| autoscaling | object | `{"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80}` | Autoscaling configuration |
+| autoscaling.enabled | bool | `false` | Enable autoscaling |
+| autoscaling.maxReplicas | int | `100` | Maximum replicas |
+| autoscaling.minReplicas | int | `1` | Minimum replicas |
+| autoscaling.targetCPUUtilizationPercentage | int | `80` | Target CPU utilization for autoscaling |
+| configs | object | `{}` | Configmap  |
+| containerPort | int | `8000` | Container port |
+| customObjects | list | `[]` | Custom Objects configuration |
+| deploymentStrategy | object | `{}` | Deployment strategy configuration |
+| externalConfigs | list | `[]` | External configuration |
+| extraContainers | list | `[]` | Additional containers configuration |
+| extraInit | object | `{"pvcStorage":"1Gi","s3modelpath":"relative_s3_model_path/opt-125m", "awsEc2MetadataDisabled": true}` | Additional configuration for the init container |
+| extraInit.pvcStorage | string | `"1Gi"` | Storage size requested for the PersistentVolumeClaim that holds the model files synced from S3 |
+| extraInit.s3modelpath | string | `"relative_s3_model_path/opt-125m"` | Path of the model on the s3 which hosts model weights and config files |
+| extraInit.awsEc2MetadataDisabled | bool | `true` | Disables the use of the Amazon EC2 instance metadata service |
+| extraPorts | list | `[]` | Additional ports configuration |
+| gpuModels | list | `["TYPE_GPU_USED"]` | Type of gpu used |
+| image | object | `{"command":["vllm","serve","/data/","--served-model-name","opt-125m","--host","0.0.0.0","--port","8000"],"repository":"vllm/vllm-openai","tag":"latest"}` | Image configuration |
+| image.command | list | `["vllm","serve","/data/","--served-model-name","opt-125m","--host","0.0.0.0","--port","8000"]` | Container launch command |
+| image.repository | string | `"vllm/vllm-openai"` | Image repository |
+| image.tag | string | `"latest"` | Image tag |
+| livenessProbe | object | `{"failureThreshold":3,"httpGet":{"path":"/health","port":8000},"initialDelaySeconds":15,"periodSeconds":10}` | Liveness probe configuration |
+| livenessProbe.failureThreshold | int | `3` | Number of times after which if a probe fails in a row, Kubernetes considers that the overall check has failed: the container is not alive |
+| livenessProbe.httpGet | object | `{"path":"/health","port":8000}` | Configuration of the Kubelet http request on the server |
+| livenessProbe.httpGet.path | string | `"/health"` | Path to access on the HTTP server |
+| livenessProbe.httpGet.port | int | `8000` | Name or number of the port to access on the container, on which the server is listening |
+| livenessProbe.initialDelaySeconds | int | `15` | Number of seconds after the container has started before liveness probe is initiated |
+| livenessProbe.periodSeconds | int | `10` | How often (in seconds) to perform the liveness probe |
+| maxUnavailablePodDisruptionBudget | string | `""` | Disruption Budget Configuration |
+| readinessProbe | object | `{"failureThreshold":3,"httpGet":{"path":"/health","port":8000},"initialDelaySeconds":5,"periodSeconds":5}` | Readiness probe configuration |
+| readinessProbe.failureThreshold | int | `3` | Number of times after which if a probe fails in a row, Kubernetes considers that the overall check has failed: the container is not ready |
+| readinessProbe.httpGet | object | `{"path":"/health","port":8000}` | Configuration of the Kubelet http request on the server |
+| readinessProbe.httpGet.path | string | `"/health"` | Path to access on the HTTP server |
+| readinessProbe.httpGet.port | int | `8000` | Name or number of the port to access on the container, on which the server is listening |
+| readinessProbe.initialDelaySeconds | int | `5` | Number of seconds after the container has started before readiness probe is initiated |
+| readinessProbe.periodSeconds | int | `5` | How often (in seconds) to perform the readiness probe |
+| replicaCount | int | `1` | Number of replicas |
+| resources | object | `{"limits":{"cpu":4,"memory":"16Gi","nvidia.com/gpu":1},"requests":{"cpu":4,"memory":"16Gi","nvidia.com/gpu":1}}` | Resource configuration |
+| resources.limits."nvidia.com/gpu" | int | `1` | Number of gpus used |
+| resources.limits.cpu | int | `4` | Number of CPUs |
+| resources.limits.memory | string | `"16Gi"` | CPU memory configuration |
+| resources.requests."nvidia.com/gpu" | int | `1` | Number of gpus used |
+| resources.requests.cpu | int | `4` | Number of CPUs |
+| resources.requests.memory | string | `"16Gi"` | CPU memory configuration |
+| secrets | object | `{}` | Secrets configuration |
+| serviceName | string | | Service name |
+| servicePort | int | `80` | Service port |
+| labels.environment | string | `test` | Environment name  |
+| labels.release | string | `test` | Release name |
+
+----------------------------------------------
+Autogenerated from chart metadata using [helm-docs v1.14.2](https://github.com/norwoodj/helm-docs/releases/v1.14.2)
diff --git a/examples/chart-helm/architecture.excalidraw.png b/examples/chart-helm/architecture.excalidraw.png
new file mode 100644
index 0000000000000..2ba2f38f3add5
Binary files /dev/null and b/examples/chart-helm/architecture.excalidraw.png differ
diff --git a/examples/chart-helm/ct.yaml b/examples/chart-helm/ct.yaml
new file mode 100644
index 0000000000000..d273e118203ad
--- /dev/null
+++ b/examples/chart-helm/ct.yaml
@@ -0,0 +1,3 @@
+chart-dirs:
+  - charts
+validate-maintainers: false
\ No newline at end of file
diff --git a/examples/chart-helm/lintconf.yaml b/examples/chart-helm/lintconf.yaml
new file mode 100644
index 0000000000000..c8e8c5d7d9767
--- /dev/null
+++ b/examples/chart-helm/lintconf.yaml
@@ -0,0 +1,42 @@
+---
+rules:
+  braces:
+    min-spaces-inside: 0
+    max-spaces-inside: 0
+    min-spaces-inside-empty: -1
+    max-spaces-inside-empty: -1
+  brackets:
+    min-spaces-inside: 0
+    max-spaces-inside: 0
+    min-spaces-inside-empty: -1
+    max-spaces-inside-empty: -1
+  colons:
+    max-spaces-before: 0
+    max-spaces-after: 1
+  commas:
+    max-spaces-before: 0
+    min-spaces-after: 1
+    max-spaces-after: 1
+  comments:
+    require-starting-space: true
+    min-spaces-from-content: 2
+  document-end: disable
+  document-start: disable           # No --- to start a file
+  empty-lines:
+    max: 2
+    max-start: 0
+    max-end: 0
+  hyphens:
+    max-spaces-after: 1
+  indentation:
+    spaces: consistent
+    indent-sequences: whatever      # - list indentation will handle both indentation and without
+    check-multi-line-strings: false
+  key-duplicates: enable
+  line-length: disable              # Lines can be any length
+  new-line-at-end-of-file: disable
+  new-lines:
+    type: unix
+  trailing-spaces: enable
+  truthy:
+    level: warning
\ No newline at end of file
diff --git a/examples/chart-helm/templates/_helpers.tpl b/examples/chart-helm/templates/_helpers.tpl
new file mode 100644
index 0000000000000..a9690bad3c945
--- /dev/null
+++ b/examples/chart-helm/templates/_helpers.tpl
@@ -0,0 +1,164 @@
+{{/*
+Define ports for the pods
+*/}}
+{{- define "chart.container-port" -}}
+{{-  default "8000" .Values.containerPort }}
+{{- end }}
+
+{{/*
+Define service name: the lower-cased, trimmed .Values.serviceName when set, otherwise "<release name>-service". Both branches render one quoted scalar with no leading newline.
+*/}}
+{{- define "chart.service-name" -}}
+{{-  if .Values.serviceName }}
+{{-    .Values.serviceName | lower | trim | quote }}
+{{-  else }}
+{{-    printf "%s-service" .Release.Name | quote }}
+{{-  end }}
+{{- end }}
+
+{{/*
+Define service port
+*/}}
+{{- define "chart.service-port" -}}
+{{-  if .Values.servicePort }}
+{{-    .Values.servicePort }}
+{{-  else }}
+{{-    include "chart.container-port" . }}
+{{-  end }}
+{{- end }}
+
+{{/*
+Define service port name
+*/}}
+{{- define "chart.service-port-name" -}}
+"service-port"
+{{- end }}
+
+{{/*
+Define container port name
+*/}}
+{{- define "chart.container-port-name" -}}
+"container-port"
+{{- end }}
+
+{{/*
+Define deployment strategy
+*/}}
+{{- define "chart.strategy" -}}
+strategy:
+{{-   if not .Values.deploymentStrategy }}
+  rollingUpdate:
+    maxSurge: 100%
+    maxUnavailable: 0
+{{-   else }}
+{{      toYaml .Values.deploymentStrategy | indent 2 }}
+{{-   end }}
+{{- end }}
+
+{{/*
+Define additional ports
+*/}}
+{{- define "chart.extraPorts" }}
+{{-   with .Values.extraPorts }}
+{{      toYaml . }}
+{{-   end }}
+{{- end }}
+
+{{/*
+Define chart external ConfigMaps and Secrets
+*/}}
+{{- define "chart.externalConfigs" -}}
+{{-   with .Values.externalConfigs -}}
+{{      toYaml . }}
+{{-   end }}
+{{- end }}
+
+
+{{/*
+Define liveness and readiness probes
+*/}}
+{{- define "chart.probes" -}}
+{{-   if .Values.readinessProbe  }}
+readinessProbe:
+{{-     with .Values.readinessProbe }}
+{{-       toYaml . | nindent 2 }}
+{{-     end }}
+{{-   end }}
+{{-   if .Values.livenessProbe  }}
+livenessProbe:
+{{-     with .Values.livenessProbe }}
+{{-       toYaml . | nindent 2 }}
+{{-     end }}
+{{-   end }}
+{{- end }}
+
+{{/*
+Define resources
+*/}}
+{{- define "chart.resources" -}}
+requests:
+  memory: {{ required "Value 'resources.requests.memory' must be defined !" .Values.resources.requests.memory | quote }}
+  cpu: {{ required "Value 'resources.requests.cpu' must be defined !" .Values.resources.requests.cpu | quote }}
+  {{- if and (gt (int (index .Values.resources.requests "nvidia.com/gpu")) 0) (gt (int (index .Values.resources.limits "nvidia.com/gpu")) 0) }}
+  nvidia.com/gpu: {{ required "Value 'resources.requests.nvidia.com/gpu' must be defined !" (index .Values.resources.requests "nvidia.com/gpu") | quote }}
+  {{- end }}
+limits:
+  memory: {{ required "Value 'resources.limits.memory' must be defined !" .Values.resources.limits.memory | quote }}
+  cpu: {{ required "Value 'resources.limits.cpu' must be defined !" .Values.resources.limits.cpu | quote }}
+  {{- if and (gt (int (index .Values.resources.requests "nvidia.com/gpu")) 0) (gt (int (index .Values.resources.limits "nvidia.com/gpu")) 0) }}
+  nvidia.com/gpu: {{ required "Value 'resources.limits.nvidia.com/gpu' must be defined !" (index .Values.resources.limits "nvidia.com/gpu") | quote }}
+  {{- end }}
+{{- end }}
+
+
+{{/*
+Define the user for the main container: renders runAsUser from .Values.image.runAsUser, or nothing when that value is unset. The value is read from the same key the guard tests.
+*/}}
+{{- define "chart.user" }}
+{{-   if .Values.image.runAsUser  }}
+runAsUser:
+{{-     with .Values.image.runAsUser }}
+{{-       toYaml . | nindent 2 }}
+{{-     end }}
+{{-   end }}
+{{- end }}
+
+{{- define "chart.extraInitImage" -}}
+"amazon/aws-cli:2.6.4"
+{{- end }}
+
+{{- define "chart.extraInitEnv" -}}
+- name: S3_ENDPOINT_URL
+  valueFrom:
+    secretKeyRef:
+      name: {{ .Release.Name }}-secrets
+      key: s3endpoint
+- name: S3_BUCKET_NAME
+  valueFrom:
+    secretKeyRef:
+      name: {{ .Release.Name }}-secrets
+      key: s3bucketname
+- name: AWS_ACCESS_KEY_ID
+  valueFrom:
+    secretKeyRef:
+      name: {{ .Release.Name }}-secrets
+      key: s3accesskeyid
+- name: AWS_SECRET_ACCESS_KEY
+  valueFrom:
+    secretKeyRef:
+      name: {{ .Release.Name }}-secrets
+      key: s3accesskey
+- name: S3_PATH
+  value: "{{ .Values.extraInit.s3modelpath }}"
+- name: AWS_EC2_METADATA_DISABLED
+  value: "{{ .Values.extraInit.awsEc2MetadataDisabled }}"
+{{- end }}
+
+{{/*
+  Define chart labels
+*/}}
+{{- define "chart.labels" -}}
+{{-   with .Values.labels -}}
+{{      toYaml . }}
+{{-   end }}
+{{- end }}
\ No newline at end of file
diff --git a/examples/chart-helm/templates/configmap.yaml b/examples/chart-helm/templates/configmap.yaml
new file mode 100644
index 0000000000000..cc5d03782f878
--- /dev/null
+++ b/examples/chart-helm/templates/configmap.yaml
@@ -0,0 +1,11 @@
+{{- if .Values.configs -}}
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: "{{ .Release.Name }}-configs"
+  namespace: {{ .Release.Namespace }}
+data:
+  {{- with .Values.configs }}
+  {{- toYaml . | nindent 2 }}
+  {{- end }}
+{{- end -}}
\ No newline at end of file
diff --git a/examples/chart-helm/templates/custom-objects.yaml b/examples/chart-helm/templates/custom-objects.yaml
new file mode 100644
index 0000000000000..8a65ffd0e552d
--- /dev/null
+++ b/examples/chart-helm/templates/custom-objects.yaml
@@ -0,0 +1,6 @@
+{{- /* Render each customObjects entry (passed through tpl) as its own YAML document.
+       The separator precedes each entry so consecutive documents never run together. */ -}}
+{{- range .Values.customObjects }}
+---
+{{ tpl (. | toYaml) $ }}
+{{- end }}
\ No newline at end of file
diff --git a/examples/chart-helm/templates/deployment.yaml b/examples/chart-helm/templates/deployment.yaml
new file mode 100644
index 0000000000000..536983b587be2
--- /dev/null
+++ b/examples/chart-helm/templates/deployment.yaml
@@ -0,0 +1,122 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: "{{ .Release.Name }}-deployment-vllm"
+  namespace: {{ .Release.Namespace }}
+  labels:
+  {{- include "chart.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.replicaCount }}
+  {{- include "chart.strategy" . | nindent 2 }}
+  selector:                                                                                                                                  
+    matchLabels:
+      environment: "test"
+      release: "test"
+  progressDeadlineSeconds: 1200
+  template:
+    metadata:
+      labels:
+        environment: "test"
+        release: "test"
+    spec:
+      containers:
+        - name: "vllm"
+          image: "{{ required "Required value 'image.repository' must be defined !" .Values.image.repository }}:{{ required "Required value 'image.tag' must be defined !" .Values.image.tag }}"
+          {{- if .Values.image.command }}
+          command :
+            {{- with .Values.image.command }}
+            {{- toYaml . | nindent 10 }}
+            {{- end }}
+          {{- end }}
+          securityContext:
+            {{- if .Values.image.securityContext }}
+              {{- with .Values.image.securityContext }}
+              {{- toYaml . | nindent 12 }}
+              {{- end }}
+            {{- else }}
+            runAsNonRoot: false
+              {{- include "chart.user" . | indent 12 }}
+            {{- end }}
+          imagePullPolicy: IfNotPresent
+          {{- if .Values.image.env }}
+          env :
+            {{- with .Values.image.env }}
+            {{- toYaml . | nindent 10 }}
+            {{- end }}
+          {{- else }}
+          env: []
+          {{- end }}
+          {{- if or .Values.externalConfigs .Values.configs .Values.secrets }}
+          envFrom:
+            {{- if .Values.configs }}
+            - configMapRef:
+                name: "{{ .Release.Name }}-configs"
+            {{- end }}
+            {{- if .Values.secrets}}
+            - secretRef:
+                name: "{{ .Release.Name }}-secrets"
+            {{- end }}
+            {{- include "chart.externalConfigs" . | nindent 12 }}
+          {{- end }}          
+          ports:
+            - name: {{ include "chart.container-port-name" . }}
+              containerPort: {{ include "chart.container-port" . }}
+            {{- include "chart.extraPorts" . | nindent 12 }}
+          {{- include "chart.probes" . | indent 10 }}
+          resources: {{- include "chart.resources" . | nindent 12 }}
+          volumeMounts:
+          - name: {{ .Release.Name }}-storage
+            mountPath: /data
+
+        {{- with .Values.extraContainers }}
+        {{ toYaml . | nindent 8 }}
+        {{- end }}
+
+      {{-   if .Values.extraInit  }}
+      initContainers:
+      - name: wait-download-model
+        image: {{ include "chart.extraInitImage" . }}
+        command: 
+          - /bin/bash
+        args:
+          - -eucx
+          - while aws --endpoint-url $S3_ENDPOINT_URL s3 sync --dryrun s3://$S3_BUCKET_NAME/$S3_PATH /data | grep -q download; do sleep 10; done
+        env: {{- include "chart.extraInitEnv" . | nindent 10 }}
+        resources:
+          requests:
+            cpu: 200m
+            memory: 1Gi
+          limits:
+            cpu: 500m
+            memory: 2Gi
+        volumeMounts:
+        - name: {{ .Release.Name }}-storage
+          mountPath: /data
+      {{- end }}
+      volumes:
+        - name: {{ .Release.Name }}-storage
+          persistentVolumeClaim:
+            claimName: {{ .Release.Name }}-storage-claim     
+
+      {{- with .Values.nodeSelector }}
+      nodeSelector:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- with .Values.tolerations }}
+      tolerations:
+        {{- toYaml . | nindent 8 }}
+      {{- end }}
+      {{- if and (gt (int (index .Values.resources.requests "nvidia.com/gpu")) 0) (gt (int (index .Values.resources.limits "nvidia.com/gpu")) 0) }}
+      runtimeClassName: nvidia
+      affinity:
+        nodeAffinity:
+          requiredDuringSchedulingIgnoredDuringExecution:
+            nodeSelectorTerms:
+              - matchExpressions:
+                - key: nvidia.com/gpu.product
+                  operator: In
+                  {{- with .Values.gpuModels }}
+                  values:
+                    {{- toYaml . | nindent 20 }}
+                  {{- end }}
+      {{- end }} 
\ No newline at end of file
diff --git a/examples/chart-helm/templates/hpa.yaml b/examples/chart-helm/templates/hpa.yaml
new file mode 100644
index 0000000000000..5ca94c8213541
--- /dev/null
+++ b/examples/chart-helm/templates/hpa.yaml
@@ -0,0 +1,31 @@
+{{- if .Values.autoscaling.enabled }}
+apiVersion: autoscaling/v2
+kind: HorizontalPodAutoscaler
+metadata:
+  name: "{{ .Release.Name }}-hpa"
+  namespace: {{ .Release.Namespace }}
+spec:
+  scaleTargetRef:
+    apiVersion: apps/v1
+    kind: Deployment
+    name: "{{ .Release.Name }}-deployment-vllm"  # must equal metadata.name in deployment.yaml
+  minReplicas: {{ .Values.autoscaling.minReplicas }}
+  maxReplicas: {{ .Values.autoscaling.maxReplicas }}
+  metrics:
+    {{- if .Values.autoscaling.targetCPUUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: cpu
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }}
+    {{- end }}
+    {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }}
+    - type: Resource
+      resource:
+        name: memory
+        target:
+          type: Utilization
+          averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }}
+    {{- end }}
+{{- end }}
\ No newline at end of file
diff --git a/examples/chart-helm/templates/job.yaml b/examples/chart-helm/templates/job.yaml
new file mode 100644
index 0000000000000..f9ea3541e78d2
--- /dev/null
+++ b/examples/chart-helm/templates/job.yaml
@@ -0,0 +1,37 @@
+{{- if .Values.extraInit }}
+apiVersion: batch/v1
+kind: Job
+metadata:
+  name: "{{ .Release.Name }}-init-vllm"
+  namespace: {{ .Release.Namespace }}
+spec:
+  ttlSecondsAfterFinished: 100
+  template:
+    metadata:
+      name: init-vllm
+    spec:
+      containers:
+        - name: job-download-model
+          image: {{ include "chart.extraInitImage" . }}
+          command:
+            - /bin/bash
+          args:
+            - -eucx
+            - aws --endpoint-url $S3_ENDPOINT_URL s3 sync s3://$S3_BUCKET_NAME/$S3_PATH /data
+          env: {{- include "chart.extraInitEnv" . | nindent 12 }}
+          volumeMounts:
+            - name: {{ .Release.Name }}-storage
+              mountPath: /data
+          resources:
+            requests:
+              cpu: 200m
+              memory: 1Gi
+            limits:
+              cpu: 500m
+              memory: 2Gi
+      restartPolicy: OnFailure
+      volumes:
+        - name: {{ .Release.Name }}-storage
+          persistentVolumeClaim:
+            claimName: "{{ .Release.Name }}-storage-claim"
+{{- end }}
\ No newline at end of file
diff --git a/examples/chart-helm/templates/poddisruptionbudget.yaml b/examples/chart-helm/templates/poddisruptionbudget.yaml
new file mode 100644
index 0000000000000..512bac727da87
--- /dev/null
+++ b/examples/chart-helm/templates/poddisruptionbudget.yaml
@@ -0,0 +1,12 @@
+apiVersion: policy/v1
+kind: PodDisruptionBudget
+metadata:
+  name: "{{ .Release.Name }}-pdb"
+  namespace: {{ .Release.Namespace }}
+spec:
+  maxUnavailable: {{ default 1 .Values.maxUnavailablePodDisruptionBudget }}
+  # policy/v1 treats a missing selector as "match no pods"; use the pod labels set in deployment.yaml.
+  selector:
+    matchLabels:
+      environment: "test"
+      release: "test"
\ No newline at end of file
diff --git a/examples/chart-helm/templates/pvc.yaml b/examples/chart-helm/templates/pvc.yaml
new file mode 100644
index 0000000000000..e8d203a7a5ace
--- /dev/null
+++ b/examples/chart-helm/templates/pvc.yaml
@@ -0,0 +1,13 @@
+{{-   if .Values.extraInit  }}
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: "{{ .Release.Name }}-storage-claim"
+  namespace: {{ .Release.Namespace }}
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: {{ .Values.extraInit.pvcStorage }}
+{{- end }}
\ No newline at end of file
diff --git a/examples/chart-helm/templates/secrets.yaml b/examples/chart-helm/templates/secrets.yaml
new file mode 100644
index 0000000000000..4e88e747b616a
--- /dev/null
+++ b/examples/chart-helm/templates/secrets.yaml
@@ -0,0 +1,10 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: "{{ .Release.Name }}-secrets"
+  namespace: {{ .Release.Namespace }}
+type: Opaque
+data:  # every entry of .Values.secrets, base64-encoded; toString lets int/bool values from --set through b64enc
+  {{- range $key, $val := .Values.secrets }}
+  {{ $key }}: {{ $val | toString | b64enc | quote }}
+  {{- end }}
\ No newline at end of file
diff --git a/examples/chart-helm/templates/service.yaml b/examples/chart-helm/templates/service.yaml
new file mode 100644
index 0000000000000..fe3838c0a8214
--- /dev/null
+++ b/examples/chart-helm/templates/service.yaml
@@ -0,0 +1,15 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: "{{ .Release.Name }}-service"
+  namespace: {{ .Release.Namespace }}
+spec:
+  type: ClusterIP
+  ports:
+    - name: {{ include "chart.service-port-name" . }}
+      port: {{ include "chart.service-port" . }}
+      targetPort: {{ include "chart.container-port-name" . }}
+      protocol: TCP
+  selector:
+    environment: test
+    release: test
\ No newline at end of file
diff --git a/examples/chart-helm/values.schema.json b/examples/chart-helm/values.schema.json
new file mode 100644
index 0000000000000..812d54bde1397
--- /dev/null
+++ b/examples/chart-helm/values.schema.json
@@ -0,0 +1,265 @@
+{
+    "$schema": "http://json-schema.org/schema#",
+    "type": "object",
+    "properties": {
+        "image": {
+            "type": "object",
+            "properties": {
+                "repository": {
+                    "type": "string"
+                },
+                "tag": {
+                    "type": "string"
+                },
+                "command": {
+                    "type": "array",
+                    "items": {
+                        "type": "string"
+                    }
+                }
+            },
+            "required": [
+                "command",
+                "repository",
+                "tag"
+            ]
+        },
+        "containerPort": {
+            "type": "integer"
+        },
+        "serviceName": {
+            "type": ["string", "null"]
+        },
+        "servicePort": {
+            "type": "integer"
+        },
+        "extraPorts": {
+            "type": "array"
+        },
+        "replicaCount": {
+            "type": "integer"
+        },
+        "deploymentStrategy": {
+            "type": "object"
+        },
+        "resources": {
+            "type": "object",
+            "properties": {
+                "requests": {
+                    "type": "object",
+                    "properties": {
+                        "cpu": {
+                            "type": "integer"
+                        },
+                        "memory": {
+                            "type": "string"
+                        },
+                        "nvidia.com/gpu": {
+                            "type": "integer"
+                        }
+                    },
+                    "required": [
+                        "cpu",
+                        "memory",
+                        "nvidia.com/gpu"
+                    ]
+                },
+                "limits": {
+                    "type": "object",
+                    "properties": {
+                        "cpu": {
+                            "type": "integer"
+                        },
+                        "memory": {
+                            "type": "string"
+                        },
+                        "nvidia.com/gpu": {
+                            "type": "integer"
+                        }
+                    },
+                    "required": [
+                        "cpu",
+                        "memory",
+                        "nvidia.com/gpu"
+                    ]
+                }
+            },
+            "required": [
+                "limits",
+                "requests"
+            ]
+        },
+        "gpuModels": {
+            "type": "array",
+            "items": {
+                "type": "string"
+            }
+        },
+        "autoscaling": {
+            "type": "object",
+            "properties": {
+                "enabled": {
+                    "type": "boolean"
+                },
+                "minReplicas": {
+                    "type": "integer"
+                },
+                "maxReplicas": {
+                    "type": "integer"
+                },
+                "targetCPUUtilizationPercentage": {
+                    "type": "integer"
+                }
+            },
+            "required": [
+                "enabled",
+                "maxReplicas",
+                "minReplicas",
+                "targetCPUUtilizationPercentage"
+            ]
+        },
+        "configs": {
+            "type": "object"
+        },
+        "secrets": {
+            "type": "object"
+        },
+        "externalConfigs": {
+            "type": "array"
+        },
+        "customObjects": {
+            "type": "array"
+        },
+        "maxUnavailablePodDisruptionBudget": {
+            "type": ["string", "integer"]
+        },
+        "extraInit": {
+            "type": "object",
+            "properties": {
+                "s3modelpath": {
+                    "type": "string"
+                },
+                "pvcStorage": {
+                    "type": "string"
+                },
+                "awsEc2MetadataDisabled": {
+                    "type": "boolean"
+                }
+            },
+            "required": [
+                "pvcStorage",
+                "s3modelpath",
+                "awsEc2MetadataDisabled"
+            ]
+        },
+        "extraContainers": {
+            "type": "array"
+        },
+        "readinessProbe": {
+            "type": "object",
+            "properties": {
+                "initialDelaySeconds": {
+                    "type": "integer"
+                },
+                "periodSeconds": {
+                    "type": "integer"
+                },
+                "failureThreshold": {
+                    "type": "integer"
+                },
+                "httpGet": {
+                    "type": "object",
+                    "properties": {
+                        "path": {
+                            "type": "string"
+                        },
+                        "port": {
+                            "type": ["integer", "string"]
+                        }
+                    },
+                    "required": [
+                        "path",
+                        "port"
+                    ]
+                }
+            },
+            "required": [
+                "failureThreshold",
+                "httpGet",
+                "initialDelaySeconds",
+                "periodSeconds"
+            ]
+        },
+        "livenessProbe": {
+            "type": "object",
+            "properties": {
+                "initialDelaySeconds": {
+                    "type": "integer"
+                },
+                "failureThreshold": {
+                    "type": "integer"
+                },
+                "periodSeconds": {
+                    "type": "integer"
+                },
+                "httpGet": {
+                    "type": "object",
+                    "properties": {
+                        "path": {
+                            "type": "string"
+                        },
+                        "port": {
+                            "type": ["integer", "string"]
+                        }
+                    },
+                    "required": [
+                        "path",
+                        "port"
+                    ]
+                }
+            },
+            "required": [
+                "failureThreshold",
+                "httpGet",
+                "initialDelaySeconds",
+                "periodSeconds"
+            ]
+        },
+        "labels": {
+            "type": "object",
+            "properties": {
+                "environment": {
+                    "type": "string"
+                },
+                "release": {
+                    "type": "string"
+                }
+            },
+            "required": [
+                "environment",
+                "release"
+            ]
+        }
+    },
+    "required": [
+        "autoscaling",
+        "configs",
+        "containerPort",
+        "customObjects",
+        "deploymentStrategy",
+        "externalConfigs",
+        "extraContainers",
+        "extraInit",
+        "extraPorts",
+        "gpuModels",
+        "image",
+        "labels",
+        "livenessProbe",
+        "maxUnavailablePodDisruptionBudget",
+        "readinessProbe",
+        "replicaCount",
+        "resources",
+        "secrets",
+        "servicePort"
+    ]
+}
\ No newline at end of file
diff --git a/examples/chart-helm/values.yaml b/examples/chart-helm/values.yaml
new file mode 100644
index 0000000000000..cf6dd15116e9c
--- /dev/null
+++ b/examples/chart-helm/values.yaml
@@ -0,0 +1,119 @@
+# -- Default values for chart vllm
+# -- Declare variables to be passed into your templates.
+
+# -- Image configuration
+image:
+  # -- Image repository
+  repository: "vllm/vllm-openai"
+  # -- Image tag
+  tag: "latest"
+  # -- Container launch command
+  command: ["vllm", "serve", "/data/", "--served-model-name", "opt-125m", "--host", "0.0.0.0", "--port", "8000"]
+
+# -- Container port
+containerPort: 8000
+# -- Service name (null when unset)
+serviceName: null
+# -- Service port
+servicePort: 80
+# -- Additional ports configuration
+extraPorts: []
+
+# -- Number of replicas
+replicaCount: 1
+
+# -- Deployment strategy configuration
+deploymentStrategy: {}
+
+# -- Resource configuration
+resources:
+  requests:
+    # -- Number of CPUs
+    cpu: 4
+    # -- CPU memory configuration
+    memory: 16Gi
+    # -- Number of gpus used
+    nvidia.com/gpu: 1
+  limits:
+    # -- Number of CPUs
+    cpu: 4
+    # -- CPU memory configuration
+    memory: 16Gi
+    # -- Number of gpus used
+    nvidia.com/gpu: 1
+
+# -- Type of gpu used
+gpuModels:
+  - "TYPE_GPU_USED"
+
+# -- Autoscaling configuration
+autoscaling:
+  # -- Enable autoscaling
+  enabled: false
+  # -- Minimum replicas
+  minReplicas: 1
+  # -- Maximum replicas
+  maxReplicas: 100
+  # -- Target CPU utilization for autoscaling
+  targetCPUUtilizationPercentage: 80
+  # targetMemoryUtilizationPercentage: 80
+
+# -- Configmap
+configs: {}
+
+# -- Secrets configuration
+secrets: {}
+
+# -- External configuration
+externalConfigs: []
+
+# -- Custom Objects configuration
+customObjects: []
+
+# -- Disruption Budget Configuration
+maxUnavailablePodDisruptionBudget: ""
+
+# -- Additional configuration for the init container
+extraInit:
+  # -- Path of the model in the S3 bucket that hosts the model weights and config files
+  s3modelpath: "relative_s3_model_path/opt-125m"
+  # -- Storage size requested by the PersistentVolumeClaim
+  pvcStorage: "1Gi"
+  awsEc2MetadataDisabled: true
+
+# -- Additional containers configuration
+extraContainers: []
+
+# -- Readiness probe configuration
+readinessProbe:
+  # -- Number of seconds after the container has started before readiness probe is initiated
+  initialDelaySeconds: 5
+  # -- How often (in seconds) to perform the readiness probe
+  periodSeconds: 5
+  # -- Number of consecutive probe failures after which Kubernetes considers the overall check failed: the container is not ready
+  failureThreshold: 3
+  # -- Configuration of the Kubelet http request on the server
+  httpGet:
+    # -- Path to access on the HTTP server
+    path: /health
+    # -- Name or number of the port to access on the container, on which the server is listening
+    port: 8000
+
+# -- Liveness probe configuration
+livenessProbe:
+  # -- Number of seconds after the container has started before liveness probe is initiated
+  initialDelaySeconds: 15
+  # -- Number of consecutive probe failures after which Kubernetes considers the overall check failed: the container is not alive
+  failureThreshold: 3
+  # -- How often (in seconds) to perform the liveness probe
+  periodSeconds: 10
+  # -- Configuration of the Kubelet http request on the server
+  httpGet:
+    # -- Path to access on the HTTP server
+    path: /health
+    # -- Name or number of the port to access on the container, on which the server is listening
+    port: 8000
+
+labels:
+  environment: "test"
+  release: "test"