From 58c7f96b3b3d10f9cadc5bece4b26db7c1bef3a7 Mon Sep 17 00:00:00 2001 From: Maxime Fournioux <55544262+mfournioux@users.noreply.github.com> Date: Thu, 5 Dec 2024 09:34:32 +0100 Subject: [PATCH] correct malformed table error on rst file Signed-off-by: Maxime Fournioux <55544262+mfournioux@users.noreply.github.com> --- docs/source/serving/deploying_with_helm.rst | 478 +++++++++----------- 1 file changed, 204 insertions(+), 274 deletions(-) diff --git a/docs/source/serving/deploying_with_helm.rst b/docs/source/serving/deploying_with_helm.rst index 0b442edb06f9b..38b8d8b6c04c7 100644 --- a/docs/source/serving/deploying_with_helm.rst +++ b/docs/source/serving/deploying_with_helm.rst @@ -47,277 +47,207 @@ Architecture Values ------ -+---------------------+---------+-----------------------+---------------+ -| Key | Type | Default | Description | -+=====================+=========+=======================+===============+ -| autoscaling | object | ``{"ena | Autoscaling | -| | | bled":false,"maxRepli | configuration | -| | | cas":100,"minReplicas | | -| | | ":1,"targetCPUUtiliza | | -| | | tionPercentage":80}`` | | -+---------------------+---------+-----------------------+---------------+ -| autoscaling.enabled | bool | ``false`` | Enable | -| | | | autoscaling | -+---------------------+---------+-----------------------+---------------+ -| autoscaling. | int | ``100`` | Maximum | -| maxReplicas | | | replicas | -+---------------------+---------+-----------------------+---------------+ -| autoscaling. | int | ``1`` | Minimum | -| minReplicas | | | replicas | -+---------------------+---------+-----------------------+---------------+ -| autoscaling. 
| int | ``80`` | Target CPU | -| targetCPUUt | | | utilization | -| ilizationPercentage | | | for | -| | | | autoscaling | -+---------------------+---------+-----------------------+---------------+ -| configs | object | ``{}`` | Configmap | -+---------------------+---------+-----------------------+---------------+ -| containerPort | int | ``8000`` | Container | -| | | | port | -+---------------------+---------+-----------------------+---------------+ -| customObjects | list | ``[]`` | Custom | -| | | | Objects | -| | | | configuration | -| | | | | -+---------------------+---------+-----------------------+---------------+ -| deploymentStrategy | object | ``{}`` | Deployment | -| | | | strategy | -| | | | configuration | -| | | | | -+---------------------+---------+-----------------------+---------------+ -| externalConfigs | list | ``[]`` | External | -| | | | configuration | -| | | | | -+---------------------+---------+-----------------------+---------------+ -| extraContainers | list | ``[]`` | Additional | -| | | | containers | -| | | | configuration | -| | | | | -+---------------------+---------+-----------------------+---------------+ -| extraInit | object | ``{"pvcStorage":"1Gi | Additional | -| | | ","s3modelpath":"rela | configuration | -| | | tive_s3_model_path/op | for the | -| | | t-125m", "awsEc2Metad | init | -| | | ataDisabled": true}`` | container | -| | | | | -+---------------------+---------+-----------------------+---------------+ -| extraInit. | string | ``"50Gi"`` | Storage | -| pvcStorage | | | size of the | -| | | | s3 | -+---------------------+---------+-----------------------+---------------+ -| exraInit. 
| string | ``"relative_s3_m | Path of the | -| s3modelpath | | odel_path/opt-125m"`` | model on | -| | | | the s3 | -| | | | which hosts | -| | | | model | -| | | | weights and | -| | | | config | -| | | | files | -+---------------------+---------+-----------------------+---------------+ -| extraInit.aws | boolean | ``true`` | Disables | -| Ec2MetadataDisabled | | | the use of | -| | | | the Amazon | -| | | | EC2 | -| | | | instance | -| | | | metadata | -| | | | service | -+---------------------+---------+-----------------------+---------------+ -| extraPorts | list | ``[]`` | Additional | -| | | | ports | -| | | | configuration | -| | | | | -+---------------------+---------+-----------------------+---------------+ -| gpuModels | list | ``["TYPE_GPU_USED"]`` | Type of gpu | -| | | | used | -+---------------------+---------+-----------------------+---------------+ -| image | object | ``{"command": | Image | -| | | ["vllm","serve"," | configuration | -| | | /data/","--served-mod | | -| | | el-name","opt-125m"," | | -| | | --host","0.0.0.0","-- | | -| | | port","8000"],"reposi | | -| | | tory":"vllm/vllm-open | | -| | | ai","tag":"latest"}`` | | -+---------------------+---------+-----------------------+---------------+ -| image.command | list | ``["vllm","se | Container | -| | | rve","/data/","--serv | launch | -| | | ed-model-name","opt-1 | command | -| | | 25m","--host","0.0.0. 
| | -| | | 0","--port","8000"]`` | | -+---------------------+---------+-----------------------+---------------+ -| image.repository | string | ` | Image | -| | | `"vllm/vllm-openai"`` | repository | -+---------------------+---------+-----------------------+---------------+ -| image.tag | string | ``"latest"`` | Image tag | -+---------------------+---------+-----------------------+---------------+ -| livenessProbe | object | ``{"fa | Liveness | -| | | ilureThreshold":3,"ht | probe | -| | | tpGet":{"path":"/heal | configuration | -| | | th","port":8000},"ini | | -| | | tialDelaySeconds":15, | | -| | | "periodSeconds":10}`` | | -+---------------------+---------+-----------------------+---------------+ -| livenessProbe. | int | ``3`` | Number of | -| failureThreshold | | | times after | -| | | | which if a | -| | | | probe fails | -| | | | in a row, | -| | | | Kubernetes | -| | | | considers | -| | | | that the | -| | | | overall | -| | | | check has | -| | | | failed: the | -| | | | container | -| | | | is not | -| | | | alive | -+---------------------+---------+-----------------------+---------------+ -| livenessProbe. | object | ``{"path":"/h | Configuration | -| httpGet | | ealth","port":8000}`` | of the | -| | | | Kubelet | -| | | | http | -| | | | request on | -| | | | the server | -+---------------------+---------+-----------------------+---------------+ -| livenessProbe. | string | ``"/health"`` | Path to | -| httpGet.path | | | access on | -| | | | the HTTP | -| | | | server | -+---------------------+---------+-----------------------+---------------+ -| livenessProbe. | int | ``8000`` | Name or | -| httpGet.port | | | number of | -| | | | the port to | -| | | | access on | -| | | | the | -| | | | container, | -| | | | on which | -| | | | the server | -| | | | is | -| | | | listening | -+---------------------+---------+-----------------------+---------------+ -| livenessProbe. 
| int | ``15`` | Number of | -| initialDelaySeconds | | | seconds | -| | | | after the | -| | | | container | -| | | | has started | -| | | | before | -| | | | liveness | -| | | | probe is | -| | | | initiated | -+---------------------+---------+-----------------------+---------------+ -| livenessProbe. | int | ``10`` | How often | -| periodSeconds | | | (in | -| | | | seconds) to | -| | | | perform the | -| | | | liveness | -| | | | probe | -+---------------------+---------+-----------------------+---------------+ -| maxUnavailable | string | ``""`` | Disruption | -| PodDisruptionBudget | | | Budget | -| | | | Configuration | -+---------------------+---------+-----------------------+---------------+ -| readinessProbe | object | ``{" | Readiness | -| | | failureThreshold":3," | probe | -| | | httpGet":{"path":"/he | configuration | -| | | alth","port":8000},"i | | -| | | nitialDelaySeconds":5 | | -| | | ,"periodSeconds":5}`` | | -+---------------------+---------+-----------------------+---------------+ -| readinessProbe. | int | ``3`` | Number of | -| failureThreshold | | | times after | -| | | | which if a | -| | | | probe fails | -| | | | in a row, | -| | | | Kubernetes | -| | | | considers | -| | | | that the | -| | | | overall | -| | | | check has | -| | | | failed: the | -| | | | container | -| | | | is not | -| | | | ready | -+---------------------+---------+-----------------------+---------------+ -| readinessProbe. | object | ``{"path":"/h | Configuration | -| httpGet | | ealth","port":8000}`` | of the | -| | | | Kubelet | -| | | | http | -| | | | request on | -| | | | the server | -+---------------------+---------+-----------------------+---------------+ -| readinessProbe. | string | ``"/health"`` | Path to | -| httpGet.path | | | access on | -| | | | the HTTP | -| | | | server | -+---------------------+---------+-----------------------+---------------+ -| readinessProbe. 
| int | ``8000`` | Name or | -| httpGet.port | | | number of | -| | | | the port to | -| | | | access on | -| | | | the | -| | | | container, | -| | | | on which | -| | | | the server | -| | | | is | -| | | | listening | -+---------------------+---------+-----------------------+---------------+ -| readinessProbe. | int | ``5`` | Number of | -| initialDelaySeconds | | | seconds | -| | | | after the | -| | | | container | -| | | | has started | -| | | | before | -| | | | readiness | -| | | | probe is | -| | | | initiated | -+---------------------+---------+-----------------------+---------------+ -| readinessProbe. | int | ``5`` | How often | -| periodSeconds | | | (in | -| | | | seconds) to | -| | | | perform the | -| | | | readiness | -| | | | probe | -+---------------------+---------+-----------------------+---------------+ -| replicaCount | int | ``1`` | Number of | -| | | | replicas | -+---------------------+---------+-----------------------+---------------+ -| resources | object | ``{"limits | Resource | -| | | ":{"cpu":4,"memory":" | configuration | -| | | 16Gi","nvidia.com/gpu | | -| | | ":1},"requests":{"cpu | | -| | | ":4,"memory":"16Gi"," | | -| | | nvidia.com/gpu":1}}`` | | -+---------------------+---------+-----------------------+---------------+ -| resources.limits. | int | ``1`` | Number of | -| ”nvidia.com/gpu” | | | gpus used | -+---------------------+---------+-----------------------+---------------+ -| resources. | int | ``4`` | Number of | -| limits.cpu | | | CPUs | -+---------------------+---------+-----------------------+---------------+ -| resources. | string | ``"16Gi"`` | CPU memory | -| limits.memory | | | configuration | -+---------------------+---------+-----------------------+---------------+ -| resources.requests. | int | ``1`` | Number of | -| ”nvidia.com/gpu” | | | gpus used | -+---------------------+---------+-----------------------+---------------+ -| resources. 
| int | ``4`` | Number of | -| requests.cpu | | | CPUs | -+---------------------+---------+-----------------------+---------------+ -| resources. | string | ``"16Gi"`` | CPU memory | -| requests.memory | | | configuration | -+---------------------+---------+-----------------------+---------------+ -| secrets | object | ``{}`` | Secrets | -| | | | configuration | -+---------------------+---------+-----------------------+---------------+ -| serviceName | string | | Service | -| | | | name | -+---------------------+---------+-----------------------+---------------+ -| servicePort | int | ``80`` | Service | -| | | | port | -+---------------------+---------+-----------------------+---------------+ -| labels.environment | string | ``test`` | Environment | -| | | | name | -+---------------------+---------+-----------------------+---------------+ -| labels.release | string | ``test`` | Release | -| | | | name | -+---------------------+---------+-----------------------+---------------+ \ No newline at end of file +.. 
list-table:: Values + :widths: 25 25 25 25 + :header-rows: 1 + + * - Key + - Type + - Default + - Description + * - autoscaling + - object + - {"enabled":false,"maxReplicas":100,"minReplicas":1,"targetCPUUtilizationPercentage":80} + - Autoscaling configuration + * - autoscaling.enabled + - bool + - false + - Enable autoscaling + * - autoscaling.maxReplicas + - int + - 100 + - Maximum replicas + * - autoscaling.minReplicas + - int + - 1 + - Minimum replicas + * - autoscaling.targetCPUUtilizationPercentage + - int + - 80 + - Target CPU utilization for autoscaling + * - configs + - object + - {} + - Configmap + * - containerPort + - int + - 8000 + - Container port + * - customObjects + - list + - [] + - Custom Objects configuration + * - deploymentStrategy + - object + - {} + - Deployment strategy configuration + * - externalConfigs + - list + - [] + - External configuration + * - extraContainers + - list + - [] + - Additional containers configuration + * - extraInit + - object + - {"pvcStorage":"1Gi","s3modelpath":"relative_s3_model_path/opt-125m", "awsEc2MetadataDisabled": true} + - Additional configuration for the init container + * - extraInit.pvcStorage + - string + - "50Gi" + - Storage size of the s3 + * - extraInit.s3modelpath + - string + - "relative_s3_model_path/opt-125m" + - Path of the model on the s3 which hosts model weights and config files + * - extraInit.awsEc2MetadataDisabled + - boolean + - true + - Disables the use of the Amazon EC2 instance metadata service + * - extraPorts + - list + - [] + - Additional ports configuration + * - gpuModels + - list + - ["TYPE_GPU_USED"] + - Type of gpu used + * - image + - object + - {"command":["vllm","serve","/data/","--served-model-name","opt-125m","--host","0.0.0.0","--port","8000"],"repository":"vllm/vllm-openai","tag":"latest"} + - Image configuration + * - image.command + - list + - ["vllm","serve","/data/","--served-model-name","opt-125m","--host","0.0.0.0","--port","8000"] + - Container launch command + 
* - image.repository + - string + - "vllm/vllm-openai" + - Image repository + * - image.tag + - string + - "latest" + - Image tag + * - livenessProbe + - object + - {"failureThreshold":3,"httpGet":{"path":"/health","port":8000},"initialDelaySeconds":15,"periodSeconds":10} + - Liveness probe configuration + * - livenessProbe.failureThreshold + - int + - 3 + - Number of times after which if a probe fails in a row, Kubernetes considers that the overall check has failed: the container is not alive + * - livenessProbe.httpGet + - object + - {"path":"/health","port":8000} + - Configuration of the Kubelet http request on the server + * - livenessProbe.httpGet.path + - string + - "/health" + - Path to access on the HTTP server + * - livenessProbe.httpGet.port + - int + - 8000 + - Name or number of the port to access on the container, on which the server is listening + * - livenessProbe.initialDelaySeconds + - int + - 15 + - Number of seconds after the container has started before liveness probe is initiated + * - livenessProbe.periodSeconds + - int + - 10 + - How often (in seconds) to perform the liveness probe + * - maxUnavailablePodDisruptionBudget + - string + - "" + - Disruption Budget Configuration + * - readinessProbe + - object + - {"failureThreshold":3,"httpGet":{"path":"/health","port":8000},"initialDelaySeconds":5,"periodSeconds":5} + - Readiness probe configuration + * - readinessProbe.failureThreshold + - int + - 3 + - Number of times after which if a probe fails in a row, Kubernetes considers that the overall check has failed: the container is not ready + * - readinessProbe.httpGet + - object + - {"path":"/health","port":8000} + - Configuration of the Kubelet http request on the server + * - readinessProbe.httpGet.path + - string + - "/health" + - Path to access on the HTTP server + * - readinessProbe.httpGet.port + - int + - 8000 + - Name or number of the port to access on the container, on which the server is listening + * - 
readinessProbe.initialDelaySeconds + - int + - 5 + - Number of seconds after the container has started before readiness probe is initiated + * - readinessProbe.periodSeconds + - int + - 5 + - How often (in seconds) to perform the readiness probe + * - replicaCount + - int + - 1 + - Number of replicas + * - resources + - object + - {"limits":{"cpu":4,"memory":"16Gi","nvidia.com/gpu":1},"requests":{"cpu":4,"memory":"16Gi","nvidia.com/gpu":1}} + - Resource configuration + * - resources.limits."nvidia.com/gpu" + - int + - 1 + - Number of gpus used + * - resources.limits.cpu + - int + - 4 + - Number of CPUs + * - resources.limits.memory + - string + - "16Gi" + - CPU memory configuration + * - resources.requests."nvidia.com/gpu" + - int + - 1 + - Number of gpus used + * - resources.requests.cpu + - int + - 4 + - Number of CPUs + * - resources.requests.memory + - string + - "16Gi" + - CPU memory configuration + * - secrets + - object + - {} + - Secrets configuration + * - serviceName + - string + - + - Service name + * - servicePort + - int + - 80 + - Service port + * - labels.environment + - string + - test + - Environment name + * - labels.release + - string + - test + - Release name \ No newline at end of file