From 15a38d5d0e8e88f58b29d1b47ba82ad861d36f55 Mon Sep 17 00:00:00 2001 From: Binoy Dalal Date: Fri, 26 Jul 2024 12:31:57 -0400 Subject: [PATCH] Allow `eager_prefill` to be set in Helm chart (#557) --- charts/lorax/templates/deployment.yaml | 2 ++ charts/lorax/values.yaml | 1 + 2 files changed, 3 insertions(+) diff --git a/charts/lorax/templates/deployment.yaml b/charts/lorax/templates/deployment.yaml index 9ae096aca..1646e6d56 100644 --- a/charts/lorax/templates/deployment.yaml +++ b/charts/lorax/templates/deployment.yaml @@ -45,6 +45,8 @@ spec: - {{ .Values.deployment.args.maxBatchPrefillTokens | quote }} - --sharded - {{ .Values.deployment.args.sharded | quote }} + - --eager-prefill + - {{ .Values.deployment.args.eagerPrefill | quote }} env: - name: PORT value: "8000" diff --git a/charts/lorax/values.yaml b/charts/lorax/values.yaml index cffb9b80a..c9769484e 100644 --- a/charts/lorax/values.yaml +++ b/charts/lorax/values.yaml @@ -13,6 +13,7 @@ deployment: maxBatchTotalTokens: 4096 maxBatchPrefillTokens: 2048 sharded: false + eagerPrefill: false env: # Your huggingface hub token. Required for some models such as the llama-2 family.