# Unified diff against the LoRAX Helm chart (charts/lorax), stored here collapsed onto one line.
#   1. templates/deployment.yaml: after the existing `--sharded` argument pair and before `env:`,
#      adds an `--eager-prefill` argument followed by `.Values.deployment.args.eagerPrefill | quote`.
#   2. values.yaml: adds `eagerPrefill: false` directly after `sharded: false`, giving the value
#      read by the template a default.
# NOTE(review): the original line breaks and the indentation of the hunk lines are not preserved
# in this copy, and the values.yaml hunk header declares 6 old / 7 new lines while only 5 / 6 are
# visible, so it appears to be cut off one context line early. Restore the patch from its source
# before applying it.
diff --git a/charts/lorax/templates/deployment.yaml b/charts/lorax/templates/deployment.yaml index 9ae096aca..1646e6d56 100644 --- a/charts/lorax/templates/deployment.yaml +++ b/charts/lorax/templates/deployment.yaml @@ -45,6 +45,8 @@ spec: - {{ .Values.deployment.args.maxBatchPrefillTokens | quote }} - --sharded - {{ .Values.deployment.args.sharded | quote }} + - --eager-prefill + - {{ .Values.deployment.args.eagerPrefill | quote }} env: - name: PORT value: "8000" diff --git a/charts/lorax/values.yaml b/charts/lorax/values.yaml index cffb9b80a..c9769484e 100644 --- a/charts/lorax/values.yaml +++ b/charts/lorax/values.yaml @@ -13,6 +13,7 @@ deployment: maxBatchTotalTokens: 4096 maxBatchPrefillTokens: 2048 sharded: false + eagerPrefill: false env: # Your huggingface hub token. Required for some models such as the llama-2 family.