From 7025918c813d08d718b8939f44f10651a0ff2c8c Mon Sep 17 00:00:00 2001 From: Philip Kiely - Baseten <98474633+philipkiely-baseten@users.noreply.github.com> Date: Wed, 20 Nov 2024 19:01:34 -0600 Subject: [PATCH] Update config.yaml (#381) Force rebuild --- llama/engine-llama-3-1-70b-instruct/README.md | 2 +- llama/engine-llama-3-1-70b-instruct/config.yaml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/llama/engine-llama-3-1-70b-instruct/README.md b/llama/engine-llama-3-1-70b-instruct/README.md index cd8c0db8..31237343 100644 --- a/llama/engine-llama-3-1-70b-instruct/README.md +++ b/llama/engine-llama-3-1-70b-instruct/README.md @@ -1,4 +1,4 @@ -# Llama 3.1 70B Instruct +# Llama 3.1 70B Instruct This deployment of Llama 3.1 70B Instruct uses the TensorRT-LLM Engine Builder. diff --git a/llama/engine-llama-3-1-70b-instruct/config.yaml b/llama/engine-llama-3-1-70b-instruct/config.yaml index 7b891c87..9058bc9d 100644 --- a/llama/engine-llama-3-1-70b-instruct/config.yaml +++ b/llama/engine-llama-3-1-70b-instruct/config.yaml @@ -42,3 +42,4 @@ trt_llm: num_builder_gpus: 4 quantization_type: fp8_kv tensor_parallel_count: 2 + enable_chunked_context: false