update triton_model_repo and default config

replicate · Aug 7, 2024 · 4152b5a
1 parent 56557e8
commit 4152b5a
Show file tree

Hide file tree

Showing 3 changed files with 4 additions and 3 deletions.
diff --git a/configs/example_official_model_config.yaml b/configs/example_official_model_config.yaml
@@ -28,12 +28,13 @@ instantiate:
         max_queue_delay_microseconds: 100
         max_attention_window_size: 4096
         kv_cache_free_gpu_mem_fraction: 0.95
+        max_queue_size: 0
 
 
     postprocessing:
       args:
         tokenizer_dir: /src/triton_model_repo/tensorrt_llm/1/
-        tokenizer_type: llama
+        tokenizer_type: auto
         triton_max_batch_size: 64
         postprocessing_instance_count: 64
 

diff --git a/triton_model_repo/tensorrt_llm/config.pbtxt b/triton_model_repo/tensorrt_llm/config.pbtxt
@@ -35,7 +35,7 @@ model_transaction_policy {
 dynamic_batching {
     preferred_batch_size: [ 64 ]
     max_queue_delay_microseconds: 100
-    default_queue_policy: { max_queue_size: ${max_queue_size} }
+    default_queue_policy: { max_queue_size: 0 }
 }
 
 input [

diff --git a/triton_templates/tensorrt_llm/config.pbtxt b/triton_templates/tensorrt_llm/config.pbtxt
@@ -25,7 +25,7 @@
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 name: "tensorrt_llm"
-backend: "${triton_backend}"
+backend: "tensorrtllm"
 max_batch_size: ${triton_max_batch_size}
 
 model_transaction_policy {