Skip to content

Commit

Permalink
update triton_model_repo and default config
Browse files (browse the repository at this point in the history)
  • Loading branch information
yorickvP committed Aug 7, 2024
1 parent 56557e8 commit 4152b5a
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 3 deletions.
3 changes: 2 additions & 1 deletion configs/example_official_model_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,13 @@ instantiate:
max_queue_delay_microseconds: 100
max_attention_window_size: 4096
kv_cache_free_gpu_mem_fraction: 0.95
+ max_queue_size: 0


postprocessing:
args:
tokenizer_dir: /src/triton_model_repo/tensorrt_llm/1/
- tokenizer_type: llama
+ tokenizer_type: auto
triton_max_batch_size: 64
postprocessing_instance_count: 64

Expand Down
2 changes: 1 addition & 1 deletion triton_model_repo/tensorrt_llm/config.pbtxt
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ model_transaction_policy {
dynamic_batching {
preferred_batch_size: [ 64 ]
max_queue_delay_microseconds: 100
- default_queue_policy: { max_queue_size: ${max_queue_size} }
+ default_queue_policy: { max_queue_size: 0 }
}
input [
Expand Down
2 changes: 1 addition & 1 deletion triton_templates/tensorrt_llm/config.pbtxt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

name: "tensorrt_llm"
- backend: "${triton_backend}"
+ backend: "tensorrtllm"
max_batch_size: ${triton_max_batch_size}

model_transaction_policy {
Expand Down

0 comments on commit 4152b5a

Please sign in to comment.