@@ -150,7 +150,6 @@ trtllm-bench -m deepseek-ai/DeepSeek-R1 --model_path ${DS_R1_NVFP4_MODEL_PATH} t
150150 --tp 8 --ep 8 \
151151 --warmup 0 \
152152 --dataset /tmp/benchmarking_64k.txt \
153- --backend pytorch \
154153 --max_batch_size 12 \
155154 --max_num_tokens 65548 \
156155 --kv_cache_free_gpu_mem_fraction 0.6 \
@@ -179,7 +178,6 @@ trtllm-bench -m deepseek-ai/DeepSeek-R1 --model_path ${DS_R1_NVFP4_MODEL_PATH} t
179178 --tp 8 --ep 8 \
180179 --warmup 0 \
181180 --dataset /tmp/benchmarking_128k.txt \
182- --backend pytorch \
183181 --max_batch_size 2 \
184182 --max_num_tokens 131074 \
185183 --kv_cache_free_gpu_mem_fraction 0.3 \
@@ -512,7 +510,7 @@ mpirun \
512510-H <HOST1>:8,<HOST2>:8 \
513511-mca plm_rsh_args "-p 2233" \
514512--allow-run-as-root -n 16 \
515- trtllm-llmapi-launch trtllm-bench --model deepseek-ai/DeepSeek-V3 --model_path /models/DeepSeek-V3 throughput --backend pytorch -- max_batch_size 161 --max_num_tokens 1160 --dataset /workspace/tensorrt_llm/dataset_isl1000.txt --tp 16 --ep 8 --kv_cache_free_gpu_mem_fraction 0.95 --extra_llm_api_options /workspace/tensorrt_llm/extra-llm-api-config.yml --concurrency 4096 --streaming
513+ trtllm-llmapi-launch trtllm-bench --model deepseek-ai/DeepSeek-V3 --model_path /models/DeepSeek-V3 throughput --max_batch_size 161 --max_num_tokens 1160 --dataset /workspace/tensorrt_llm/dataset_isl1000.txt --tp 16 --ep 8 --kv_cache_free_gpu_mem_fraction 0.95 --extra_llm_api_options /workspace/tensorrt_llm/extra-llm-api-config.yml --concurrency 4096 --streaming
516514```
517515
518516#### Slurm
@@ -524,7 +522,7 @@ trtllm-llmapi-launch trtllm-bench --model deepseek-ai/DeepSeek-V3 --model_path /
524522 --container-image=<CONTAINER_IMG> \
525523 --container-mounts=/workspace:/workspace \
526524 --container-workdir /workspace \
527- bash -c " trtllm-llmapi-launch trtllm-bench --model deepseek-ai/DeepSeek-V3 --model_path <YOUR_MODEL_DIR> throughput --backend pytorch -- max_batch_size 161 --max_num_tokens 1160 --dataset /workspace/dataset.txt --tp 16 --ep 4 --kv_cache_free_gpu_mem_fraction 0.95 --extra_llm_api_options ./extra-llm-api-config.yml"
525+ bash -c "trtllm-llmapi-launch trtllm-bench --model deepseek-ai/DeepSeek-V3 --model_path <YOUR_MODEL_DIR> throughput --max_batch_size 161 --max_num_tokens 1160 --dataset /workspace/dataset.txt --tp 16 --ep 4 --kv_cache_free_gpu_mem_fraction 0.95 --extra_llm_api_options ./extra-llm-api-config.yml"
528526```
529527
530528
@@ -592,7 +590,7 @@ DS_R1_NVFP4_MODEL_PATH=/path/to/DeepSeek-R1 # optional
592590trtllm-llmapi-launch trtllm-bench \
593591 --model deepseek-ai/DeepSeek-R1 \
594592 --model_path $DS_R1_NVFP4_MODEL_PATH \
595- throughput --backend pytorch \
593+ throughput \
596594 --num_requests 49152 \
597595 --max_batch_size 384 --max_num_tokens 1536 \
598596 --concurrency 3072 \
@@ -644,7 +642,6 @@ trtllm-bench \
644642 --model deepseek-ai/DeepSeek-V3 \
645643 --model_path /models/DeepSeek-V3 \
646644 throughput \
647- --backend pytorch \
648645 --max_batch_size ${MAX_BATCH_SIZE} \
649646 --max_num_tokens ${MAX_NUM_TOKENS} \
650647 --dataset dataset.txt \
@@ -666,7 +663,6 @@ mpirun -H <HOST1>:8,<HOST2>:8 \
666663 --model deepseek-ai/DeepSeek-V3 \
667664 --model_path /models/DeepSeek-V3 \
668665 throughput \
669- --backend pytorch \
670666 --max_batch_size ${MAX_BATCH_SIZE} \
671667 --max_num_tokens ${MAX_NUM_TOKENS} \
672668 --dataset dataset.txt \
0 commit comments