Commit 7231134

doc: remove backend parameter for trtllm-bench when backend is set to… (#6428)
Signed-off-by: nv-guomingz <[email protected]>
1 parent f1086e7 commit 7231134
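
This commit applies one mechanical change across all five files: the explicit `--backend pytorch` flag is removed from every documented `trtllm-bench` invocation, since (per the commit title) the flag is redundant when the backend is already set to that value by default. A minimal before/after sketch of the pattern applied throughout the diff (`<MODEL>` and `<DATA>` are placeholders, not values from the diff):

```shell
# Before: the docs passed the backend explicitly on every invocation
trtllm-bench --model <MODEL> throughput --dataset <DATA> --backend pytorch

# After: the flag is dropped; trtllm-bench falls back to its default
# (PyTorch) backend, so the documented behavior is unchanged
trtllm-bench --model <MODEL> throughput --dataset <DATA>
```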

5 files changed (+4 −20 lines)

docs/source/blogs/Best_perf_practice_on_DeepSeek-R1_in_TensorRT-LLM.md

Lines changed: 0 additions & 6 deletions
@@ -149,7 +149,6 @@ export TRTLLM_ENABLE_PDL=1
 trtllm-bench --model nvidia/DeepSeek-R1-FP4 \
     throughput \
     --dataset $YOUR_DATA_PATH \
-    --backend pytorch \
     --num_requests 10 \
     --concurrency 1 \
     --max_batch_size 1 \
@@ -161,7 +160,6 @@ trtllm-bench --model nvidia/DeepSeek-R1-FP4 \
 Explanation:
 - `trtllm-bench`: A CLI benchmarking utility that aims to make it easier for users to reproduce our officially published benchmarks. See [TensorRT-LLM Benchmarking](https://nvidia.github.io/TensorRT-LLM/performance/perf-benchmarking.html) for details.
 - `--dataset`: Prompt dataset used to benchmark. Our official benchmark dataset has ISL = 1K, OSL = 2K
-- `--backend`: Inference backend. Here we use PyTorch backend.
 - `--num_requests`: Num requests used for the benchmark.
 - `--concurrency`: Total concurrency for the system.
 - `--max_batch_size`: Max batch size in each rank.
@@ -216,7 +214,6 @@ EOF
 trtllm-bench --model nvidia/DeepSeek-R1-0528-FP4 \
     throughput \
     --dataset ${YOUR_DATA_PATH} \
-    --backend pytorch \
     --tp 8 --ep 8 \
     --extra_llm_api_options ./extra-llm-api-config.yml \
     --max_batch_size 896 \
@@ -285,7 +282,6 @@ trtllm-bench -m nvidia/DeepSeek-R1-FP4 \
     --ep 8 \
     --warmup 0 \
     --dataset ${YOUR_DATA_PATH} \
-    --backend pytorch \
     --max_batch_size 384 \
     --max_num_tokens 1536 \
     --num_requests 49152 \
@@ -325,7 +321,6 @@ EOF
 trtllm-bench --model deepseek-ai/DeepSeek-R1 \
     throughput \
     --dataset $YOUR_DATA_PATH \
-    --backend pytorch \
     --num_requests 10 \
     --max_batch_size 1 \
     --tp 8 \
@@ -380,7 +375,6 @@ trtllm-bench -m deepseek-ai/DeepSeek-R1 \
     --ep 8 \
     --warmup 0 \
     --dataset $YOUR_DATA_PATH \
-    --backend pytorch \
     --max_batch_size 128 \
     --max_num_tokens 1151 \
     --num_requests 5120 \

docs/source/performance/perf-analysis.md

Lines changed: 0 additions & 1 deletion
@@ -83,7 +83,6 @@ TLLM_PROFILE_START_STOP=100-150 nsys profile \
     --model_path ${MODEL_PATH} \
     throughput \
     --dataset /tmp/dataset.txt --warmup 0 \
-    --backend pytorch \
     --streaming
 ```

docs/source/performance/perf-benchmarking.md

Lines changed: 1 addition & 3 deletions
@@ -438,7 +438,7 @@ for build heuristics.
 ```
 
 ```shell
-trtllm-bench --model meta-llama/Llama-3.1-8B --model_path /Ckpt/Path/To/Llama-3.1-8B throughput --dataset /tmp/synthetic_128_128.txt --backend pytorch
+trtllm-bench --model meta-llama/Llama-3.1-8B --model_path /Ckpt/Path/To/Llama-3.1-8B throughput --dataset /tmp/synthetic_128_128.txt
 
 # Example output
 <snip verbose logging>
@@ -544,7 +544,6 @@ lora_config:
 trtllm-bench --model /path/to/base/model \
     throughput \
     --dataset synthetic_lora_data.json \
-    --backend pytorch \
     --extra_llm_api_options extra-llm-api-options.yaml
 ```
@@ -586,7 +585,6 @@ Run the benchmark:
 trtllm-bench --model Qwen/Qwen2-VL-2B-Instruct \
     throughput \
     --dataset mm_data.jsonl \
-    --backend pytorch \
     --num_requests 10 \
     --max_batch_size 4 \
     --modality image

examples/models/core/deepseek_v3/README.md

Lines changed: 3 additions & 7 deletions
@@ -150,7 +150,6 @@ trtllm-bench -m deepseek-ai/DeepSeek-R1 --model_path ${DS_R1_NVFP4_MODEL_PATH} t
     --tp 8 --ep 8 \
     --warmup 0 \
     --dataset /tmp/benchmarking_64k.txt \
-    --backend pytorch \
     --max_batch_size 12 \
     --max_num_tokens 65548 \
     --kv_cache_free_gpu_mem_fraction 0.6 \
@@ -179,7 +178,6 @@ trtllm-bench -m deepseek-ai/DeepSeek-R1 --model_path ${DS_R1_NVFP4_MODEL_PATH} t
     --tp 8 --ep 8 \
     --warmup 0 \
     --dataset /tmp/benchmarking_128k.txt \
-    --backend pytorch \
     --max_batch_size 2 \
     --max_num_tokens 131074 \
     --kv_cache_free_gpu_mem_fraction 0.3 \
@@ -512,7 +510,7 @@ mpirun \
     -H <HOST1>:8,<HOST2>:8 \
     -mca plm_rsh_args "-p 2233" \
     --allow-run-as-root -n 16 \
-    trtllm-llmapi-launch trtllm-bench --model deepseek-ai/DeepSeek-V3 --model_path /models/DeepSeek-V3 throughput --backend pytorch --max_batch_size 161 --max_num_tokens 1160 --dataset /workspace/tensorrt_llm/dataset_isl1000.txt --tp 16 --ep 8 --kv_cache_free_gpu_mem_fraction 0.95 --extra_llm_api_options /workspace/tensorrt_llm/extra-llm-api-config.yml --concurrency 4096 --streaming
+    trtllm-llmapi-launch trtllm-bench --model deepseek-ai/DeepSeek-V3 --model_path /models/DeepSeek-V3 throughput --max_batch_size 161 --max_num_tokens 1160 --dataset /workspace/tensorrt_llm/dataset_isl1000.txt --tp 16 --ep 8 --kv_cache_free_gpu_mem_fraction 0.95 --extra_llm_api_options /workspace/tensorrt_llm/extra-llm-api-config.yml --concurrency 4096 --streaming
 ```
 
 #### Slurm
@@ -524,7 +522,7 @@ trtllm-llmapi-launch trtllm-bench --model deepseek-ai/DeepSeek-V3 --model_path /
     --container-image=<CONTAINER_IMG> \
     --container-mounts=/workspace:/workspace \
     --container-workdir /workspace \
-    bash -c "trtllm-llmapi-launch trtllm-bench --model deepseek-ai/DeepSeek-V3 --model_path <YOUR_MODEL_DIR> throughput --backend pytorch --max_batch_size 161 --max_num_tokens 1160 --dataset /workspace/dataset.txt --tp 16 --ep 4 --kv_cache_free_gpu_mem_fraction 0.95 --extra_llm_api_options ./extra-llm-api-config.yml"
+    bash -c "trtllm-llmapi-launch trtllm-bench --model deepseek-ai/DeepSeek-V3 --model_path <YOUR_MODEL_DIR> throughput --max_batch_size 161 --max_num_tokens 1160 --dataset /workspace/dataset.txt --tp 16 --ep 4 --kv_cache_free_gpu_mem_fraction 0.95 --extra_llm_api_options ./extra-llm-api-config.yml"
 ```

@@ -592,7 +590,7 @@ DS_R1_NVFP4_MODEL_PATH=/path/to/DeepSeek-R1 # optional
 trtllm-llmapi-launch trtllm-bench \
     --model deepseek-ai/DeepSeek-R1 \
    --model_path $DS_R1_NVFP4_MODEL_PATH \
-    throughput --backend pytorch \
+    throughput \
     --num_requests 49152 \
     --max_batch_size 384 --max_num_tokens 1536 \
     --concurrency 3072 \
@@ -644,7 +642,6 @@ trtllm-bench \
     --model deepseek-ai/DeepSeek-V3 \
     --model_path /models/DeepSeek-V3 \
     throughput \
-    --backend pytorch \
     --max_batch_size ${MAX_BATCH_SIZE} \
     --max_num_tokens ${MAX_NUM_TOKENS} \
     --dataset dataset.txt \
@@ -666,7 +663,6 @@ mpirun -H <HOST1>:8,<HOST2>:8 \
     --model deepseek-ai/DeepSeek-V3 \
     --model_path /models/DeepSeek-V3 \
     throughput \
-    --backend pytorch \
     --max_batch_size ${MAX_BATCH_SIZE} \
     --max_num_tokens ${MAX_NUM_TOKENS} \
     --dataset dataset.txt \

examples/wide_ep/ep_load_balancer/README.md

Lines changed: 0 additions & 3 deletions
@@ -41,7 +41,6 @@ trtllm-bench --model ${MODEL_NAME} \
     --ep 32 \
     --extra_llm_api_options ./extra_llm_api_options.yaml \
     --kv_cache_free_gpu_mem_fraction 0.75 \
-    --backend pytorch \
     --dataset ./dataset.json \
     --warmup 0 \
     --eos_id -1
@@ -133,7 +132,6 @@ trtllm-bench --model ${MODEL_NAME} \
     --ep 36 \
     --extra_llm_api_options ./extra_llm_api_options_eplb.yaml \
     --kv_cache_free_gpu_mem_fraction 0.75 \
-    --backend pytorch \
     --dataset ./dataset.json \
     --warmup 0 \
     --eos_id -1
@@ -200,7 +198,6 @@ trtllm-bench --model ${MODEL_NAME} \
     --ep 36 \
     --extra_llm_api_options ./extra_llm_api_options_eplb.yaml \
     --kv_cache_free_gpu_mem_fraction 0.75 \
-    --backend pytorch \
     --dataset ./dataset.json \
     --warmup 0 \
     --eos_id -1
