2 files changed: +10 −10 lines changed.

File 1 (filename not shown in this capture) — diff:

-from tensorrt_llm import LLM, SamplingParams
+from tensorrt_llm import BuildConfig, SamplingParams
+from tensorrt_llm._tensorrt_engine import LLM  # NOTE the change


 def main():

+    build_config = BuildConfig()
+    build_config.max_batch_size = 256
+    build_config.max_num_tokens = 1024
+
     # Model could accept HF model name, a path to local HF model,
     # or TensorRT Model Optimizer's quantized checkpoints like nvidia/Llama-3.1-8B-Instruct-FP8 on HF.
-    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")
+    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+              build_config=build_config)

     # Sample prompts.
     prompts = [
File 2 (filename not shown in this capture) — diff (the reverse change):

-from tensorrt_llm import BuildConfig, SamplingParams
-from tensorrt_llm._tensorrt_engine import LLM  # NOTE the change
+from tensorrt_llm import LLM, SamplingParams


 def main():

-    build_config = BuildConfig()
-    build_config.max_batch_size = 256
-    build_config.max_num_tokens = 1024
-
     # Model could accept HF model name, a path to local HF model,
     # or TensorRT Model Optimizer's quantized checkpoints like nvidia/Llama-3.1-8B-Instruct-FP8 on HF.
-    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-              build_config=build_config)
+    llm = LLM(model="TinyLlama/TinyLlama-1.1B-Chat-v1.0")

     # Sample prompts.
     prompts = [
You can’t perform that action at this time.
0 commit comments