Skip to content

Commit f5b24a2

Browse files
committed
Update GitHub pages in root to v1.1.0rc1
1 parent fc07cd3 commit f5b24a2

File tree

214 files changed

+18573
-18314
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

214 files changed

+18573
-18314
lines changed

.buildinfo

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
# Sphinx build info version 1
22
# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done.
3-
config: 948e770da845dc8359196b2c84313b41
3+
config: b90210c5e973dc682de95f90231586fa
44
tags: 645f666f9bcd5a90fca523b33c5a78b7

_cpp_gen/executor.html

Lines changed: 7668 additions & 7661 deletions
Large diffs are not rendered by default.

_cpp_gen/runtime.html

Lines changed: 9108 additions & 9108 deletions
Large diffs are not rendered by default.

_downloads/b509390ba70e52fabb10dbd9d15d5118/attention.py

Lines changed: 137 additions & 130 deletions
Large diffs are not rendered by default.

_downloads/c68095123d889975e6e5e839a4241d22/model_engine.py

Lines changed: 138 additions & 119 deletions
Large diffs are not rendered by default.

_downloads/cba6509356738d5d6b4dcb3b7f52cf39/llm_args.py

Lines changed: 46 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1968,6 +1968,13 @@ class LoadFormat(Enum):
19681968
DUMMY = 1
19691969

19701970

1971+
class SamplerType(StrEnum):
1972+
"""Enum for sampler type options."""
1973+
TRTLLMSampler = "TRTLLMSampler"
1974+
TorchSampler = "TorchSampler"
1975+
auto = "auto"
1976+
1977+
19711978
class TorchCompileConfig(StrictBaseModel):
19721979
"""
19731980
Configuration for torch.compile.
@@ -1983,6 +1990,21 @@ class TorchCompileConfig(StrictBaseModel):
19831990
default=False,
19841991
description="Enable piecewise CUDA graph in torch.compile.")
19851992

1993+
capture_num_tokens: Optional[List[int]] = Field(
1994+
default=None,
1995+
description=
1996+
"List of num of tokens to capture the piecewise CUDA graph for. If not provided, the number of tokens will be the same as cuda_graph_config.batch_sizes."
1997+
)
1998+
1999+
@field_validator('capture_num_tokens')
2000+
@classmethod
2001+
def validate_capture_num_tokens(cls, v):
2002+
if v is None:
2003+
return v
2004+
if any(t <= 0 for t in v):
2005+
raise ValueError("capture_num_tokens must contain positive ints.")
2006+
return sorted(set(v), reverse=True)
2007+
19862008
enable_userbuffers: bool = Field(
19872009
default=True,
19882010
description=
@@ -2055,11 +2077,11 @@ class TorchLlmArgs(BaseLlmArgs):
20552077
"If true, will iterate over sampling_params of each request and use the corresponding sampling strategy, e.g. top-k, top-p, etc.",
20562078
status="beta")
20572079

2058-
use_torch_sampler: bool = Field(
2059-
default=False,
2080+
sampler_type: Union[str, SamplerType] = Field(
2081+
default=SamplerType.auto,
20602082
description=
2061-
"If true, will use the Torch sampler instead of the TRTLLM sampler.",
2062-
status="beta")
2083+
"The type of sampler to use. Options are TRTLLMSampler, TorchSampler or auto. Defaults to auto, which will use TorchSampler unless BeamSearch is requested.",
2084+
status="prototype")
20632085

20642086
enable_iter_perf_stats: bool = Field(
20652087
default=False,
@@ -2076,6 +2098,12 @@ class TorchLlmArgs(BaseLlmArgs):
20762098
description="Print iteration logs.",
20772099
status="beta")
20782100

2101+
batch_wait_timeout_ms: float = Field(
2102+
default=0,
2103+
description=
2104+
"If greater than 0, the request queue might wait up to batch_wait_timeout_ms to receive max_batch_size requests, if fewer than max_batch_size requests are currently available. If 0, no waiting occurs.",
2105+
status="prototype")
2106+
20792107
torch_compile_config: Optional[TorchCompileConfig] = Field(
20802108
default=None, description="Torch compile config.", status="prototype")
20812109

@@ -2322,6 +2350,13 @@ def validate_attention_dp_config(self) -> 'TorchLlmArgs':
23222350
)
23232351
return self
23242352

2353+
@model_validator(mode='after')
2354+
def validate_batch_wait_timeout_ms(self) -> 'TorchLlmArgs':
2355+
"""Validate batch wait timeout."""
2356+
if self.batch_wait_timeout_ms < 0:
2357+
raise ValueError("batch_wait_timeout_ms must be greater than 0")
2358+
return self
2359+
23252360
# TODO: Remove this after the PyTorch backend is fully migrated to TorchLlmArgs from ExecutorConfig
23262361
def get_pytorch_backend_config(self) -> "PyTorchConfig":
23272362
from tensorrt_llm._torch.pyexecutor.config import PyTorchConfig
@@ -2344,7 +2379,7 @@ def get_pytorch_backend_config(self) -> "PyTorchConfig":
23442379
attn_backend=self.attn_backend,
23452380
moe_backend=self.moe_config.backend,
23462381
enable_mixed_sampler=self.enable_mixed_sampler,
2347-
use_torch_sampler=self.use_torch_sampler,
2382+
sampler_type=self.sampler_type,
23482383
kv_cache_dtype=self.kv_cache_config.dtype,
23492384
mamba_ssm_cache_dtype=self.kv_cache_config.mamba_ssm_cache_dtype,
23502385
enable_iter_perf_stats=self.enable_iter_perf_stats,
@@ -2361,6 +2396,10 @@ def get_pytorch_backend_config(self) -> "PyTorchConfig":
23612396
enable_piecewise_cuda_graph
23622397
if self.torch_compile_config is not None else TorchCompileConfig.
23632398
model_fields['enable_piecewise_cuda_graph'].default,
2399+
torch_compile_piecewise_cuda_graph_num_tokens=self.
2400+
torch_compile_config.capture_num_tokens
2401+
if self.torch_compile_config is not None else
2402+
TorchCompileConfig.model_fields['capture_num_tokens'].default,
23642403
torch_compile_enable_userbuffers=self.torch_compile_config.
23652404
enable_userbuffers if self.torch_compile_config is not None else
23662405
TorchCompileConfig.model_fields['enable_userbuffers'].default,
@@ -2383,7 +2422,8 @@ def get_pytorch_backend_config(self) -> "PyTorchConfig":
23832422
AttentionDpConfig.model_fields['timeout_iters'].default,
23842423
attention_dp_batching_wait_iters=self.attention_dp_config.
23852424
batching_wait_iters if self.attention_dp_config is not None else
2386-
AttentionDpConfig.model_fields['batching_wait_iters'].default)
2425+
AttentionDpConfig.model_fields['batching_wait_iters'].default,
2426+
batch_wait_timeout_ms=self.batch_wait_timeout_ms)
23872427

23882428

23892429
def update_llm_args_with_extra_dict(

_modules/index.html

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@
5858
<script>
5959
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
6060
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
61-
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc0';
61+
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc1';
6262
DOCUMENTATION_OPTIONS.show_version_warning_banner =
6363
false;
6464
</script>
@@ -68,7 +68,7 @@
6868

6969
<meta name="viewport" content="width=device-width, initial-scale=1"/>
7070
<meta name="docsearch:language" content="en"/>
71-
<meta name="docsearch:version" content="1.1.0rc0" />
71+
<meta name="docsearch:version" content="1.1.0rc1" />
7272

7373

7474
</head>
@@ -688,9 +688,9 @@ <h1>All modules for which code is available</h1>
688688
<div class="footer-item">
689689
<div class="extra_footer">
690690

691-
<p>Last updated on August 15, 2025.</p>
691+
<p>Last updated on August 19, 2025.</p>
692692

693-
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/26f413a">26f413a</a>.</p>
693+
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/7334f93">7334f93</a>.</p>
694694

695695
</div></div>
696696

_modules/tensorrt_llm/builder.html

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@
5858
<script>
5959
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
6060
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
61-
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc0';
61+
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc1';
6262
DOCUMENTATION_OPTIONS.show_version_warning_banner =
6363
false;
6464
</script>
@@ -68,7 +68,7 @@
6868

6969
<meta name="viewport" content="width=device-width, initial-scale=1"/>
7070
<meta name="docsearch:language" content="en"/>
71-
<meta name="docsearch:version" content="1.1.0rc0" />
71+
<meta name="docsearch:version" content="1.1.0rc1" />
7272

7373

7474
</head>
@@ -2038,9 +2038,9 @@ <h1>Source code for tensorrt_llm.builder</h1><div class="highlight"><pre>
20382038
<div class="footer-item">
20392039
<div class="extra_footer">
20402040

2041-
<p>Last updated on August 15, 2025.</p>
2041+
<p>Last updated on August 19, 2025.</p>
20422042

2043-
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/26f413a">26f413a</a>.</p>
2043+
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/7334f93">7334f93</a>.</p>
20442044

20452045
</div></div>
20462046

_modules/tensorrt_llm/disaggregated_params.html

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@
5858
<script>
5959
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
6060
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
61-
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc0';
61+
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc1';
6262
DOCUMENTATION_OPTIONS.show_version_warning_banner =
6363
false;
6464
</script>
@@ -68,7 +68,7 @@
6868

6969
<meta name="viewport" content="width=device-width, initial-scale=1"/>
7070
<meta name="docsearch:language" content="en"/>
71-
<meta name="docsearch:version" content="1.1.0rc0" />
71+
<meta name="docsearch:version" content="1.1.0rc1" />
7272

7373

7474
</head>
@@ -673,9 +673,9 @@ <h1>Source code for tensorrt_llm.disaggregated_params</h1><div class="highlight"
673673
<div class="footer-item">
674674
<div class="extra_footer">
675675

676-
<p>Last updated on August 15, 2025.</p>
676+
<p>Last updated on August 19, 2025.</p>
677677

678-
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/26f413a">26f413a</a>.</p>
678+
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/7334f93">7334f93</a>.</p>
679679

680680
</div></div>
681681

_modules/tensorrt_llm/executor/result.html

Lines changed: 8 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@
5858
<script>
5959
DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
6060
DOCUMENTATION_OPTIONS.theme_switcher_json_url = './_static/switcher.json';
61-
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc0';
61+
DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.1.0rc1';
6262
DOCUMENTATION_OPTIONS.show_version_warning_banner =
6363
false;
6464
</script>
@@ -68,7 +68,7 @@
6868

6969
<meta name="viewport" content="width=device-width, initial-scale=1"/>
7070
<meta name="docsearch:language" content="en"/>
71-
<meta name="docsearch:version" content="1.1.0rc0" />
71+
<meta name="docsearch:version" content="1.1.0rc1" />
7272

7373

7474
</head>
@@ -671,6 +671,9 @@ <h1>Source code for tensorrt_llm.executor.result</h1><div class="highlight"><pre
671671
<span class="bp">self</span><span class="o">.</span><span class="n">postproc_params</span> <span class="o">=</span> <span class="n">postproc_params</span>
672672
<span class="bp">self</span><span class="o">.</span><span class="n">disaggregated_params</span> <span class="o">=</span> <span class="kc">None</span>
673673
<span class="bp">self</span><span class="o">.</span><span class="n">decoding_iter</span> <span class="o">=</span> <span class="mi">0</span>
674+
<span class="c1"># Average decoded tokens per runtime iteration; set when the first LLM response arrives.</span>
675+
<span class="c1"># None indicates not yet available (e.g., before first step/stream).</span>
676+
<span class="bp">self</span><span class="o">.</span><span class="n">avg_decoded_tokens_per_iter</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="nb">float</span><span class="p">]</span> <span class="o">=</span> <span class="kc">None</span>
674677
<span class="bp">self</span><span class="o">.</span><span class="n">_done</span> <span class="o">=</span> <span class="kc">False</span>
675678
<span class="bp">self</span><span class="o">.</span><span class="n">metrics_dict</span> <span class="o">=</span> <span class="p">{}</span>
676679

@@ -844,6 +847,7 @@ <h1>Source code for tensorrt_llm.executor.result</h1><div class="highlight"><pre
844847
<span class="bp">self</span><span class="o">.</span><span class="n">_done</span> <span class="o">=</span> <span class="n">response_result</span><span class="o">.</span><span class="n">is_final</span>
845848
<span class="n">context_phase_params</span> <span class="o">=</span> <span class="n">response_result</span><span class="o">.</span><span class="n">context_phase_params</span>
846849
<span class="bp">self</span><span class="o">.</span><span class="n">decoding_iter</span> <span class="o">=</span> <span class="n">response_result</span><span class="o">.</span><span class="n">decoding_iter</span>
850+
<span class="bp">self</span><span class="o">.</span><span class="n">avg_decoded_tokens_per_iter</span> <span class="o">=</span> <span class="n">response_result</span><span class="o">.</span><span class="n">avg_decoded_tokens_per_iter</span>
847851
<span class="k">if</span> <span class="n">context_phase_params</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span>
848852
<span class="bp">self</span><span class="o">.</span><span class="n">disaggregated_params</span> <span class="o">=</span> <span class="n">DisaggregatedParams</span><span class="p">(</span>
849853
<span class="n">request_type</span><span class="o">=</span><span class="s2">&quot;context_only&quot;</span><span class="p">,</span>
@@ -1385,9 +1389,9 @@ <h1>Source code for tensorrt_llm.executor.result</h1><div class="highlight"><pre
13851389
<div class="footer-item">
13861390
<div class="extra_footer">
13871391

1388-
<p>Last updated on August 15, 2025.</p>
1392+
<p>Last updated on August 19, 2025.</p>
13891393

1390-
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/26f413a">26f413a</a>.</p>
1394+
<p>This page is generated by TensorRT-LLM commit <a href="https://github.com/NVIDIA/TensorRT-LLM/tree/7334f93">7334f93</a>.</p>
13911395

13921396
</div></div>
13931397

0 commit comments

Comments (0)