
Commit fd96a08

rebase

Signed-off-by: junq <[email protected]>

2 parents: d043821 + 7e2521a

File tree

282 files changed (+2448 / −1373 lines)


README.md

Lines changed: 36 additions & 36 deletions (large diff not rendered by default)

benchmarks/cpp/bertBenchmark.cpp

Lines changed: 1 addition & 1 deletion
@@ -135,7 +135,7 @@ void benchmarkBert(std::string const& modelName, std::filesystem::path const& da
 
 int main(int argc, char* argv[])
 {
-    cxxopts::Options options("TensorRT-LLM C++ Runtime Benchmark", "TensorRT-LLM C++ Runtime Benchmark for BERT.");
+    cxxopts::Options options("TensorRT LLM C++ Runtime Benchmark", "TensorRT LLM C++ Runtime Benchmark for BERT.");
     options.add_options()("h,help", "Print usage");
     options.add_options()(
         "m,model", "Model name specified for engines.", cxxopts::value<std::string>()->default_value("bert_base"));

benchmarks/cpp/disaggServerBenchmark.cpp

Lines changed: 1 addition & 1 deletion
@@ -1146,7 +1146,7 @@ void benchmark(std::vector<std::filesystem::path> const& contextEngineDirs,
 int main(int argc, char* argv[])
 
 {
-    cxxopts::Options options("TensorRT-LLm DisaggServer Benchmark");
+    cxxopts::Options options("TensorRT LLM DisaggServer Benchmark");
     options.add_options()("h,help", "Print usage");
     options.add_options()("context_engine_dirs", "Directories that store context engines,separator is a ,",
         cxxopts::value<std::vector<std::string>>());
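One detail worth noting in this hunk: cxxopts splits vector-valued options on commas, which is what the "separator is a ," help text refers to. A minimal sketch, not the benchmark's code:

    #include <cxxopts.hpp>
    #include <iostream>
    #include <string>
    #include <vector>

    int main(int argc, char* argv[])
    {
        cxxopts::Options options("demo");
        options.add_options()("context_engine_dirs", "Comma-separated context engine directories",
            cxxopts::value<std::vector<std::string>>());
        auto result = options.parse(argc, argv);
        // "--context_engine_dirs dirA,dirB" yields {"dirA", "dirB"}.
        if (result.count("context_engine_dirs"))
        {
            for (auto const& dir : result["context_engine_dirs"].as<std::vector<std::string>>())
            {
                std::cout << dir << "\n";
            }
        }
        return 0;
    }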

benchmarks/cpp/gptManagerBenchmark.cpp

Lines changed: 1 addition & 1 deletion
@@ -1056,7 +1056,7 @@ void benchmarkExecutor(std::optional<std::filesystem::path> const& decoderEngine
 int main(int argc, char* argv[])
 {
     cxxopts::Options options(
-        "TensorRT-LLM BatchManager Benchmark", "TensorRT-LLM BatchManager Benchmark for GPT and GPT-like models.");
+        "TensorRT LLM BatchManager Benchmark", "TensorRT LLM BatchManager Benchmark for GPT and GPT-like models.");
     options.add_options()("h,help", "Print usage");
     options.add_options()("engine_dir, decoder_engine_dir", "Directory that store the engines of decoder models.",
         cxxopts::value<std::string>());

cpp/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
@@ -68,7 +68,7 @@ else()
   message(STATUS "NVTX is enabled")
 endif()
 
-# Add TensorRT-LLM Gen export interface and CUDA support
+# Add TensorRT LLM Gen export interface and CUDA support
 add_compile_definitions("TLLM_GEN_EXPORT_INTERFACE")
 add_compile_definitions("TLLM_ENABLE_CUDA")
 

@@ -138,9 +138,9 @@ execute_process(
   OUTPUT_STRIP_TRAILING_WHITESPACE)
 
 if(TRTLLM_VERSION_RESULT EQUAL 0)
-  message(STATUS "TensorRT-LLM version: ${TRTLLM_VERSION}")
+  message(STATUS "TensorRT LLM version: ${TRTLLM_VERSION}")
 else()
-  message(FATAL_ERROR "Failed to determine Tensorrt-LLM version")
+  message(FATAL_ERROR "Failed to determine TensorRT LLM version")
 endif()
 
 configure_file(

cpp/cmake/modules/cuda_configuration.cmake

Lines changed: 1 addition & 1 deletion
@@ -116,7 +116,7 @@ function(setup_cuda_architectures)
     unset(CMAKE_CUDA_ARCHITECTURES_RAW)
     message(
       STATUS
-        "Setting CMAKE_CUDA_ARCHITECTURES to all enables all architectures TensorRT-LLM optimized for, "
+        "Setting CMAKE_CUDA_ARCHITECTURES to all enables all architectures TensorRT LLM optimized for, "
         "not all architectures CUDA compiler supports.")
   elseif(CMAKE_CUDA_ARCHITECTURES_RAW STREQUAL "all-major")
     message(

cpp/include/tensorrt_llm/deep_gemm/compiler.cuh

Lines changed: 1 addition & 1 deletion
@@ -217,7 +217,7 @@ std::vector<std::filesystem::path> getJitIncludeDirs()
         }
         else
         {
-            TLLM_LOG_WARNING("Failed to find TensorRT-LLM installation, DeepGEMM will be disabled.");
+            TLLM_LOG_WARNING("Failed to find TensorRT LLM installation, DeepGEMM will be disabled.");
         }
     }
     return includeDirs;
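The warning in this hunk is part of a locate-or-disable pattern: probe for the installation's include directory and return an empty result when it is missing. A minimal sketch under assumed names (the probe path is a placeholder; only the std::filesystem calls are real API):

    #include <filesystem>
    #include <iostream>
    #include <vector>

    // Simplified, hypothetical version of the lookup: an empty result is
    // treated downstream as "DeepGEMM disabled".
    std::vector<std::filesystem::path> findJitIncludeDirs()
    {
        std::vector<std::filesystem::path> includeDirs;
        std::filesystem::path const installPrefix = "/usr/local/tensorrt_llm"; // placeholder path
        if (std::filesystem::exists(installPrefix / "include"))
        {
            includeDirs.push_back(installPrefix / "include");
        }
        else
        {
            std::cerr << "Failed to find TensorRT LLM installation, DeepGEMM will be disabled.\n";
        }
        return includeDirs;
    }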

cpp/tensorrt_llm/batch_manager/cacheTransceiver.cpp

Lines changed: 1 addition & 1 deletion
@@ -174,7 +174,7 @@ CacheTransceiver::CacheTransceiver(kv_cache_manager::BaseKVCacheManager* cacheMa
     {
         void* ret = dllGetSym(handle, name);
         TLLM_CHECK_WITH_INFO(ret != nullptr,
-            "Unable to load UCX wrapper library symbol, possible cause is that TensorRT-LLM library is not "
+            "Unable to load UCX wrapper library symbol, possible cause is that TensorRT LLM library is not "
             "built with UCX support, please rebuild in UCX-enabled environment.");
         return ret;
     };
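dllGetSym here is the repository's wrapper around platform symbol lookup; on POSIX the underlying calls are dlopen/dlsym. A minimal sketch of the same load-and-check pattern, with placeholder library and symbol names:

    #include <dlfcn.h>
    #include <stdexcept>
    #include <string>

    // Load a symbol from a shared library, failing loudly when it is absent,
    // mirroring the TLLM_CHECK_WITH_INFO guard above.
    void* loadSymbolOrThrow(char const* libName, char const* symName)
    {
        void* handle = dlopen(libName, RTLD_LAZY | RTLD_LOCAL);
        if (handle == nullptr)
        {
            throw std::runtime_error(std::string("dlopen failed: ") + dlerror());
        }
        void* sym = dlsym(handle, symName);
        if (sym == nullptr)
        {
            throw std::runtime_error(std::string("Unable to load symbol '") + symName
                + "'; the library may not be built with UCX support.");
        }
        return sym;
    }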

cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp

Lines changed: 2 additions & 2 deletions
@@ -618,12 +618,12 @@ BlockManager::BlockManager(std::vector<SizeType32> const& numKvHeadsPerLayer, Si
         mAbsolutePoolToWindowSize.push_back(windowSize);
         mAbsolutePoolToRelativePoolIndex.push_back(i);
     }
-    // (eop) SWA allocates blocks linearly, and we need as many blocks as full attention,
+    // SWA allocates blocks linearly, and we need as many blocks as full attention,
     // where full attention has windowSize = maxSequenceLength.
     auto const maxTokenNum = std::max(windowSize, maxSequenceLength) + sinkBubbleLength;
     auto const temporaryAttentionWindow = manager.calculateTemporaryAttentionWindow(tempAttentionWindowInputs);
     // Consider the temporaryAttentionWindow when allocating blocks.
-    // (eop) Current tempAttentionWindow calculation does not consider the
+    // Current tempAttentionWindow calculation does not consider the
     // concept of SWA right now at most occupying maxSequenceLength of
     // blocks. So the calculation of maxToken + tempAttention will exceed
     // maxSequenceLength. A temporary resolution here is to cap the
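To make the block-budget arithmetic in this hunk concrete, a small worked sketch; the numbers are illustrative assumptions, not values from the repository:

    #include <algorithm>
    #include <iostream>

    int main()
    {
        int const maxSequenceLength = 8192; // illustrative
        int const sinkBubbleLength = 64;    // illustrative

        // As in the hunk: SWA allocates blocks linearly, so the budget is
        // driven by the larger of the attention window and the full
        // sequence length, plus the sink bubble.
        for (int windowSize : {4096, 16384})
        {
            auto const maxTokenNum = std::max(windowSize, maxSequenceLength) + sinkBubbleLength;
            std::cout << "windowSize=" << windowSize << " -> maxTokenNum=" << maxTokenNum << "\n";
        }
        return 0; // prints 8256 for the 4096 window and 16448 for the 16384 window
    }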

cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/fp4_gemm_template.h

Lines changed: 12 additions & 12 deletions
@@ -106,7 +106,7 @@ size_t dispatchNVFP4xNVFP4GemmClusterShapeSm10x(T* D, void const* A, void const*
         break;
     default:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
+            "[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }

@@ -187,7 +187,7 @@ size_t dispatchNVFP4xNVFP4GemmClusterShapeSm120(T* D, void const* A, void const*
         break;
     default:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
+            "[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }

@@ -215,16 +215,16 @@ size_t dispatchNVFP4xNVFP4GemmCTAShapeSm120(T* D, void const* A, void const* B,
             occupancy);
         break;
     case tkc::CutlassTileConfigSM120::Undefined:
-        throw std::runtime_error("[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config undefined.");
+        throw std::runtime_error("[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config undefined.");
         break;
     case tkc::CutlassTileConfigSM120::ChooseWithHeuristic:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config should have already been set by "
+            "[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config should have already been set by "
             "heuristic.");
         break;
     default:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
+            "[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }

@@ -267,7 +267,7 @@ size_t dispatchMXFP8xMXFP4GemmClusterShapeSm100(T* D, void const* A, void const*
         break;
     default:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
+            "[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }

@@ -303,15 +303,15 @@ size_t dispatchMXFP8xMXFP4GemmCTAShapeSm100(T* D, void const* A, void const* B,
             occupancy);
         break;
     case tkc::CutlassTileConfigSM100::Undefined:
-        throw std::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
+        throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
         break;
     case tkc::CutlassTileConfigSM100::ChooseWithHeuristic:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by "
+            "[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by "
             "heuristic.");
         break;
     default:
-        throw std::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
+        throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }

@@ -348,7 +348,7 @@ size_t CutlassFp4GemmRunner<T, fp4GemmType>::dispatchToArch(T* D, void const* A,
         else
         {
             throw std::runtime_error(
-                "[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
+                "[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
         }
     }
     else if constexpr (fp4GemmType == FP4GemmType::W4A4_NVFP4_NVFP4)

@@ -376,13 +376,13 @@ size_t CutlassFp4GemmRunner<T, fp4GemmType>::dispatchToArch(T* D, void const* A,
         else
         {
             throw std::runtime_error(
-                "[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
+                "[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
         }
     }
     else
     {
         throw std::runtime_error(
-            "[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] FP4 Gemm type unsupported for CUTLASS FP4 GEMM");
+            "[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] FP4 Gemm type unsupported for CUTLASS FP4 GEMM");
     }
 }
 
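The recurring shape across these hunks is dispatch over a tile-config enum that treats "not yet chosen" and "unknown" as hard errors. A minimal sketch with a hypothetical enum; the names are invented for illustration, and only the control-flow structure mirrors the file:

    #include <stdexcept>

    // Hypothetical stand-in for enums like tkc::CutlassTileConfigSM120.
    enum class TileConfig
    {
        Undefined,
        ChooseWithHeuristic,
        Shape128x128,
    };

    // Route to a kernel for a concrete tile shape; an unset or unknown
    // config is a programmer error, exactly as in the dispatchers above.
    void dispatchGemm(TileConfig config)
    {
        switch (config)
        {
        case TileConfig::Shape128x128:
            // launch the kernel specialized for this CTA shape
            break;
        case TileConfig::Undefined:
            throw std::runtime_error("[Error][dispatch_gemm_cta_shape] Gemm config undefined.");
        case TileConfig::ChooseWithHeuristic:
            throw std::runtime_error(
                "[Error][dispatch_gemm_cta_shape] Gemm config should have already been set by heuristic.");
        default:
            throw std::runtime_error("[Error][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
        }
    }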