
Commit fd96a08

rebase

Signed-off-by: junq <[email protected]>

2 parents: d043821 + 7e2521a

File tree

282 files changed (+2448 / −1373 lines)


README.md

Lines changed: 36 additions & 36 deletions (large diff not rendered by default)

benchmarks/cpp/bertBenchmark.cpp

Lines changed: 1 addition & 1 deletion
@@ -135,7 +135,7 @@ void benchmarkBert(std::string const& modelName, std::filesystem::path const& da
 
 int main(int argc, char* argv[])
 {
-    cxxopts::Options options("TensorRT-LLM C++ Runtime Benchmark", "TensorRT-LLM C++ Runtime Benchmark for BERT.");
+    cxxopts::Options options("TensorRT LLM C++ Runtime Benchmark", "TensorRT LLM C++ Runtime Benchmark for BERT.");
     options.add_options()("h,help", "Print usage");
     options.add_options()(
         "m,model", "Model name specified for engines.", cxxopts::value<std::string>()->default_value("bert_base"));

benchmarks/cpp/disaggServerBenchmark.cpp

Lines changed: 1 addition & 1 deletion
@@ -1146,7 +1146,7 @@ void benchmark(std::vector<std::filesystem::path> const& contextEngineDirs,
 int main(int argc, char* argv[])
 
 {
-    cxxopts::Options options("TensorRT-LLm DisaggServer Benchmark");
+    cxxopts::Options options("TensorRT LLM DisaggServer Benchmark");
     options.add_options()("h,help", "Print usage");
     options.add_options()("context_engine_dirs", "Directories that store context engines,separator is a ,",
         cxxopts::value<std::vector<std::string>>());
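One detail worth noting in this hunk: cxxopts splits vector-valued options on commas, which is what the "separator is a ," help text refers to. A minimal sketch, not the benchmark's code:

    #include <cxxopts.hpp>
    #include <iostream>
    #include <string>
    #include <vector>

    int main(int argc, char* argv[])
    {
        cxxopts::Options options("demo");
        options.add_options()("context_engine_dirs", "Comma-separated context engine directories",
            cxxopts::value<std::vector<std::string>>());
        auto result = options.parse(argc, argv);
        // "--context_engine_dirs dirA,dirB" yields {"dirA", "dirB"}.
        if (result.count("context_engine_dirs"))
        {
            for (auto const& dir : result["context_engine_dirs"].as<std::vector<std::string>>())
            {
                std::cout << dir << "\n";
            }
        }
        return 0;
    }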

benchmarks/cpp/gptManagerBenchmark.cpp

Lines changed: 1 addition & 1 deletion
@@ -1056,7 +1056,7 @@ void benchmarkExecutor(std::optional<std::filesystem::path> const& decoderEngine
 int main(int argc, char* argv[])
 {
     cxxopts::Options options(
-        "TensorRT-LLM BatchManager Benchmark", "TensorRT-LLM BatchManager Benchmark for GPT and GPT-like models.");
+        "TensorRT LLM BatchManager Benchmark", "TensorRT LLM BatchManager Benchmark for GPT and GPT-like models.");
     options.add_options()("h,help", "Print usage");
     options.add_options()("engine_dir, decoder_engine_dir", "Directory that store the engines of decoder models.",
         cxxopts::value<std::string>());

cpp/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
@@ -68,7 +68,7 @@ else()
   message(STATUS "NVTX is enabled")
 endif()
 
-# Add TensorRT-LLM Gen export interface and CUDA support
+# Add TensorRT LLM Gen export interface and CUDA support
 add_compile_definitions("TLLM_GEN_EXPORT_INTERFACE")
 add_compile_definitions("TLLM_ENABLE_CUDA")
 

@@ -138,9 +138,9 @@ execute_process(
   OUTPUT_STRIP_TRAILING_WHITESPACE)
 
 if(TRTLLM_VERSION_RESULT EQUAL 0)
-  message(STATUS "TensorRT-LLM version: ${TRTLLM_VERSION}")
+  message(STATUS "TensorRT LLM version: ${TRTLLM_VERSION}")
 else()
-  message(FATAL_ERROR "Failed to determine Tensorrt-LLM version")
+  message(FATAL_ERROR "Failed to determine TensorRT LLM version")
 endif()
 
 configure_file(

cpp/cmake/modules/cuda_configuration.cmake

Lines changed: 1 addition & 1 deletion
@@ -116,7 +116,7 @@ function(setup_cuda_architectures)
     unset(CMAKE_CUDA_ARCHITECTURES_RAW)
     message(
       STATUS
-        "Setting CMAKE_CUDA_ARCHITECTURES to all enables all architectures TensorRT-LLM optimized for, "
+        "Setting CMAKE_CUDA_ARCHITECTURES to all enables all architectures TensorRT LLM optimized for, "
         "not all architectures CUDA compiler supports.")
   elseif(CMAKE_CUDA_ARCHITECTURES_RAW STREQUAL "all-major")
     message(

cpp/include/tensorrt_llm/deep_gemm/compiler.cuh

Lines changed: 1 addition & 1 deletion
@@ -217,7 +217,7 @@ std::vector<std::filesystem::path> getJitIncludeDirs()
         }
         else
         {
-            TLLM_LOG_WARNING("Failed to find TensorRT-LLM installation, DeepGEMM will be disabled.");
+            TLLM_LOG_WARNING("Failed to find TensorRT LLM installation, DeepGEMM will be disabled.");
         }
     }
     return includeDirs;
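The warning in this hunk is part of a locate-or-disable pattern: probe for the installation's include directory and return an empty result when it is missing. A minimal sketch under assumed names (the probe path is a placeholder; only the std::filesystem calls are real API):

    #include <filesystem>
    #include <iostream>
    #include <vector>

    // Simplified, hypothetical version of the lookup: an empty result is
    // treated downstream as "DeepGEMM disabled".
    std::vector<std::filesystem::path> findJitIncludeDirs()
    {
        std::vector<std::filesystem::path> includeDirs;
        std::filesystem::path const installPrefix = "/usr/local/tensorrt_llm"; // placeholder path
        if (std::filesystem::exists(installPrefix / "include"))
        {
            includeDirs.push_back(installPrefix / "include");
        }
        else
        {
            std::cerr << "Failed to find TensorRT LLM installation, DeepGEMM will be disabled.\n";
        }
        return includeDirs;
    }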

cpp/tensorrt_llm/batch_manager/cacheTransceiver.cpp

Lines changed: 1 addition & 1 deletion
@@ -174,7 +174,7 @@ CacheTransceiver::CacheTransceiver(kv_cache_manager::BaseKVCacheManager* cacheMa
     {
         void* ret = dllGetSym(handle, name);
         TLLM_CHECK_WITH_INFO(ret != nullptr,
-            "Unable to load UCX wrapper library symbol, possible cause is that TensorRT-LLM library is not "
+            "Unable to load UCX wrapper library symbol, possible cause is that TensorRT LLM library is not "
             "built with UCX support, please rebuild in UCX-enabled environment.");
         return ret;
     };
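dllGetSym here is the repository's wrapper around platform symbol lookup; on POSIX the underlying calls are dlopen/dlsym. A minimal sketch of the same load-and-check pattern, with placeholder library and symbol names:

    #include <dlfcn.h>
    #include <stdexcept>
    #include <string>

    // Load a symbol from a shared library, failing loudly when it is absent,
    // mirroring the TLLM_CHECK_WITH_INFO guard above.
    void* loadSymbolOrThrow(char const* libName, char const* symName)
    {
        void* handle = dlopen(libName, RTLD_LAZY | RTLD_LOCAL);
        if (handle == nullptr)
        {
            throw std::runtime_error(std::string("dlopen failed: ") + dlerror());
        }
        void* sym = dlsym(handle, symName);
        if (sym == nullptr)
        {
            throw std::runtime_error(std::string("Unable to load symbol '") + symName
                + "'; the library may not be built with UCX support.");
        }
        return sym;
    }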

cpp/tensorrt_llm/batch_manager/kvCacheManager.cpp

Lines changed: 2 additions & 2 deletions
@@ -618,12 +618,12 @@ BlockManager::BlockManager(std::vector<SizeType32> const& numKvHeadsPerLayer, Si
         mAbsolutePoolToWindowSize.push_back(windowSize);
         mAbsolutePoolToRelativePoolIndex.push_back(i);
     }
-    // (eop) SWA allocates blocks linearly, and we need as many blocks as full attention,
+    // SWA allocates blocks linearly, and we need as many blocks as full attention,
     // where full attention has windowSize = maxSequenceLength.
     auto const maxTokenNum = std::max(windowSize, maxSequenceLength) + sinkBubbleLength;
     auto const temporaryAttentionWindow = manager.calculateTemporaryAttentionWindow(tempAttentionWindowInputs);
     // Consider the temporaryAttentionWindow when allocating blocks.
-    // (eop) Current tempAttentionWindow calculation does not consider the
+    // Current tempAttentionWindow calculation does not consider the
     // concept of SWA right now at most occupying maxSequenceLength of
     // blocks. So the calculation of maxToken + tempAttention will exceed
     // maxSequenceLength. A temporary resolution here is to cap the
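To make the block-budget arithmetic in this hunk concrete, a small worked sketch; the numbers are illustrative assumptions, not values from the repository:

    #include <algorithm>
    #include <iostream>

    int main()
    {
        int const maxSequenceLength = 8192; // illustrative
        int const sinkBubbleLength = 64;    // illustrative

        // As in the hunk: SWA allocates blocks linearly, so the budget is
        // driven by the larger of the attention window and the full
        // sequence length, plus the sink bubble.
        for (int windowSize : {4096, 16384})
        {
            auto const maxTokenNum = std::max(windowSize, maxSequenceLength) + sinkBubbleLength;
            std::cout << "windowSize=" << windowSize << " -> maxTokenNum=" << maxTokenNum << "\n";
        }
        return 0; // prints 8256 for the 4096 window and 16448 for the 16384 window
    }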

cpp/tensorrt_llm/kernels/cutlass_kernels/fp4_gemm/fp4_gemm_template.h

Lines changed: 12 additions & 12 deletions
@@ -106,7 +106,7 @@ size_t dispatchNVFP4xNVFP4GemmClusterShapeSm10x(T* D, void const* A, void const*
         break;
     default:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
+            "[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }

@@ -187,7 +187,7 @@ size_t dispatchNVFP4xNVFP4GemmClusterShapeSm120(T* D, void const* A, void const*
         break;
     default:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
+            "[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }

@@ -215,16 +215,16 @@ size_t dispatchNVFP4xNVFP4GemmCTAShapeSm120(T* D, void const* A, void const* B,
             occupancy);
         break;
     case tkc::CutlassTileConfigSM120::Undefined:
-        throw std::runtime_error("[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config undefined.");
+        throw std::runtime_error("[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config undefined.");
         break;
     case tkc::CutlassTileConfigSM120::ChooseWithHeuristic:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config should have already been set by "
+            "[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Gemm config should have already been set by "
             "heuristic.");
         break;
     default:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
+            "[TensorRT LLM Error][FP4][sm120][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }

@@ -267,7 +267,7 @@ size_t dispatchMXFP8xMXFP4GemmClusterShapeSm100(T* D, void const* A, void const*
         break;
     default:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
+            "[TensorRT LLM Error][FP4][dispatch_gemm_cluster_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }

@@ -303,15 +303,15 @@ size_t dispatchMXFP8xMXFP4GemmCTAShapeSm100(T* D, void const* A, void const* B,
             occupancy);
         break;
     case tkc::CutlassTileConfigSM100::Undefined:
-        throw std::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
+        throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config undefined.");
         break;
     case tkc::CutlassTileConfigSM100::ChooseWithHeuristic:
         throw std::runtime_error(
-            "[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by "
+            "[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Gemm config should have already been set by "
             "heuristic.");
         break;
     default:
-        throw std::runtime_error("[TensorRT-LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
+        throw std::runtime_error("[TensorRT LLM Error][FP4][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
         break;
     }
 }

@@ -348,7 +348,7 @@ size_t CutlassFp4GemmRunner<T, fp4GemmType>::dispatchToArch(T* D, void const* A,
         else
         {
             throw std::runtime_error(
-                "[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
+                "[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
         }
     }
     else if constexpr (fp4GemmType == FP4GemmType::W4A4_NVFP4_NVFP4)

@@ -376,13 +376,13 @@ size_t CutlassFp4GemmRunner<T, fp4GemmType>::dispatchToArch(T* D, void const* A,
         else
         {
             throw std::runtime_error(
-                "[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
+                "[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] Arch unsupported for CUTLASS FP4 GEMM");
         }
     }
     else
     {
         throw std::runtime_error(
-            "[TensorRT-LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] FP4 Gemm type unsupported for CUTLASS FP4 GEMM");
+            "[TensorRT LLM Error][CutlassFp4GemmRunner][GEMM Dispatch] FP4 Gemm type unsupported for CUTLASS FP4 GEMM");
     }
 }
 
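The recurring shape across these hunks is dispatch over a tile-config enum that treats "not yet chosen" and "unknown" as hard errors. A minimal sketch with a hypothetical enum; the names are invented for illustration, and only the control-flow structure mirrors the file:

    #include <stdexcept>

    // Hypothetical stand-in for enums like tkc::CutlassTileConfigSM120.
    enum class TileConfig
    {
        Undefined,
        ChooseWithHeuristic,
        Shape128x128,
    };

    // Route to a kernel for a concrete tile shape; an unset or unknown
    // config is a programmer error, exactly as in the dispatchers above.
    void dispatchGemm(TileConfig config)
    {
        switch (config)
        {
        case TileConfig::Shape128x128:
            // launch the kernel specialized for this CTA shape
            break;
        case TileConfig::Undefined:
            throw std::runtime_error("[Error][dispatch_gemm_cta_shape] Gemm config undefined.");
        case TileConfig::ChooseWithHeuristic:
            throw std::runtime_error(
                "[Error][dispatch_gemm_cta_shape] Gemm config should have already been set by heuristic.");
        default:
            throw std::runtime_error("[Error][dispatch_gemm_cta_shape] Config is invalid for FP4 GEMM.");
        }
    }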