Skip to content

Commit

Permalink
Update TensorRT-LLM (#2532)
Browse files: browse the repository at this point in the history
* blossom-ci.yml: run vulnerability scan on blossom

* open source efb18c1256f8c9c3d47b7d0c740b83e5d5ebe0ec

---------

Co-authored-by: niukuo <[email protected]>
Co-authored-by: pei0033 <[email protected]>
Co-authored-by: Kyungmin Lee <[email protected]>
Co-authored-by: Kaiyu Xie <[email protected]>
  • Loading branch information
5 people authored Dec 4, 2024
1 parent 4420547 commit 548b5b7
Show file tree
Hide file tree
Showing 762 changed files with 1,673,620 additions and 1,550,597 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ docs/source/llm-api-examples/llm_*.rst
# Testing
.coverage.*
results_trt/
llm-test-workspace/

# build/debug
*.safetensors
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,6 @@
[submodule "3rdparty/pybind11"]
path = 3rdparty/pybind11
url = https://github.com/pybind/pybind11.git
[submodule "3rdparty/xgrammar"]
path = 3rdparty/xgrammar
url = https://github.com/mlc-ai/xgrammar.git
1 change: 1 addition & 0 deletions 3rdparty/xgrammar
Submodule xgrammar added at b9a16d
21 changes: 14 additions & 7 deletions benchmarks/cpp/disaggServerBenchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -812,8 +812,9 @@ class DisaggExecutorServer
}
if (mEnableCollectIterStats)
{
for (auto const& iterStats : contextStats)
for (std::size_t i = 0; i < contextStats.size(); i++)
{
auto const& iterStats = contextStats.at(i);
for (auto const& stat : iterStats)
{
SizeType32 numNewActiveRequests = stat.numNewActiveRequests;
Expand All @@ -826,13 +827,15 @@ class DisaggExecutorServer
}
if (mLogIterationData)
{
TLLM_LOG_INFO(texec::JsonSerialization::toJsonStr(stat));
TLLM_LOG_INFO(
"ctx_id %d, ctx_stat: %s", i, texec::JsonSerialization::toJsonStr(stat).c_str());
}
}
}

for (auto const& iterStats : generationStats)
for (std::size_t i = 0; i < generationStats.size(); i++)
{
auto const& iterStats = generationStats.at(i);
for (auto const& stat : iterStats)
{
SizeType32 numNewActiveRequests = stat.numNewActiveRequests;
Expand All @@ -845,7 +848,8 @@ class DisaggExecutorServer
}
if (mLogIterationData)
{
TLLM_LOG_INFO(texec::JsonSerialization::toJsonStr(stat));
TLLM_LOG_INFO(
"gen_id %d, gen_stat: %s", i, texec::JsonSerialization::toJsonStr(stat).c_str());
}
}
}
Expand All @@ -854,9 +858,9 @@ class DisaggExecutorServer
{
continue;
}
for (auto const& stats : generationRequestStatsPerIteration)
for (std::size_t i = 0; i < generationRequestStatsPerIteration.size(); i++)
{

auto const& stats = generationRequestStatsPerIteration.at(i);
for (auto const& stat : stats)
{
std::vector<float> kvCacheTransferMs;
Expand All @@ -874,7 +878,8 @@ class DisaggExecutorServer
}
if (mLogIterationData)
{
TLLM_LOG_INFO(texec::JsonSerialization::toJsonStr(stat));
TLLM_LOG_INFO(
"gen_id %d, gen_req_stat: %s", i, texec::JsonSerialization::toJsonStr(stat).c_str());
}
}
}
Expand Down Expand Up @@ -973,6 +978,7 @@ void benchmark(std::vector<std::filesystem::path> const& contextEngineDirs,
if (worldRank == 0)
{
{ // warmup
TLLM_LOG_INFO("Warmup start");
std::vector<tensorrt_llm::executor::Request> contextRequests;
contextRequests.reserve(warmUp);
for (int i = 0; i < warmUp; ++i)
Expand All @@ -989,6 +995,7 @@ void benchmark(std::vector<std::filesystem::path> const& contextEngineDirs,
disaggExecutor->waitForGenResponse(warmUp, true);
auto const warmUpWaitSleep = std::chrono::milliseconds(50);
std::this_thread::sleep_for(warmUpWaitSleep);
TLLM_LOG_INFO("Warmup done");
}

{
Expand Down
Loading

0 comments on commit 548b5b7

Please sign in to comment.