Skip to content

Commit 898f37f

Browse files
[None][feat] Enable nanobind as the default binding library (#6608)
Signed-off-by: Linda-Stadter <[email protected]>
1 parent a49cf68 commit 898f37f

File tree

11 files changed

+58
-25
lines changed

11 files changed

+58
-25
lines changed

cpp/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ add_compile_definitions("TLLM_GEN_EXPORT_INTERFACE")
6969
add_compile_definitions("TLLM_ENABLE_CUDA")
7070

7171
set(BINDING_TYPE
72-
"pybind"
72+
"nanobind"
7373
CACHE STRING
7474
"Binding type of Python bindings for C++ runtime and batch manager")
7575

cpp/tensorrt_llm/nanobind/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ target_link_libraries(
4343
${Python3_LIBRARIES}
4444
${TORCH_LIBRARIES}
4545
torch_python
46-
CUDA::cuda_driver
46+
${CUDA_DRV_LIB}
4747
${CUDA_NVML_LIB}
4848
th_common)
4949
target_compile_definitions(

cpp/tensorrt_llm/nanobind/common/customCasters.h

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,5 +285,35 @@ struct type_caster<std::vector<std::reference_wrapper<T const>>>
285285
return make_caster<std::vector<T>>::from_cpp(result, policy, cleanup);
286286
}
287287
};
288+
289+
template <>
290+
struct type_caster<torch::ScalarType>
291+
{
292+
NB_TYPE_CASTER(torch::ScalarType, const_name("torch.dtype"));
293+
294+
bool from_python(handle src, uint8_t flags, cleanup_list* cleanup) noexcept
295+
{
296+
std::string dtype_name = nb::cast<std::string>(nb::str(src));
297+
if (dtype_name.substr(0, 6) == "torch.")
298+
{
299+
dtype_name = dtype_name.substr(6);
300+
}
301+
302+
auto const& dtype_map = c10::getStringToDtypeMap();
303+
auto it = dtype_map.find(dtype_name);
304+
if (it != dtype_map.end())
305+
{
306+
value = it->second;
307+
return true;
308+
}
309+
310+
return false;
311+
}
312+
313+
static handle from_cpp(torch::ScalarType src, rv_policy policy, cleanup_list* cleanup)
314+
{
315+
throw std::runtime_error("from_cpp for torch::ScalarType is not implemented");
316+
}
317+
};
288318
} // namespace detail
289319
} // namespace NB_NAMESPACE

cpp/tensorrt_llm/nanobind/executor/bindings.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,8 @@ void initBindings(nb::module_& m)
240240
nb::class_<tle::KVCacheEvent>(executor_kv_cache, "KVCacheEvent")
241241
.def_ro("event_id", &tle::KVCacheEvent::eventId)
242242
.def_ro("data", &tle::KVCacheEvent::data)
243-
.def_ro("window_size", &tle::KVCacheEvent::windowSize);
243+
.def_ro("window_size", &tle::KVCacheEvent::windowSize)
244+
.def_ro("attention_dp_rank", &tle::KVCacheEvent::attentionDpRank);
244245

245246
nb::class_<tle::KVCacheEventManager>(executor_kv_cache, "KVCacheEventManager")
246247
.def(

cpp/tensorrt_llm/nanobind/executor/request.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727

2828
#include <nanobind/nanobind.h>
2929
#include <nanobind/stl/chrono.h>
30+
#include <nanobind/stl/function.h>
3031
#include <nanobind/stl/list.h>
3132
#include <nanobind/stl/optional.h>
3233
#include <nanobind/stl/shared_ptr.h>

cpp/tensorrt_llm/nanobind/runtime/bindings.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ void initBindings(nb::module_& m)
279279
.def(nb::init<tr::GptDecoderBatched::CudaStreamPtr>(), nb::arg("stream"))
280280
.def("setup", &tr::GptDecoderBatched::setup, nb::arg("mode"), nb::arg("max_num_sequences"),
281281
nb::arg("max_beam_width"), nb::arg("dtype"), nb::arg("model_config"), nb::arg("world_config"))
282-
.def("forward_async", &tr::GptDecoderBatched::forwardAsync, nb::arg("output"), nb::arg("input"))
282+
.def("forward_async", &tr::GptDecoderBatched::forwardAsync, nb::arg("decoder_state"), nb::arg("input"))
283283
.def("underlying_decoder", &tr::GptDecoderBatched::getUnderlyingDecoder, nb::rv_policy::reference)
284284
.def("finalize", &tr::GptDecoderBatched::finalize, nb::arg("decoder_state"), nb::arg("batch_idx"),
285285
nb::arg("sampling_config"), nb::arg("streaming"))

cpp/tensorrt_llm/pybind/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ target_link_libraries(
4444
${Python3_LIBRARIES}
4545
${TORCH_LIBRARIES}
4646
torch_python
47-
CUDA::cuda_driver
47+
${CUDA_DRV_LIB}
4848
${CUDA_NVML_LIB}
4949
th_common)
5050
target_compile_definitions(

jenkins/Build.groovy

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -48,10 +48,10 @@ CONFIG_LINUX_AARCH64 = "linux_aarch64"
4848
def CONFIG_LINUX_AARCH64_LLVM = "linux_aarch64_LLVM"
4949

5050
@Field
51-
def CONFIG_LINUX_X86_64_NANOBIND = "linux_x86_64_Nanobind"
51+
def CONFIG_LINUX_X86_64_PYBIND = "linux_x86_64_Pybind"
5252

5353
@Field
54-
def CONFIG_LINUX_AARCH64_NANOBIND = "linux_aarch64_Nanobind"
54+
def CONFIG_LINUX_AARCH64_PYBIND = "linux_aarch64_Pybind"
5555

5656
@Field
5757
def BUILD_CONFIGS = [
@@ -62,9 +62,9 @@ def BUILD_CONFIGS = [
6262
(TARNAME) : "TensorRT-LLM.tar.gz",
6363
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;120-real",
6464
],
65-
(CONFIG_LINUX_X86_64_NANOBIND) : [
66-
(WHEEL_EXTRA_ARGS) : "--binding_type nanobind --extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks",
67-
(TARNAME) : "nanobind-TensorRT-LLM.tar.gz",
65+
(CONFIG_LINUX_X86_64_PYBIND) : [
66+
(WHEEL_EXTRA_ARGS) : "--binding_type pybind --extra-cmake-vars ENABLE_MULTI_DEVICE=1 --extra-cmake-vars WARNING_IS_ERROR=ON --extra-cmake-vars NIXL_ROOT=/opt/nvidia/nvda_nixl --micro_benchmarks",
67+
(TARNAME) : "pybind-TensorRT-LLM.tar.gz",
6868
(WHEEL_ARCHS): "80-real;86-real;89-real;90-real;100-real;120-real",
6969
],
7070
(CONFIG_LINUX_X86_64_SINGLE_DEVICE) : [
@@ -82,9 +82,9 @@ def BUILD_CONFIGS = [
8282
(TARNAME) : "TensorRT-LLM-GH200.tar.gz",
8383
(WHEEL_ARCHS): "90-real;100-real;120-real",
8484
],
85-
(CONFIG_LINUX_AARCH64_NANOBIND): [
86-
(WHEEL_EXTRA_ARGS) : "--binding_type nanobind --extra-cmake-vars WARNING_IS_ERROR=ON",
87-
(TARNAME) : "nanobind-TensorRT-LLM-GH200.tar.gz",
85+
(CONFIG_LINUX_AARCH64_PYBIND): [
86+
(WHEEL_EXTRA_ARGS) : "--binding_type pybind --extra-cmake-vars WARNING_IS_ERROR=ON",
87+
(TARNAME) : "pybind-TensorRT-LLM-GH200.tar.gz",
8888
(WHEEL_ARCHS): "90-real;100-real;120-real",
8989
],
9090
(CONFIG_LINUX_AARCH64_LLVM) : [
@@ -542,8 +542,8 @@ def launchStages(pipeline, cpu_arch, enableFailFast, globalVars)
542542
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64 : CONFIG_LINUX_X86_64_VANILLA),
543543
"Build TRT-LLM LLVM": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
544544
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_LLVM : CONFIG_LINUX_X86_64_LLVM),
545-
"Build TRT-LLM Nanobind": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
546-
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_NANOBIND : CONFIG_LINUX_X86_64_NANOBIND),
545+
"Build TRT-LLM Pybind": [LLM_DOCKER_IMAGE] + prepareLLMBuild(
546+
pipeline, cpu_arch == AARCH64_TRIPLE ? CONFIG_LINUX_AARCH64_PYBIND : CONFIG_LINUX_X86_64_PYBIND),
547547
]
548548

549549
if (cpu_arch == X86_64_TRIPLE) {

jenkins/L0_Test.groovy

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ def LLVM_CONFIG = "LLVM"
6565
LINUX_AARCH64_CONFIG = "linux_aarch64"
6666

6767
@Field
68-
def NANOBIND_CONFIG = "Nanobind"
68+
def PYBIND_CONFIG = "Pybind"
6969

7070
@Field
7171
def BUILD_CONFIGS = [
@@ -74,7 +74,7 @@ def BUILD_CONFIGS = [
7474
(SINGLE_DEVICE_CONFIG) : [(TARNAME) : "single-device-TensorRT-LLM.tar.gz"],
7575
(LLVM_CONFIG) : [(TARNAME) : "llvm-TensorRT-LLM.tar.gz"],
7676
(LINUX_AARCH64_CONFIG) : [(TARNAME) : "TensorRT-LLM-GH200.tar.gz"],
77-
(NANOBIND_CONFIG) : [(TARNAME) : "nanobind-TensorRT-LLM.tar.gz"],
77+
(PYBIND_CONFIG) : [(TARNAME) : "pybind-TensorRT-LLM.tar.gz"],
7878
]
7979

8080
// TODO: Move common variables to a unified location
@@ -1775,7 +1775,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
17751775
"A10-TensorRT-4": ["a10", "l0_a10", 4, 6],
17761776
"A10-TensorRT-5": ["a10", "l0_a10", 5, 6],
17771777
"A10-TensorRT-6": ["a10", "l0_a10", 6, 6],
1778-
"A10-Nanobind": ["a10", "l0_a10_nanobind", 1, 1],
1778+
"A10-Pybind": ["a10", "l0_a10_pybind", 1, 1],
17791779
"A30-Triton-1": ["a30", "l0_a30", 1, 1],
17801780
"A30-PyTorch-1": ["a30", "l0_a30", 1, 2],
17811781
"A30-PyTorch-2": ["a30", "l0_a30", 2, 2],
@@ -1856,8 +1856,8 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
18561856
if (key.contains("llvm")) {
18571857
config = LLVM_CONFIG
18581858
}
1859-
if (key.contains("Nanobind")) {
1860-
config = NANOBIND_CONFIG
1859+
if (key.contains("Pybind")) {
1860+
config = PYBIND_CONFIG
18611861
}
18621862
runLLMTestlistOnPlatform(pipeline, values[0], values[1], config, key.contains("Perf"), key, values[2], values[3])
18631863
}]]}

scripts/build_wheel.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -435,7 +435,7 @@ def main(*,
435435
install: bool = False,
436436
skip_building_wheel: bool = False,
437437
linking_install_binary: bool = False,
438-
binding_type: str = "pybind",
438+
binding_type: str = "nanobind",
439439
benchmarks: bool = False,
440440
micro_benchmarks: bool = False,
441441
nvtx: bool = False,
@@ -984,8 +984,8 @@ def add_arguments(parser: ArgumentParser):
984984
)
985985
parser.add_argument("--binding_type",
986986
choices=["pybind", "nanobind"],
987-
default="pybind",
988-
help="Which binding type to build: pybind or nanobind")
987+
default="nanobind",
988+
help="Which binding library to use: pybind or nanobind")
989989
parser.add_argument("--benchmarks",
990990
action="store_true",
991991
help="Build the benchmarks for the C++ runtime")

0 commit comments

Comments
 (0)