[Windows Port] IPC Capability Check #340

Draft · wants to merge 2 commits into base: main
6 changes: 0 additions & 6 deletions CMakeLists.txt
@@ -41,12 +41,6 @@ option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
option(TRITON_ENABLE_NVTX "Include nvtx markers collection in backend." OFF)

# FIXME: CI needs to enable the GPU flag. Python for window currently does not
# support GPU tensors. For simplicity, we will override this option here.
if(WIN32)
set(TRITON_ENABLE_GPU OFF CACHE BOOL "GPU disabled" FORCE)
endif()

set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
9 changes: 9 additions & 0 deletions src/pb_utils.cc
@@ -286,6 +286,15 @@ IsUsingCUDAPool(
reinterpret_cast<void*>(cuda_pool_address));
}

bool
DeviceSupportsIPC(const int64_t device_id)
{
int supports_ipc = 0;
THROW_IF_CUDA_ERROR(cudaDeviceGetAttribute(
&supports_ipc, cudaDevAttrIpcEventSupport, device_id));
return (supports_ipc == 1);
}

#endif // TRITON_ENABLE_GPU

// FIXME: [DLIS-6078]: We should not need this function. However, some paths are
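For reference, the new helper boils down to a single cudaDeviceGetAttribute call. The standalone sketch below (not part of this PR) queries the same cudaDevAttrIpcEventSupport attribute for every visible device, using plain error handling in place of the backend's THROW_IF_CUDA_ERROR macro:

// Minimal sketch: probe IPC event support per device, roughly what
// DeviceSupportsIPC() does, but without the backend's error macros.
#include <cuda_runtime_api.h>
#include <cstdio>

int main() {
  int device_count = 0;
  if (cudaGetDeviceCount(&device_count) != cudaSuccess || device_count == 0) {
    std::fprintf(stderr, "No CUDA devices visible.\n");
    return 1;
  }
  for (int device_id = 0; device_id < device_count; ++device_id) {
    int supports_ipc = 0;
    cudaError_t err = cudaDeviceGetAttribute(
        &supports_ipc, cudaDevAttrIpcEventSupport, device_id);
    if (err != cudaSuccess) {
      std::fprintf(stderr, "cudaDeviceGetAttribute failed for device %d: %s\n",
                   device_id, cudaGetErrorString(err));
      continue;
    }
    // On Windows, CUDA IPC is generally only available for devices in TCC
    // mode; WDDM devices typically report 0 here, which is why the backend
    // falls back to CPU-only tensors.
    std::printf("device %d: IPC events %s\n", device_id,
                supports_ipc == 1 ? "supported" : "not supported");
  }
  return 0;
}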
1 change: 1 addition & 0 deletions src/pb_utils.h
@@ -324,6 +324,7 @@ bool IsUsingCUDAPool(
std::unique_ptr<CUDAMemoryPoolManager>& cuda_pool, int64_t memory_type_id,
void* data);

bool DeviceSupportsIPC(const int64_t device_id);
#endif // TRITON_ENABLE_GPU

// FIXME: [DLIS-6078]: We should not need this function. However, some paths are
21 changes: 19 additions & 2 deletions src/python_be.cc
@@ -512,6 +512,25 @@ ModelInstanceState::GetInputTensor(
cpu_only_tensors = true;
#endif // TRITON_ENABLE_GPU

// For Windows, force CPU tensors if IPC is not supported on
// the target GPU device
#if defined(TRITON_ENABLE_GPU) && defined(_WIN32)
if (src_memory_type == TRITONSERVER_MEMORY_GPU) {
Contributor Author:
Merge if-statements for legibility.

Contributor:
This is likely fine. The compiler should be able to optimize this, and if not, it's a well-predicted branch.

bool supports_ipc = DeviceSupportsIPC(src_memory_type_id);
if (!supports_ipc) {
LOG_MESSAGE(
TRITONSERVER_LOG_WARN,
(std::string(
"GPU memory storage requested, but GPU device " +
std::to_string(src_memory_type_id) +
" does not support IPC, which is necessary to support GPU "
"tensors. Forcing CPU only input tensors.")
.c_str()));
cpu_only_tensors = true;
}
}
#endif // TRITON_ENABLE_GPU && _WIN32

if (cpu_only_tensors || src_memory_type != TRITONSERVER_MEMORY_GPU) {
input_tensor = std::make_shared<PbTensor>(
std::string(input_name),
@@ -611,9 +630,7 @@ ModelInstanceState::GetInputTensor(
&cuda_used));

if (cuda_used) {
#ifdef TRITON_ENABLE_GPU
cudaStreamSynchronize(stream_);
#endif
}

input_tensor = std::make_shared<PbTensor>(
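Following up on the review comment above about merging the if-statements for legibility, one possible merged form is sketched below. It is only an illustration, not the code in this PR; the names it reuses (DeviceSupportsIPC, src_memory_type, src_memory_type_id, cpu_only_tensors, LOG_MESSAGE) come from the diff above.

// Sketch: the nested check collapsed into a single condition.
#if defined(TRITON_ENABLE_GPU) && defined(_WIN32)
  if (src_memory_type == TRITONSERVER_MEMORY_GPU &&
      !DeviceSupportsIPC(src_memory_type_id)) {
    LOG_MESSAGE(
        TRITONSERVER_LOG_WARN,
        (std::string("GPU memory storage requested, but GPU device ") +
         std::to_string(src_memory_type_id) +
         " does not support IPC, which is necessary to support GPU "
         "tensors. Forcing CPU only input tensors.")
            .c_str());
    cpu_only_tensors = true;
  }
#endif  // TRITON_ENABLE_GPU && _WIN32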