[Windows Port] IPC Capability Check #340

Draft · wants to merge 2 commits into base: main
6 changes: 0 additions & 6 deletions CMakeLists.txt
@@ -41,12 +41,6 @@ option(TRITON_ENABLE_GPU "Enable GPU support in backend" ON)
option(TRITON_ENABLE_STATS "Include statistics collections in backend" ON)
option(TRITON_ENABLE_NVTX "Include nvtx markers collection in backend." OFF)

# FIXME: CI needs to enable the GPU flag. Python for window currently does not
# support GPU tensors. For simplicity, we will override this option here.
if(WIN32)
set(TRITON_ENABLE_GPU OFF CACHE BOOL "GPU disabled" FORCE)
endif()

set(TRITON_BACKEND_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/backend repo")
set(TRITON_COMMON_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/common repo")
set(TRITON_CORE_REPO_TAG "main" CACHE STRING "Tag for triton-inference-server/core repo")
9 changes: 9 additions & 0 deletions src/pb_utils.cc
@@ -286,6 +286,15 @@ IsUsingCUDAPool(
reinterpret_cast<void*>(cuda_pool_address));
}

bool
DeviceSupportsIPC(const int64_t device_id)
{
int supports_ipc = 0;
THROW_IF_CUDA_ERROR(cudaDeviceGetAttribute(
&supports_ipc, cudaDevAttrIpcEventSupport, device_id));
return (supports_ipc == 1);
}

#endif // TRITON_ENABLE_GPU

// FIXME: [DLIS-6078]: We should not need this function. However, some paths are
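For reference, the new helper boils down to a single cudaDeviceGetAttribute call. The standalone sketch below (not part of this PR) queries the same cudaDevAttrIpcEventSupport attribute for every visible device, using plain error handling in place of the backend's THROW_IF_CUDA_ERROR macro:

// Minimal sketch: probe IPC event support per device, roughly what
// DeviceSupportsIPC() does, but without the backend's error macros.
#include <cuda_runtime_api.h>
#include <cstdio>

int main() {
  int device_count = 0;
  if (cudaGetDeviceCount(&device_count) != cudaSuccess || device_count == 0) {
    std::fprintf(stderr, "No CUDA devices visible.\n");
    return 1;
  }
  for (int device_id = 0; device_id < device_count; ++device_id) {
    int supports_ipc = 0;
    cudaError_t err = cudaDeviceGetAttribute(
        &supports_ipc, cudaDevAttrIpcEventSupport, device_id);
    if (err != cudaSuccess) {
      std::fprintf(stderr, "cudaDeviceGetAttribute failed for device %d: %s\n",
                   device_id, cudaGetErrorString(err));
      continue;
    }
    // On Windows, CUDA IPC is generally only available for devices in TCC
    // mode; WDDM devices typically report 0 here, which is why the backend
    // falls back to CPU-only tensors.
    std::printf("device %d: IPC events %s\n", device_id,
                supports_ipc == 1 ? "supported" : "not supported");
  }
  return 0;
}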
1 change: 1 addition & 0 deletions src/pb_utils.h
@@ -324,6 +324,7 @@ bool IsUsingCUDAPool(
std::unique_ptr<CUDAMemoryPoolManager>& cuda_pool, int64_t memory_type_id,
void* data);

bool DeviceSupportsIPC(const int64_t device_id);
#endif // TRITON_ENABLE_GPU

// FIXME: [DLIS-6078]: We should not need this function. However, some paths are
21 changes: 19 additions & 2 deletions src/python_be.cc
@@ -512,6 +512,25 @@ ModelInstanceState::GetInputTensor(
cpu_only_tensors = true;
#endif // TRITON_ENABLE_GPU

// For Windows, force CPU tensors if IPC is not supported on
// the target GPU device
#if defined(TRITON_ENABLE_GPU) && defined(_WIN32)
if (src_memory_type == TRITONSERVER_MEMORY_GPU) {
Contributor Author:
Merge if-statements for legibility.

Contributor:
This is likely fine. The compiler should be able to optimize this, and if not, it's a well-predicted branch.

bool supports_ipc = DeviceSupportsIPC(src_memory_type_id);
if (!supports_ipc) {
LOG_MESSAGE(
TRITONSERVER_LOG_WARN,
(std::string(
"GPU memory storage requested, but GPU device " +
std::to_string(src_memory_type_id) +
" does not support IPC, which is necessary to support GPU "
"tensors. Forcing CPU only input tensors.")
.c_str()));
cpu_only_tensors = true;
}
}
#endif // TRITON_ENABLE_GPU && _WIN32

if (cpu_only_tensors || src_memory_type != TRITONSERVER_MEMORY_GPU) {
input_tensor = std::make_shared<PbTensor>(
std::string(input_name),
@@ -611,9 +630,7 @@ ModelInstanceState::GetInputTensor(
&cuda_used));

if (cuda_used) {
#ifdef TRITON_ENABLE_GPU
cudaStreamSynchronize(stream_);
#endif
}

input_tensor = std::make_shared<PbTensor>(
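Following up on the review comment above about merging the if-statements for legibility, one possible merged form is sketched below. It is only an illustration, not the code in this PR; the names it reuses (DeviceSupportsIPC, src_memory_type, src_memory_type_id, cpu_only_tensors, LOG_MESSAGE) come from the diff above.

// Sketch: the nested check collapsed into a single condition.
#if defined(TRITON_ENABLE_GPU) && defined(_WIN32)
  if (src_memory_type == TRITONSERVER_MEMORY_GPU &&
      !DeviceSupportsIPC(src_memory_type_id)) {
    LOG_MESSAGE(
        TRITONSERVER_LOG_WARN,
        (std::string("GPU memory storage requested, but GPU device ") +
         std::to_string(src_memory_type_id) +
         " does not support IPC, which is necessary to support GPU "
         "tensors. Forcing CPU only input tensors.")
            .c_str());
    cpu_only_tensors = true;
  }
#endif  // TRITON_ENABLE_GPU && _WIN32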