[WIP][AMDGPU] try rocm POC #491

Draft: wants to merge 15 commits into base: main
4 changes: 4 additions & 0 deletions .gitmodules
@@ -1,6 +1,10 @@
[submodule "3rdparty/nvbench"]
path = 3rdparty/nvbench
url = https://github.com/NVIDIA/nvbench.git
[submodule "3rdparty/hipbench"]
path = 3rdparty/hipbench
# url = https://github.com/ROCm/hipBench.git
url = https://github.com/yiakwy-xpu-ml-framework-team/hipbench
[submodule "3rdparty/googletest"]
path = 3rdparty/googletest
url = https://github.com/google/googletest.git
287 changes: 261 additions & 26 deletions CMakeLists.txt

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion cmake/config.cmake
@@ -40,4 +40,4 @@ set(FLASHINFER_GEN_MASK_MODES 0 1 2)
# So it's recommended to set it to a specific value if you know the architecture of the target GPU.
# Example:
# set(FLASHINFER_CUDA_ARCHITECTURES 80)
set(FLASHINFER_CUDA_ARCHITECTURES native)
set(FLASHINFER_CUDA_ARCHITECTURES native)
2 changes: 2 additions & 0 deletions cmake/modules/FindThrust.cmake
@@ -33,7 +33,9 @@ find_path( THRUST_INCLUDE_DIR
/usr/include/cuda
/usr/local/include
/usr/local/cuda/include
/opt/rocm/include
${CUDA_INCLUDE_DIRS}
${HIP_INCLUDE_DIRS}
NAMES thrust/version.h
DOC "Thrust headers"
)
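Note: rocThrust ships the same thrust/ headers under /opt/rocm/include, which is why the extra search path above is sufficient. A minimal smoke test for the headers this module finds, assuming NVIDIA Thrust (nvcc) or rocThrust (hipcc) is on the resolved include path; the file name is illustrative, not part of this PR:

// thrust_smoke.cu: builds with nvcc (NVIDIA Thrust) or hipcc (rocThrust),
// assuming THRUST_INCLUDE_DIR from FindThrust.cmake is on the include path.
#include <thrust/device_vector.h>
#include <thrust/reduce.h>

#include <cstdio>

int main() {
  thrust::device_vector<int> ones(1024, 1);               // 1024 elements, all 1
  int sum = thrust::reduce(ones.begin(), ones.end(), 0);  // reduction runs on the GPU
  std::printf("thrust reduce: %d (expected 1024)\n", sum);
  return sum == 1024 ? 0 : 1;
}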
4 changes: 4 additions & 0 deletions cmake/utils/Utils.cmake
@@ -36,14 +36,18 @@ macro(flashinfer_option variable description value)
if("${__value}" MATCHES ";")
# list values directly pass through
__flashinfer_option(${variable} "${description}" "${__value}")
message(STATUS "1 : creating ${variable} option, description : ${description}, value : ${__value}")
Member:
Is this just for debugging?

Author:
No worries, this will be removed; it is only here for debugging. I found that the macro does not work as expected: it should override the default values (from config.cmake or the CMake specification) with command-line arguments.

Author:
Values used in this test:

# config.cmake
# Whether to compile fp8 kernels or not.
set(FLASHINFER_ENABLE_FP8 ON)
# Whether to compile bf16 kernels or not.
set(FLASHINFER_ENABLE_BF16 ON)
# Whether to compile tvm bindings or not.
set(FLASHINFER_TVM_BINDING OFF)
# Whether to compile prefill kernel tests/benchmarks or not.
set(FLASHINFER_PREFILL ON)
# Whether to compile decode kernel tests/benchmarks or not.
set(FLASHINFER_DECODE ON)
# Whether to compile page kernel tests/benchmarks or not.
set(FLASHINFER_PAGE ON)
# Whether to compile cascade kernel tests/benchmarks or not.
set(FLASHINFER_CASCADE ON)
# Whether to compile sampling kernel tests/benchmarks or not.
set(FLASHINFER_SAMPLING ON)
# Whether to compile normalization kernel tests/benchmarks or not.
set(FLASHINFER_NORMALIZATION ON)
# Whether to compile fastdiv tests
set(FLASHINFER_FASTDIV_TEST ON)
# Whether to compile fastdequant tests
set(FLASHINFER_FASTDEQUANT_TEST ON)
# Whether to compile distributed tests
set(FLASHINFER_DISTRIBUTED OFF)
# The following configurations can impact the binary
# size of the generated library
set(FLASHINFER_GEN_LOGITS_POST_HOOKS 0)
set(FLASHINFER_GEN_HEAD_DIMS 64 128 256)
set(FLASHINFER_GEN_KV_LAYOUTS 0 1)
set(FLASHINFER_GEN_POS_ENCODING_MODES 0 1 2)
set(FLASHINFER_GEN_ALLOW_FP16_QK_REDUCTIONS "false" "true")
set(FLASHINFER_GEN_MASK_MODES 0 1 2)

# Set target CUDA architectures for tests/benchmarks; defaults to native.
# "native" is a special value for CMAKE_CUDA_ARCHITECTURES which means use the architectures of the host's GPU.
# It's new in CMake 3.24; if you are using an older version of CMake or you want to use a different value, you can
# set its value here. Supported CUDA architectures include 80;86;89;90.
# NOTE(Zihao): using "native" might be slow because whenever a CUDA file is compiled with `-arch=native`, nvcc spawns
# a `__nvcc_device_query` process to get the architecture of the host's GPU, which can stall the compilation process.
# So it's recommended to set it to a specific value if you know the architecture of the target GPU.
# Example:
# set(FLASHINFER_CUDA_ARCHITECTURES 80)
set(FLASHINFER_CUDA_ARCHITECTURES native)

elseif(DEFINED ${__value})
if(${__value})
__flashinfer_option(${variable} "${description}" ON)
message(STATUS "2 : creating ${variable} option, description : ${description}, value : ON")
else()
__flashinfer_option(${variable} "${description}" OFF)
message(STATUS "3 : creating ${variable} option, description : ${description}, value : OFF")
endif()
else()
__flashinfer_option(${variable} "${description}" "${__value}")
message(STATUS "4 : creating ${variable} option, description : ${description}, value : ${__value}")
endif()
else()
unset(${variable} CACHE)
8 changes: 8 additions & 0 deletions include/flashinfer/attention/cascade.cuh
@@ -16,7 +16,15 @@
#ifndef FLASHINFER_CASCADE_CUH_
#define FLASHINFER_CASCADE_CUH_

#ifdef USE_ROCM

#include <hip/hip_cooperative_groups.h>
// Portable interfaces over the CUDA API
#include "flashinfer/hip_defs.h"

#else
#include <cooperative_groups.h>
#endif // USE_ROCM

#include "../cp_async.cuh"
#include "../math.cuh"
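Note: flashinfer/hip_defs.h is referenced here but its contents are not shown in this diff. A minimal sketch of what such a portability shim typically contains; the symbols covered below are assumptions for illustration, not the actual contents of the header:

// Hypothetical sketch of a hip_defs.h-style shim: alias the cuda* runtime
// symbols to their hip* equivalents so shared code keeps the CUDA spellings.
#ifdef USE_ROCM
#include <hip/hip_runtime.h>

using cudaError_t = hipError_t;
using cudaStream_t = hipStream_t;

#define cudaSuccess hipSuccess
#define cudaGetErrorString hipGetErrorString
#define cudaMemcpyAsync hipMemcpyAsync
#define cudaStreamSynchronize hipStreamSynchronize
#else
#include <cuda_runtime.h>
#endif  // USE_ROCM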
16 changes: 15 additions & 1 deletion include/flashinfer/attention/decode.cuh
@@ -15,14 +15,28 @@
*/
#ifndef FLASHINFER_DECODE_CUH_
#define FLASHINFER_DECODE_CUH_

#ifdef USE_ROCM

#include <hip/hip_cooperative_groups.h>
#include <hip/pipeline.h>

#include "flashinfer/hip_cuda_type_utils.h"
// Portable interfaces over the CUDA API
#include "flashinfer/hip_defs.h"

#else
#include <cooperative_groups.h>
#include <cuda_bf16.h>
#include <cuda_fp16.h>
#include <cuda_fp8.h>
#include <cuda_runtime.h>
// cuda::pipeline is used by the kernels in this file
#include <cuda/pipeline>
#endif // USE_ROCM

#include <cstddef>

#include <iostream>
#include <optional>
#include <random>
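Note: both cooperative-groups headers expose the same cooperative_groups namespace, which is why the kernel bodies can remain unchanged across backends. An illustrative device-side sketch of the grid-wide synchronization pattern (not code from this PR):

// Compiles as CUDA or HIP. grid.sync() is only valid when the kernel is
// launched with cudaLaunchCooperativeKernel / hipLaunchCooperativeKernel.
#ifdef USE_ROCM
#include <hip/hip_cooperative_groups.h>
#else
#include <cooperative_groups.h>
#endif

namespace cg = cooperative_groups;

__global__ void TwoPhaseKernel(const float* in, float* tmp, float* out, int n) {
  cg::grid_group grid = cg::this_grid();
  int i = blockIdx.x * blockDim.x + threadIdx.x;
  if (i < n) tmp[i] = in[i] * 2.0f;             // phase 1: each thread writes its slot
  grid.sync();                                  // grid-wide barrier: all of tmp is now visible
  if (i < n) out[i] = tmp[i] + tmp[n - 1 - i];  // phase 2: read another block's result
}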
14 changes: 14 additions & 0 deletions include/flashinfer/attention/handler.cuh
@@ -16,9 +16,23 @@
#ifndef FLASHINFER_ATTENTION_HANDLER_CUH_
#define FLASHINFER_ATTENTION_HANDLER_CUH_

#ifdef USE_ROCM

#include <hip/hip_runtime_api.h>
// Portable interfaces over the CUDA API
#include "flashinfer/hip_defs.h"

#include <hip/driver_types.h>

#else

#include <cuda_runtime_api.h>

// Note: this header is part of the NVIDIA SDK
#include <driver_types.h>

#endif // USE_ROCM

#include <algorithm>
#include <cstddef>
#include <sstream>
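Note: once driver_types.h / hip/driver_types.h provide a common cudaError_t, status checks in the handler can stay backend-agnostic. A hedged sketch of such a check macro; the macro name is invented for illustration and may differ from FlashInfer's actual one:

// Hypothetical portable status-check macro; under USE_ROCM it relies on a
// shim like flashinfer/hip_defs.h aliasing cudaError_t etc. to HIP types.
#ifdef USE_ROCM
#include "flashinfer/hip_defs.h"
#else
#include <cuda_runtime_api.h>
#endif

#include <sstream>
#include <stdexcept>

#define PORTABLE_CUDA_CALL(expr)                                  \
  do {                                                            \
    cudaError_t _err = (expr);                                    \
    if (_err != cudaSuccess) {                                    \
      std::ostringstream _oss;                                    \
      _oss << #expr << " failed: " << cudaGetErrorString(_err);   \
      throw std::runtime_error(_oss.str());                       \
    }                                                             \
  } while (0)

// Usage: PORTABLE_CUDA_CALL(cudaStreamSynchronize(stream));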