Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ROCm and Triton LLVM assets at build #9

Draft
wants to merge 4 commits into
base: superbuild/submodules
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 55 additions & 43 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,61 +1,75 @@
cmake_minimum_required(VERSION 3.21)
project(InstrumentAMDGPUKernel LANGUAGES C CXX)


set(LLVM_INSTALL_DIR "" CACHE PATH "LLVM installation directory")

set(LLVM_INCLUDE_DIR "${LLVM_INSTALL_DIR}/include/llvm")
if(NOT EXISTS "${LLVM_INCLUDE_DIR}")
message(FATAL_ERROR
" LLVM_INSTALL_DIR (${LLVM_INCLUDE_DIR}) is invalid.")
endif()

# Check that the LLVMConfig.cmake file exists
set(- FALSE)

# Ubuntu + Darwin
if(EXISTS "${LLVM_INSTALL_DIR}/lib/cmake/llvm/LLVMConfig.cmake")
set(VALID_INSTALLATION TRUE)
if(DEFINED ENV{ROCM_PATH})
set(ROCM_PATH $ENV{ROCM_PATH})
else()
set(ROCM_PATH "/opt/rocm")
endif()
message("ROCM_PATH: ${ROCM_PATH}")

# Fedora
if(EXISTS "${LLVM_INSTALL_DIR}/lib64/cmake/llvm/LLVMConfig.cmake")
set(VALID_INSTALLATION TRUE)
endif()
# Define LLVM versions
set(TRITON_LLVM "" CACHE PATH "Path to the Triton LLVM directory")
set(ROCM_LLVM "${ROCM_PATH}/llvm" CACHE PATH "Path to the ROCm LLVM directory")

if(NOT ${VALID_INSTALLATION})
# Function to find and configure LLVM
function(find_and_configure_llvm llvm_install_dir llvm_version)
# Confirm that the LLVM installation directory exists
set(LLVM_INCLUDE_DIR "${llvm_install_dir}/include/llvm")
if(NOT EXISTS "${LLVM_INCLUDE_DIR}")
message(FATAL_ERROR
"LLVM installation directory, (${LLVM_INSTALL_DIR}), is invalid. Couldn't
" llvm_install_dir (${LLVM_INCLUDE_DIR}) is invalid.")
endif()

# Check that the LLVMConfig.cmake file exists
set(- FALSE)

# Ubuntu + Darwin
if(EXISTS "${llvm_install_dir}/lib/cmake/llvm/LLVMConfig.cmake")
set(VALID_INSTALLATION TRUE)
endif()

# Fedora
if(EXISTS "${llvm_install_dir}/lib64/cmake/llvm/LLVMConfig.cmake")
set(VALID_INSTALLATION TRUE)
endif()

if(NOT ${VALID_INSTALLATION})
message(FATAL_ERROR
"LLVM installation directory, (${llvm_install_dir}), is invalid. Couldn't
find LLVMConfig.cmake.")
endif()
endif()

list(APPEND CMAKE_PREFIX_PATH "${LLVM_INSTALL_DIR}/lib/cmake/llvm/")
include("${llvm_install_dir}/lib/cmake/llvm/LLVMConfig.cmake")
#include("${llvm_install_dir}/lib/cmake/llvm/LLVM-Config.cmake")

find_package(LLVM CONFIG)
if("${LLVM_VERSION_MAJOR}" VERSION_LESS 17)
message(FATAL_ERROR "Found LLVM ${LLVM_VERSION_MAJOR}, but need LLVM 17 or above")
endif()

if("${LLVM_VERSION_MAJOR}" VERSION_LESS 17)
message(FATAL_ERROR "Found LLVM ${LLVM_VERSION_MAJOR}, but need LLVM 17 or above")
endif()
message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION} in ${llvm_install_dir}")
message(STATUS "Using LLVMConfig.cmake in: ${llvm_install_dir}")

message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
message(STATUS "Using LLVMConfig.cmake in: ${LLVM_INSTALL_DIR}")
message("${llvm_version} LLVM STATUS:
Definitions ${LLVM_DEFINITIONS}
Includes ${LLVM_INCLUDE_DIRS}
Libraries ${LLVM_LIBRARY_DIRS}
Targets ${LLVM_TARGETS_TO_BUILD}"
)

message("LLVM STATUS:
Definitions ${LLVM_DEFINITIONS}
Includes ${LLVM_INCLUDE_DIRS}
Libraries ${LLVM_LIBRARY_DIRS}
Targets ${LLVM_TARGETS_TO_BUILD}"
)
set(LLVM_INCLUDE_DIRS_${llvm_version} ${LLVM_INCLUDE_DIRS} PARENT_SCOPE)
set(LLVM_LIBRARY_DIRS_${llvm_version} ${LLVM_LIBRARY_DIRS} PARENT_SCOPE)
set(LLVM_DEFINITIONS_${llvm_version} ${LLVM_DEFINITIONS} PARENT_SCOPE)

include_directories(SYSTEM ${LLVM_INCLUDE_DIRS})
link_directories(${LLVM_LIBRARY_DIRS})
add_definitions(${LLVM_DEFINITIONS})
endfunction()

find_and_configure_llvm(${ROCM_LLVM} "ROCM")
find_and_configure_llvm(${TRITON_LLVM} "TRITON")

set(CMAKE_CXX_STANDARD 17 CACHE STRING "")

if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Debug CACHE
STRING "Build type (default Debug):" FORCE)
set(CMAKE_BUILD_TYPE Debug CACHE STRING "Build type (default Debug):" FORCE)
endif()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall \
Expand All @@ -78,10 +92,9 @@ set(CMAKE_LIBRARY_OUTPUT_DIRECTORY "${PROJECT_BINARY_DIR}/lib")

# Avoid warning about DOWNLOAD_EXTRACT_TIMESTAMP in CMake 3.24:
if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0")
cmake_policy(SET CMP0135 NEW)
cmake_policy(SET CMP0135 NEW)
endif()


# enable_testing()

add_subdirectory(lib)
Expand All @@ -107,4 +120,3 @@ endif()
install(TARGETS ${AMDGCN_INSTRUMENTATION_PLUGINS}
LIBRARY DESTINATION lib/${CMAKE_PROJECT_NAME}
PUBLIC_HEADER DESTINATION include/${CMAKE_PROJECT_NAME})

109 changes: 55 additions & 54 deletions instrumentation/MemTraceInstrumentationKernel.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,65 +10,66 @@ __device__ uint32_t getThreadIdInBlock() { return __builtin_amdgcn_workitem_id_x

__attribute__((always_inline))
__device__ uint32_t getWaveId() {
return getThreadIdInBlock() / WaveFrontSize;
return getThreadIdInBlock() / WaveFrontSize;
}

__attribute__((always_inline))
__device__ bool isSharedMemPtr(const void *Ptr) {
return __builtin_amdgcn_is_shared(
(const __attribute__((address_space(0))) void *)Ptr);
__device__ bool isSharedMemPtr(const void *Ptr) {
return __builtin_amdgcn_is_shared(
(const __attribute__((address_space(0))) void *)Ptr);
}

__attribute__((used))
__device__ void memTrace(void* addressPtr, uint32_t LocationId){
if(isSharedMemPtr(addressPtr))
return;
uint64_t address = reinterpret_cast<uint64_t>(addressPtr);
//Mask of the active threads in the wave
int activeMask = __builtin_amdgcn_read_exec();
// //Find first active thread in the wave by finding the position of the least significant bit set to 1 in the activeMask
const int firstActiveLane = __ffs(activeMask) - 1;
uint64_t addrArray[WaveFrontSize];
for(int i = 0; i < WaveFrontSize; i++){
addrArray[i] = __shfl(address, i, WaveFrontSize);
}
uint32_t Lane = __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));

if(Lane == firstActiveLane){
unsigned int hw_id = 0;
uint64_t Time = 0;
#if !defined(__gfx1100__) && !defined(__gfx1101__)
Time = __builtin_amdgcn_s_memrealtime();
asm volatile("s_getreg_b32 %0, hwreg(HW_REG_HW_ID)" : "=s"(hw_id));
#endif
char hex_str[]= "0123456789abcdef";
char out[WaveFrontSize*HexLen + 1];
(out)[WaveFrontSize*HexLen] = '\0';
for (size_t i = 0; i < WaveFrontSize; i++) {
(out)[i * HexLen + 0] = '0';
(out)[i * HexLen + 1] = 'x';
(out)[i * HexLen + 2] = hex_str[(addrArray[i] >> 44) & 0x0F];
(out)[i * HexLen + 3] = hex_str[(addrArray[i] >> 40) & 0x0F];
(out)[i * HexLen + 4] = hex_str[(addrArray[i] >> 36) & 0x0F];
(out)[i * HexLen + 5] = hex_str[(addrArray[i] >> 32) & 0x0F];
(out)[i * HexLen + 6] = hex_str[(addrArray[i] >> 28) & 0x0F];
(out)[i * HexLen + 7] = hex_str[(addrArray[i] >> 24) & 0x0F];
(out)[i * HexLen + 8] = hex_str[(addrArray[i] >> 20) & 0x0F];
(out)[i * HexLen + 9] = hex_str[(addrArray[i] >> 16) & 0x0F];
(out)[i * HexLen + 10] = hex_str[(addrArray[i] >> 12) & 0x0F];
(out)[i * HexLen + 11] = hex_str[(addrArray[i] >> 8) & 0x0F];
(out)[i * HexLen + 12] = hex_str[(addrArray[i] >> 4) & 0x0F];
(out)[i * HexLen + 13] = hex_str[(addrArray[i] ) & 0x0F];
(out)[i * HexLen + 14] = ',';
__device__ void memTrace(void* addressPtr, uint32_t LocationId) {
printf("Hello! You are inside memTrace()\n");
if (isSharedMemPtr(addressPtr))
return;
uint64_t address = reinterpret_cast<uint64_t>(addressPtr);
// Mask of the active threads in the wave
int activeMask = __builtin_amdgcn_read_exec();
// Find first active thread in the wave by finding the position of the least significant bit set to 1 in the activeMask
const int firstActiveLane = __ffs(activeMask) - 1;
uint64_t addrArray[WaveFrontSize];
for (int i = 0; i < WaveFrontSize; i++) {
addrArray[i] = __shfl(address, i, WaveFrontSize);
}
(out)[WaveFrontSize * HexLen - 1] = '\n';
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
unsigned int xcc_id;
asm volatile("s_getreg_b32 %0, hwreg(HW_REG_XCC_ID)" : "=s"(xcc_id));
printf("%ld,%d,%d,%d,%d,%d,%d, %s", Time, LocationId, (hw_id & 0xf), ((hw_id & 0x30) >> 4), ((hw_id & 0xf00) >> 8), ((hw_id & 0xe000) >> 13), xcc_id, out);
#else
printf("%ld,%d,%d,%d,%d,%d,%s", Time, LocationId, (hw_id & 0xf), ((hw_id & 0x30) >> 4), ((hw_id & 0xf00) >> 8), ((hw_id & 0xe000) >> 13),out);
#endif
uint32_t Lane = __builtin_amdgcn_mbcnt_hi(~0u, __builtin_amdgcn_mbcnt_lo(~0u, 0u));

}
if (Lane == firstActiveLane) {
unsigned int hw_id = 0;
uint64_t Time = 0;
#if !defined(__gfx1100__) && !defined(__gfx1101__)
Time = __builtin_amdgcn_s_memrealtime();
asm volatile("s_getreg_b32 %0, hwreg(HW_REG_HW_ID)" : "=s"(hw_id));
#endif
char hex_str[] = "0123456789abcdef";
char out[WaveFrontSize * HexLen + 1];
(out)[WaveFrontSize * HexLen] = '\0';
for (size_t i = 0; i < WaveFrontSize; i++) {
(out)[i * HexLen + 0] = '0';
(out)[i * HexLen + 1] = 'x';
(out)[i * HexLen + 2] = hex_str[(addrArray[i] >> 44) & 0x0F];
(out)[i * HexLen + 3] = hex_str[(addrArray[i] >> 40) & 0x0F];
(out)[i * HexLen + 4] = hex_str[(addrArray[i] >> 36) & 0x0F];
(out)[i * HexLen + 5] = hex_str[(addrArray[i] >> 32) & 0x0F];
(out)[i * HexLen + 6] = hex_str[(addrArray[i] >> 28) & 0x0F];
(out)[i * HexLen + 7] = hex_str[(addrArray[i] >> 24) & 0x0F];
(out)[i * HexLen + 8] = hex_str[(addrArray[i] >> 20) & 0x0F];
(out)[i * HexLen + 9] = hex_str[(addrArray[i] >> 16) & 0x0F];
(out)[i * HexLen + 10] = hex_str[(addrArray[i] >> 12) & 0x0F];
(out)[i * HexLen + 11] = hex_str[(addrArray[i] >> 8) & 0x0F];
(out)[i * HexLen + 12] = hex_str[(addrArray[i] >> 4) & 0x0F];
(out)[i * HexLen + 13] = hex_str[(addrArray[i]) & 0x0F];
(out)[i * HexLen + 14] = ',';
}
(out)[WaveFrontSize * HexLen - 1] = '\n';
#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__)
unsigned int xcc_id;
asm volatile("s_getreg_b32 %0, hwreg(HW_REG_XCC_ID)" : "=s"(xcc_id));
printf("%ld,%d,%d,%d,%d,%d,%d, %s", Time, LocationId, (hw_id & 0xf), ((hw_id & 0x30) >> 4), ((hw_id & 0xf00) >> 8), ((hw_id & 0xe000) >> 13), xcc_id, out);
#else
printf("Hello!\n");
printf("%ld,%d,%d,%d,%d,%d,%s", Time, LocationId, (hw_id & 0xf), ((hw_id & 0x30) >> 4), ((hw_id & 0xf00) >> 8), ((hw_id & 0xe000) >> 13), out);
#endif
}
}

3 changes: 3 additions & 0 deletions lib/AMDGCNMemTrace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,11 +62,13 @@ void InjectingInstrumentationFunction(const BasicBlock::iterator &I, const Funct


bool AMDGCNMemTrace::runOnModule(Module &M) {
printf("Hello from AMDGCNMemTrace.cpp, runModule()\n");
bool ModifiedCodeGen = false;
auto &CTX = M.getContext();
uint32_t LocationCounter = 0;
std::string errorMsg;
std::unique_ptr<llvm::Module> InstrumentationModule;
printf("Calling loadInstrumentationFile() with file %s\n", InstrumentationFunctionFile.c_str());
if (!loadInstrumentationFile(InstrumentationFunctionFile, CTX, InstrumentationModule, errorMsg)) {
printf("error loading program '%s': %s", InstrumentationFunctionFile.c_str(),
errorMsg.c_str());
Expand Down Expand Up @@ -94,6 +96,7 @@ bool AMDGCNMemTrace::runOnModule(Module &M) {
}

PassPluginLibraryInfo getPassPluginInfo() {
printf("Hello from AMDGCNMemTrace.cpp, getPassPluginInfo()\n");
const auto callback = [](PassBuilder &PB) {
PB.registerOptimizerLastEPCallback([&](ModulePassManager &MPM, auto&&... args) {
MPM.addPass(AMDGCNMemTrace());
Expand Down
78 changes: 51 additions & 27 deletions lib/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
set(AMDGCN_INSTRUMENTATION_PASSES
InjectAMDGCNFunction
InjectAMDGCNInlineASM
InjectAMDGCNSharedMemTtrace
AMDGCNNumCacheLines
AMDGCNMemTrace
)
InjectAMDGCNInlineASM
InjectAMDGCNSharedMemTtrace
AMDGCNNumCacheLines
AMDGCNMemTrace
)

set(InjectAMDGCNFunction_SOURCES
InjectAMDGCNFunction.cpp)
Expand All @@ -24,29 +24,53 @@ set(AMDGCNMemTrace_SOURCES

set(AMDGCN_INSTRUMENTATION_PLUGINS "")

foreach( plugin ${AMDGCN_INSTRUMENTATION_PASSES} )
add_library(
${plugin}
SHARED
${${plugin}_SOURCES}
)

target_include_directories(
${plugin}
PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}/../include"
)

target_link_libraries(
${plugin}
PRIVATE
LLVMCore
LLVMIRReader
LLVMLinker
"$<$<PLATFORM_ID:Darwin>:-undefined dynamic_lookup>"
function(link_against_llvm llvm_version)
string(TOLOWER ${llvm_version} install_suffix)
foreach( plugin ${AMDGCN_INSTRUMENTATION_PASSES} )
add_library(
${plugin}-${install_suffix}
SHARED
${${plugin}_SOURCES}
)

target_include_directories(
${plugin}-${install_suffix}
PRIVATE
"${CMAKE_CURRENT_SOURCE_DIR}/../include"
${LLVM_INCLUDE_DIRS_${llvm_version}}
)

list(APPEND AMDGCN_INSTRUMENTATION_PLUGINS ${plugin})
endforeach()
# target_link_directories(
# ${plugin}-${install_suffix}
# PRIVATE
# ${LLVM_LIBRARY_DIRS_${llvm_version}}
# )

# target_link_libraries(
# ${plugin}-${install_suffix}
# PRIVATE
# LLVMCore
# LLVMIRReader
# LLVMLinker
# "$<$<PLATFORM_ID:Darwin>:-undefined dynamic_lookup>"
# # ${LLVM_LIBRARY_DIRS_${llvm_version}}/libLLVMCore.a
# # ${LLVM_LIBRARY_DIRS_${llvm_version}}/libLLVMIRReader.a
# # ${LLVM_LIBRARY_DIRS_${llvm_version}}/libLLVMLinker.a
# # ${LLVM_LIBRARY_DIRS_${llvm_version}}/libLLVMSupport.a
# )

# set_target_properties(
# ${plugin}-${install_suffix}
# PROPERTIES
# LINK_SEARCH_START_STATIC ON
# LINK_SEARCH_END_STATIC ON
# )

list(APPEND AMDGCN_INSTRUMENTATION_PLUGINS ${plugin}-${install_suffix})
endforeach()
endfunction()

link_against_llvm("ROCM")
link_against_llvm("TRITON")

set(AMDGCN_INSTRUMENTATION_PLUGINS "${AMDGCN_INSTRUMENTATION_PLUGINS}" PARENT_SCOPE)