Skip to content

Commit

Permalink
Merge pull request #2325 from AllanZyne/review/yang/restructure_asan_msan
Browse files Browse the repository at this point in the history

[DeviceMSAN] Support MemorySanitizer for device offloading
  • Loading branch information
kbenzie authored Dec 11, 2024
2 parents 45f3d8a + 064da15 commit 8818ab5
Show file tree
Hide file tree
Showing 24 changed files with 3,640 additions and 158 deletions.
16 changes: 16 additions & 0 deletions source/loader/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,21 @@ if(UR_ENABLE_SANITIZER)
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_statistics.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_validator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_validator.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_allocator.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_allocator.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_buffer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_buffer.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_ddi.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_ddi.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_interceptor.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_interceptor.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_libdevice.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_options.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_options.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_report.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_report.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_shadow.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_shadow.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/backtrace.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/sanitizer_utils.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp
Expand All @@ -160,6 +175,7 @@ if(UR_ENABLE_SANITIZER)
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanddi.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.cpp
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.hpp
)
Expand Down
25 changes: 7 additions & 18 deletions source/loader/layers/sanitizer/asan/asan_ddi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2053,26 +2053,10 @@ __urdlllocal ur_result_t UR_APICALL urGetVirtualMemProcAddrTable(
}
} // namespace asan

ur_result_t context_t::init(ur_dditable_t *dditable,
const std::set<std::string> &enabledLayerNames,
[[maybe_unused]] codeloc_data codelocData) {
ur_result_t initAsanDDITable(ur_dditable_t *dditable) {
ur_result_t result = UR_RESULT_SUCCESS;

if (enabledLayerNames.count("UR_LAYER_ASAN")) {
enabledType = SanitizerType::AddressSanitizer;
initAsanInterceptor();
} else if (enabledLayerNames.count("UR_LAYER_MSAN")) {
enabledType = SanitizerType::MemorySanitizer;
} else if (enabledLayerNames.count("UR_LAYER_TSAN")) {
enabledType = SanitizerType::ThreadSanitizer;
}

// Only support AddressSanitizer now
if (enabledType != SanitizerType::AddressSanitizer) {
return result;
}

urDdiTable = *dditable;
getContext()->logger.always("==== DeviceSanitizer: ASAN");

if (UR_RESULT_SUCCESS == result) {
result = ur_sanitizer_layer::asan::urGetGlobalProcAddrTable(
Expand Down Expand Up @@ -2134,6 +2118,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable,
UR_API_VERSION_CURRENT, &dditable->VirtualMem);
}

if (result != UR_RESULT_SUCCESS) {
getContext()->logger.error("Initialize ASAN DDI table failed: {}",
result);
}

return result;
}

Expand Down
2 changes: 2 additions & 0 deletions source/loader/layers/sanitizer/asan/asan_ddi.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,6 @@ namespace ur_sanitizer_layer {
void initAsanInterceptor();
void destroyAsanInterceptor();

ur_result_t initAsanDDITable(ur_dditable_t *dditable);

} // namespace ur_sanitizer_layer
269 changes: 131 additions & 138 deletions source/loader/layers/sanitizer/asan/asan_interceptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -676,162 +676,155 @@ ur_result_t AsanInterceptor::prepareLaunch(
std::shared_ptr<DeviceInfo> &DeviceInfo, ur_queue_handle_t Queue,
ur_kernel_handle_t Kernel, LaunchInfo &LaunchInfo) {

do {
auto KernelInfo = getKernelInfo(Kernel);
assert(KernelInfo && "Kernel should be instrumented");

// Validate pointer arguments
if (getOptions().DetectKernelArguments) {
for (const auto &[ArgIndex, PtrPair] : KernelInfo->PointerArgs) {
auto Ptr = PtrPair.first;
if (Ptr == nullptr) {
continue;
}
if (auto ValidateResult = ValidateUSMPointer(
ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) {
ReportInvalidKernelArgument(Kernel, ArgIndex, (uptr)Ptr,
ValidateResult, PtrPair.second);
exitWithErrors();
}
auto KernelInfo = getKernelInfo(Kernel);
assert(KernelInfo && "Kernel should be instrumented");

// Validate pointer arguments
if (getOptions().DetectKernelArguments) {
for (const auto &[ArgIndex, PtrPair] : KernelInfo->PointerArgs) {
auto Ptr = PtrPair.first;
if (Ptr == nullptr) {
continue;
}
if (auto ValidateResult = ValidateUSMPointer(
ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) {
ReportInvalidKernelArgument(Kernel, ArgIndex, (uptr)Ptr,
ValidateResult, PtrPair.second);
exitWithErrors();
}
}
}

// Set membuffer arguments
for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs) {
char *ArgPointer = nullptr;
UR_CALL(MemBuffer->getHandle(DeviceInfo->Handle, ArgPointer));
ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer(
Kernel, ArgIndex, nullptr, ArgPointer);
if (URes != UR_RESULT_SUCCESS) {
getContext()->logger.error(
"Failed to set buffer {} as the {} arg to kernel {}: {}",
ur_cast<ur_mem_handle_t>(MemBuffer.get()), ArgIndex, Kernel,
URes);
}
// Set membuffer arguments
for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs) {
char *ArgPointer = nullptr;
UR_CALL(MemBuffer->getHandle(DeviceInfo->Handle, ArgPointer));
ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer(
Kernel, ArgIndex, nullptr, ArgPointer);
if (URes != UR_RESULT_SUCCESS) {
getContext()->logger.error(
"Failed to set buffer {} as the {} arg to kernel {}: {}",
ur_cast<ur_mem_handle_t>(MemBuffer.get()), ArgIndex, Kernel,
URes);
}
}

auto ArgNums = GetKernelNumArgs(Kernel);
// We must prepare all kernel args before calling
// urKernelGetSuggestedLocalWorkSize; otherwise the call will fail on
// the CPU device.
if (ArgNums) {
ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer(
Kernel, ArgNums - 1, nullptr, LaunchInfo.Data.getDevicePtr());
if (URes != UR_RESULT_SUCCESS) {
getContext()->logger.error("Failed to set launch info: {}",
URes);
return URes;
}
auto ArgNums = GetKernelNumArgs(Kernel);
// We must prepare all kernel args before calling
// urKernelGetSuggestedLocalWorkSize; otherwise the call will fail on
// the CPU device.
if (ArgNums) {
ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer(
Kernel, ArgNums - 1, nullptr, LaunchInfo.Data.getDevicePtr());
if (URes != UR_RESULT_SUCCESS) {
getContext()->logger.error("Failed to set launch info: {}", URes);
return URes;
}
}

if (LaunchInfo.LocalWorkSize.empty()) {
LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim);
auto URes =
getContext()->urDdiTable.Kernel.pfnGetSuggestedLocalWorkSize(
Kernel, Queue, LaunchInfo.WorkDim,
LaunchInfo.GlobalWorkOffset, LaunchInfo.GlobalWorkSize,
LaunchInfo.LocalWorkSize.data());
if (URes != UR_RESULT_SUCCESS) {
if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
return URes;
}
// If urKernelGetSuggestedLocalWorkSize is not supported by the driver, we fall back
// to an inefficient implementation
for (size_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) {
LaunchInfo.LocalWorkSize[Dim] = 1;
}
if (LaunchInfo.LocalWorkSize.empty()) {
LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim);
auto URes =
getContext()->urDdiTable.Kernel.pfnGetSuggestedLocalWorkSize(
Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset,
LaunchInfo.GlobalWorkSize, LaunchInfo.LocalWorkSize.data());
if (URes != UR_RESULT_SUCCESS) {
if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {
return URes;
}
// If urKernelGetSuggestedLocalWorkSize is not supported by the driver, we fall back
// to an inefficient implementation
for (size_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) {
LaunchInfo.LocalWorkSize[Dim] = 1;
}
}
}

const size_t *LocalWorkSize = LaunchInfo.LocalWorkSize.data();
uint32_t NumWG = 1;
for (uint32_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) {
NumWG *= (LaunchInfo.GlobalWorkSize[Dim] + LocalWorkSize[Dim] - 1) /
LocalWorkSize[Dim];
}
const size_t *LocalWorkSize = LaunchInfo.LocalWorkSize.data();
uint32_t NumWG = 1;
for (uint32_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) {
NumWG *= (LaunchInfo.GlobalWorkSize[Dim] + LocalWorkSize[Dim] - 1) /
LocalWorkSize[Dim];
}

// Prepare asan runtime data
LaunchInfo.Data.Host.GlobalShadowOffset =
DeviceInfo->Shadow->ShadowBegin;
LaunchInfo.Data.Host.GlobalShadowOffsetEnd =
DeviceInfo->Shadow->ShadowEnd;
LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type;
LaunchInfo.Data.Host.Debug = getOptions().Debug ? 1 : 0;

auto LocalMemoryUsage =
GetKernelLocalMemorySize(Kernel, DeviceInfo->Handle);
auto PrivateMemoryUsage =
GetKernelPrivateMemorySize(Kernel, DeviceInfo->Handle);

getContext()->logger.info(
"KernelInfo {} (LocalMemory={}, PrivateMemory={})", (void *)Kernel,
LocalMemoryUsage, PrivateMemoryUsage);

// Write shadow memory offset for local memory
if (getOptions().DetectLocals) {
if (DeviceInfo->Shadow->AllocLocalShadow(
Queue, NumWG, LaunchInfo.Data.Host.LocalShadowOffset,
LaunchInfo.Data.Host.LocalShadowOffsetEnd) !=
UR_RESULT_SUCCESS) {
getContext()->logger.warning(
"Failed to allocate shadow memory for local "
"memory, maybe the number of workgroup ({}) is too "
"large",
NumWG);
getContext()->logger.warning(
"Skip checking local memory of kernel <{}>",
GetKernelName(Kernel));
} else {
getContext()->logger.info(
"ShadowMemory(Local, WorkGroup{}, {} - {})", NumWG,
(void *)LaunchInfo.Data.Host.LocalShadowOffset,
(void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd);
}
// Prepare asan runtime data
LaunchInfo.Data.Host.GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin;
LaunchInfo.Data.Host.GlobalShadowOffsetEnd = DeviceInfo->Shadow->ShadowEnd;
LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type;
LaunchInfo.Data.Host.Debug = getOptions().Debug ? 1 : 0;

auto LocalMemoryUsage =
GetKernelLocalMemorySize(Kernel, DeviceInfo->Handle);
auto PrivateMemoryUsage =
GetKernelPrivateMemorySize(Kernel, DeviceInfo->Handle);

getContext()->logger.info(
"KernelInfo {} (LocalMemory={}, PrivateMemory={})", (void *)Kernel,
LocalMemoryUsage, PrivateMemoryUsage);

// Write shadow memory offset for local memory
if (getOptions().DetectLocals) {
if (DeviceInfo->Shadow->AllocLocalShadow(
Queue, NumWG, LaunchInfo.Data.Host.LocalShadowOffset,
LaunchInfo.Data.Host.LocalShadowOffsetEnd) !=
UR_RESULT_SUCCESS) {
getContext()->logger.warning(
"Failed to allocate shadow memory for local "
"memory, maybe the number of workgroup ({}) is too "
"large",
NumWG);
getContext()->logger.warning(
"Skip checking local memory of kernel <{}>",
GetKernelName(Kernel));
} else {
getContext()->logger.info(
"ShadowMemory(Local, WorkGroup{}, {} - {})", NumWG,
(void *)LaunchInfo.Data.Host.LocalShadowOffset,
(void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd);
}
}

// Write shadow memory offset for private memory
if (getOptions().DetectPrivates) {
if (DeviceInfo->Shadow->AllocPrivateShadow(
Queue, NumWG, LaunchInfo.Data.Host.PrivateShadowOffset,
LaunchInfo.Data.Host.PrivateShadowOffsetEnd) !=
UR_RESULT_SUCCESS) {
getContext()->logger.warning(
"Failed to allocate shadow memory for private "
"memory, maybe the number of workgroup ({}) is too "
"large",
NumWG);
getContext()->logger.warning(
"Skip checking private memory of kernel <{}>",
GetKernelName(Kernel));
} else {
getContext()->logger.info(
"ShadowMemory(Private, WorkGroup{}, {} - {})", NumWG,
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
(void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
}
// Write shadow memory offset for private memory
if (getOptions().DetectPrivates) {
if (DeviceInfo->Shadow->AllocPrivateShadow(
Queue, NumWG, LaunchInfo.Data.Host.PrivateShadowOffset,
LaunchInfo.Data.Host.PrivateShadowOffsetEnd) !=
UR_RESULT_SUCCESS) {
getContext()->logger.warning(
"Failed to allocate shadow memory for private "
"memory, maybe the number of workgroup ({}) is too "
"large",
NumWG);
getContext()->logger.warning(
"Skip checking private memory of kernel <{}>",
GetKernelName(Kernel));
} else {
getContext()->logger.info(
"ShadowMemory(Private, WorkGroup{}, {} - {})", NumWG,
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
(void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
}
}

// Write local arguments info
if (!KernelInfo->LocalArgs.empty()) {
std::vector<LocalArgsInfo> LocalArgsInfo;
for (auto [ArgIndex, ArgInfo] : KernelInfo->LocalArgs) {
LocalArgsInfo.push_back(ArgInfo);
getContext()->logger.debug(
"local_args (argIndex={}, size={}, sizeWithRZ={})",
ArgIndex, ArgInfo.Size, ArgInfo.SizeWithRedZone);
}
UR_CALL(LaunchInfo.Data.importLocalArgsInfo(Queue, LocalArgsInfo));
// Write local arguments info
if (!KernelInfo->LocalArgs.empty()) {
std::vector<LocalArgsInfo> LocalArgsInfo;
for (auto [ArgIndex, ArgInfo] : KernelInfo->LocalArgs) {
LocalArgsInfo.push_back(ArgInfo);
getContext()->logger.debug(
"local_args (argIndex={}, size={}, sizeWithRZ={})", ArgIndex,
ArgInfo.Size, ArgInfo.SizeWithRedZone);
}
UR_CALL(LaunchInfo.Data.importLocalArgsInfo(Queue, LocalArgsInfo));
}

// sync asan runtime data to device side
UR_CALL(LaunchInfo.Data.syncToDevice(Queue));
// sync asan runtime data to device side
UR_CALL(LaunchInfo.Data.syncToDevice(Queue));

getContext()->logger.debug(
"launch_info {} (numLocalArgs={}, localArgs={})",
(void *)LaunchInfo.Data.getDevicePtr(),
LaunchInfo.Data.Host.NumLocalArgs,
(void *)LaunchInfo.Data.Host.LocalArgs);
} while (false);
getContext()->logger.debug("launch_info {} (numLocalArgs={}, localArgs={})",
(void *)LaunchInfo.Data.getDevicePtr(),
LaunchInfo.Data.Host.NumLocalArgs,
(void *)LaunchInfo.Data.Host.LocalArgs);

return UR_RESULT_SUCCESS;
}
Expand Down
4 changes: 2 additions & 2 deletions source/loader/layers/sanitizer/asan/asan_shadow.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

#pragma once

#include "asan/asan_allocator.hpp"
#include "asan_allocator.hpp"
#include "sanitizer_common/sanitizer_libdevice.hpp"

#include <unordered_set>
Expand Down Expand Up @@ -134,7 +134,7 @@ struct ShadowMemoryPVC final : public ShadowMemoryGPU {
/// Reports this layout's shadow size as a fixed constant
/// (0x1800_0000_0000; presumably in bytes — confirm against the base class).
size_t GetShadowSize() override {
    constexpr size_t ShadowSize = 0x180000000000ULL;
    return ShadowSize;
}
};

/// Shadow Memory layout of GPU PVC device
/// Shadow Memory layout of GPU DG2 device
///
/// USM Allocation Range (48 bits)
/// Host/Shared USM : 0x0000_0000_0000_0000 ~ 0x0000_7fff_ffff_ffff
Expand Down
Loading

0 comments on commit 8818ab5

Please sign in to comment.