From 064da157b3bb3a32354dc2e95a9bf59ea119d9db Mon Sep 17 00:00:00 2001 From: "Zhao, Yang2" Date: Wed, 23 Oct 2024 01:27:32 -0700 Subject: [PATCH] [DeviceMSAN] Support MemorySanitizer for device offloading --- source/loader/CMakeLists.txt | 16 + .../loader/layers/sanitizer/asan/asan_ddi.cpp | 25 +- .../loader/layers/sanitizer/asan/asan_ddi.hpp | 2 + .../sanitizer/asan/asan_interceptor.cpp | 269 ++- .../layers/sanitizer/asan/asan_shadow.hpp | 4 +- .../layers/sanitizer/msan/msan_allocator.cpp | 26 + .../layers/sanitizer/msan/msan_allocator.hpp | 41 + .../layers/sanitizer/msan/msan_buffer.cpp | 204 +++ .../layers/sanitizer/msan/msan_buffer.hpp | 82 + .../loader/layers/sanitizer/msan/msan_ddi.cpp | 1528 +++++++++++++++++ .../loader/layers/sanitizer/msan/msan_ddi.hpp | 22 + .../sanitizer/msan/msan_interceptor.cpp | 490 ++++++ .../sanitizer/msan/msan_interceptor.hpp | 323 ++++ .../layers/sanitizer/msan/msan_libdevice.hpp | 66 + .../layers/sanitizer/msan/msan_options.cpp | 90 + .../layers/sanitizer/msan/msan_options.hpp | 27 + .../layers/sanitizer/msan/msan_report.cpp | 43 + .../layers/sanitizer/msan/msan_report.hpp | 27 + .../layers/sanitizer/msan/msan_shadow.cpp | 291 ++++ .../layers/sanitizer/msan/msan_shadow.hpp | 144 ++ .../linux/sanitizer_utils.cpp | 18 + .../sanitizer_common/sanitizer_common.hpp | 2 + source/loader/layers/sanitizer/ur_sanddi.cpp | 54 + .../layers/sanitizer/ur_sanitizer_layer.cpp | 4 + 24 files changed, 3640 insertions(+), 158 deletions(-) create mode 100644 source/loader/layers/sanitizer/msan/msan_allocator.cpp create mode 100644 source/loader/layers/sanitizer/msan/msan_allocator.hpp create mode 100644 source/loader/layers/sanitizer/msan/msan_buffer.cpp create mode 100644 source/loader/layers/sanitizer/msan/msan_buffer.hpp create mode 100644 source/loader/layers/sanitizer/msan/msan_ddi.cpp create mode 100644 source/loader/layers/sanitizer/msan/msan_ddi.hpp create mode 100644 source/loader/layers/sanitizer/msan/msan_interceptor.cpp create mode 100644 
source/loader/layers/sanitizer/msan/msan_interceptor.hpp create mode 100644 source/loader/layers/sanitizer/msan/msan_libdevice.hpp create mode 100644 source/loader/layers/sanitizer/msan/msan_options.cpp create mode 100644 source/loader/layers/sanitizer/msan/msan_options.hpp create mode 100644 source/loader/layers/sanitizer/msan/msan_report.cpp create mode 100644 source/loader/layers/sanitizer/msan/msan_report.hpp create mode 100644 source/loader/layers/sanitizer/msan/msan_shadow.cpp create mode 100644 source/loader/layers/sanitizer/msan/msan_shadow.hpp create mode 100644 source/loader/layers/sanitizer/ur_sanddi.cpp diff --git a/source/loader/CMakeLists.txt b/source/loader/CMakeLists.txt index aaca3b1569..d8f6056ae9 100644 --- a/source/loader/CMakeLists.txt +++ b/source/loader/CMakeLists.txt @@ -151,6 +151,21 @@ if(UR_ENABLE_SANITIZER) ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_statistics.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_validator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/asan/asan_validator.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_allocator.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_allocator.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_buffer.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_buffer.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_ddi.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_ddi.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_interceptor.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_interceptor.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_libdevice.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_options.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_options.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_report.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_report.hpp + 
${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_shadow.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/msan/msan_shadow.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/backtrace.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/linux/sanitizer_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_allocator.hpp @@ -160,6 +175,7 @@ if(UR_ENABLE_SANITIZER) ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_stacktrace.hpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/sanitizer_common/sanitizer_utils.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanddi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/layers/sanitizer/ur_sanitizer_layer.hpp ) diff --git a/source/loader/layers/sanitizer/asan/asan_ddi.cpp b/source/loader/layers/sanitizer/asan/asan_ddi.cpp index 73415c1bcf..f8ded3ec7a 100644 --- a/source/loader/layers/sanitizer/asan/asan_ddi.cpp +++ b/source/loader/layers/sanitizer/asan/asan_ddi.cpp @@ -2053,26 +2053,10 @@ __urdlllocal ur_result_t UR_APICALL urGetVirtualMemProcAddrTable( } } // namespace asan -ur_result_t context_t::init(ur_dditable_t *dditable, - const std::set &enabledLayerNames, - [[maybe_unused]] codeloc_data codelocData) { +ur_result_t initAsanDDITable(ur_dditable_t *dditable) { ur_result_t result = UR_RESULT_SUCCESS; - if (enabledLayerNames.count("UR_LAYER_ASAN")) { - enabledType = SanitizerType::AddressSanitizer; - initAsanInterceptor(); - } else if (enabledLayerNames.count("UR_LAYER_MSAN")) { - enabledType = SanitizerType::MemorySanitizer; - } else if (enabledLayerNames.count("UR_LAYER_TSAN")) { - enabledType = SanitizerType::ThreadSanitizer; - } - - // Only support AddressSanitizer now - if (enabledType != SanitizerType::AddressSanitizer) { - return result; - } - - urDdiTable = *dditable; + 
getContext()->logger.always("==== DeviceSanitizer: ASAN"); if (UR_RESULT_SUCCESS == result) { result = ur_sanitizer_layer::asan::urGetGlobalProcAddrTable( @@ -2134,6 +2118,11 @@ ur_result_t context_t::init(ur_dditable_t *dditable, UR_API_VERSION_CURRENT, &dditable->VirtualMem); } + if (result != UR_RESULT_SUCCESS) { + getContext()->logger.error("Initialize ASAN DDI table failed: {}", + result); + } + return result; } diff --git a/source/loader/layers/sanitizer/asan/asan_ddi.hpp b/source/loader/layers/sanitizer/asan/asan_ddi.hpp index 735c4409d8..fe67d3d6bf 100644 --- a/source/loader/layers/sanitizer/asan/asan_ddi.hpp +++ b/source/loader/layers/sanitizer/asan/asan_ddi.hpp @@ -17,4 +17,6 @@ namespace ur_sanitizer_layer { void initAsanInterceptor(); void destroyAsanInterceptor(); +ur_result_t initAsanDDITable(ur_dditable_t *dditable); + } // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp index 1a1185e1ba..19af8546c2 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp @@ -676,162 +676,155 @@ ur_result_t AsanInterceptor::prepareLaunch( std::shared_ptr &DeviceInfo, ur_queue_handle_t Queue, ur_kernel_handle_t Kernel, LaunchInfo &LaunchInfo) { - do { - auto KernelInfo = getKernelInfo(Kernel); - assert(KernelInfo && "Kernel should be instrumented"); - - // Validate pointer arguments - if (getOptions().DetectKernelArguments) { - for (const auto &[ArgIndex, PtrPair] : KernelInfo->PointerArgs) { - auto Ptr = PtrPair.first; - if (Ptr == nullptr) { - continue; - } - if (auto ValidateResult = ValidateUSMPointer( - ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) { - ReportInvalidKernelArgument(Kernel, ArgIndex, (uptr)Ptr, - ValidateResult, PtrPair.second); - exitWithErrors(); - } + auto KernelInfo = getKernelInfo(Kernel); + assert(KernelInfo && "Kernel should be instrumented"); + + // 
Validate pointer arguments + if (getOptions().DetectKernelArguments) { + for (const auto &[ArgIndex, PtrPair] : KernelInfo->PointerArgs) { + auto Ptr = PtrPair.first; + if (Ptr == nullptr) { + continue; + } + if (auto ValidateResult = ValidateUSMPointer( + ContextInfo->Handle, DeviceInfo->Handle, (uptr)Ptr)) { + ReportInvalidKernelArgument(Kernel, ArgIndex, (uptr)Ptr, + ValidateResult, PtrPair.second); + exitWithErrors(); } } + } - // Set membuffer arguments - for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs) { - char *ArgPointer = nullptr; - UR_CALL(MemBuffer->getHandle(DeviceInfo->Handle, ArgPointer)); - ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer( - Kernel, ArgIndex, nullptr, ArgPointer); - if (URes != UR_RESULT_SUCCESS) { - getContext()->logger.error( - "Failed to set buffer {} as the {} arg to kernel {}: {}", - ur_cast(MemBuffer.get()), ArgIndex, Kernel, - URes); - } + // Set membuffer arguments + for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs) { + char *ArgPointer = nullptr; + UR_CALL(MemBuffer->getHandle(DeviceInfo->Handle, ArgPointer)); + ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer( + Kernel, ArgIndex, nullptr, ArgPointer); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error( + "Failed to set buffer {} as the {} arg to kernel {}: {}", + ur_cast(MemBuffer.get()), ArgIndex, Kernel, + URes); } + } - auto ArgNums = GetKernelNumArgs(Kernel); - // We must prepare all kernel args before call - // urKernelGetSuggestedLocalWorkSize, otherwise the call will fail on - // CPU device. 
- if (ArgNums) { - ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer( - Kernel, ArgNums - 1, nullptr, LaunchInfo.Data.getDevicePtr()); - if (URes != UR_RESULT_SUCCESS) { - getContext()->logger.error("Failed to set launch info: {}", - URes); - return URes; - } + auto ArgNums = GetKernelNumArgs(Kernel); + // We must prepare all kernel args before call + // urKernelGetSuggestedLocalWorkSize, otherwise the call will fail on + // CPU device. + if (ArgNums) { + ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer( + Kernel, ArgNums - 1, nullptr, LaunchInfo.Data.getDevicePtr()); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error("Failed to set launch info: {}", URes); + return URes; } + } - if (LaunchInfo.LocalWorkSize.empty()) { - LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim); - auto URes = - getContext()->urDdiTable.Kernel.pfnGetSuggestedLocalWorkSize( - Kernel, Queue, LaunchInfo.WorkDim, - LaunchInfo.GlobalWorkOffset, LaunchInfo.GlobalWorkSize, - LaunchInfo.LocalWorkSize.data()); - if (URes != UR_RESULT_SUCCESS) { - if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { - return URes; - } - // If urKernelGetSuggestedLocalWorkSize is not supported by driver, we fallback - // to inefficient implementation - for (size_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) { - LaunchInfo.LocalWorkSize[Dim] = 1; - } + if (LaunchInfo.LocalWorkSize.empty()) { + LaunchInfo.LocalWorkSize.resize(LaunchInfo.WorkDim); + auto URes = + getContext()->urDdiTable.Kernel.pfnGetSuggestedLocalWorkSize( + Kernel, Queue, LaunchInfo.WorkDim, LaunchInfo.GlobalWorkOffset, + LaunchInfo.GlobalWorkSize, LaunchInfo.LocalWorkSize.data()); + if (URes != UR_RESULT_SUCCESS) { + if (URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + return URes; + } + // If urKernelGetSuggestedLocalWorkSize is not supported by driver, we fallback + // to inefficient implementation + for (size_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) { + LaunchInfo.LocalWorkSize[Dim] = 1; } } + } - 
const size_t *LocalWorkSize = LaunchInfo.LocalWorkSize.data(); - uint32_t NumWG = 1; - for (uint32_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) { - NumWG *= (LaunchInfo.GlobalWorkSize[Dim] + LocalWorkSize[Dim] - 1) / - LocalWorkSize[Dim]; - } + const size_t *LocalWorkSize = LaunchInfo.LocalWorkSize.data(); + uint32_t NumWG = 1; + for (uint32_t Dim = 0; Dim < LaunchInfo.WorkDim; ++Dim) { + NumWG *= (LaunchInfo.GlobalWorkSize[Dim] + LocalWorkSize[Dim] - 1) / + LocalWorkSize[Dim]; + } - // Prepare asan runtime data - LaunchInfo.Data.Host.GlobalShadowOffset = - DeviceInfo->Shadow->ShadowBegin; - LaunchInfo.Data.Host.GlobalShadowOffsetEnd = - DeviceInfo->Shadow->ShadowEnd; - LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type; - LaunchInfo.Data.Host.Debug = getOptions().Debug ? 1 : 0; - - auto LocalMemoryUsage = - GetKernelLocalMemorySize(Kernel, DeviceInfo->Handle); - auto PrivateMemoryUsage = - GetKernelPrivateMemorySize(Kernel, DeviceInfo->Handle); - - getContext()->logger.info( - "KernelInfo {} (LocalMemory={}, PrivateMemory={})", (void *)Kernel, - LocalMemoryUsage, PrivateMemoryUsage); - - // Write shadow memory offset for local memory - if (getOptions().DetectLocals) { - if (DeviceInfo->Shadow->AllocLocalShadow( - Queue, NumWG, LaunchInfo.Data.Host.LocalShadowOffset, - LaunchInfo.Data.Host.LocalShadowOffsetEnd) != - UR_RESULT_SUCCESS) { - getContext()->logger.warning( - "Failed to allocate shadow memory for local " - "memory, maybe the number of workgroup ({}) is too " - "large", - NumWG); - getContext()->logger.warning( - "Skip checking local memory of kernel <{}>", - GetKernelName(Kernel)); - } else { - getContext()->logger.info( - "ShadowMemory(Local, WorkGroup{}, {} - {})", NumWG, - (void *)LaunchInfo.Data.Host.LocalShadowOffset, - (void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd); - } + // Prepare asan runtime data + LaunchInfo.Data.Host.GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin; + LaunchInfo.Data.Host.GlobalShadowOffsetEnd = 
DeviceInfo->Shadow->ShadowEnd; + LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type; + LaunchInfo.Data.Host.Debug = getOptions().Debug ? 1 : 0; + + auto LocalMemoryUsage = + GetKernelLocalMemorySize(Kernel, DeviceInfo->Handle); + auto PrivateMemoryUsage = + GetKernelPrivateMemorySize(Kernel, DeviceInfo->Handle); + + getContext()->logger.info( + "KernelInfo {} (LocalMemory={}, PrivateMemory={})", (void *)Kernel, + LocalMemoryUsage, PrivateMemoryUsage); + + // Write shadow memory offset for local memory + if (getOptions().DetectLocals) { + if (DeviceInfo->Shadow->AllocLocalShadow( + Queue, NumWG, LaunchInfo.Data.Host.LocalShadowOffset, + LaunchInfo.Data.Host.LocalShadowOffsetEnd) != + UR_RESULT_SUCCESS) { + getContext()->logger.warning( + "Failed to allocate shadow memory for local " + "memory, maybe the number of workgroup ({}) is too " + "large", + NumWG); + getContext()->logger.warning( + "Skip checking local memory of kernel <{}>", + GetKernelName(Kernel)); + } else { + getContext()->logger.info( + "ShadowMemory(Local, WorkGroup{}, {} - {})", NumWG, + (void *)LaunchInfo.Data.Host.LocalShadowOffset, + (void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd); } + } - // Write shadow memory offset for private memory - if (getOptions().DetectPrivates) { - if (DeviceInfo->Shadow->AllocPrivateShadow( - Queue, NumWG, LaunchInfo.Data.Host.PrivateShadowOffset, - LaunchInfo.Data.Host.PrivateShadowOffsetEnd) != - UR_RESULT_SUCCESS) { - getContext()->logger.warning( - "Failed to allocate shadow memory for private " - "memory, maybe the number of workgroup ({}) is too " - "large", - NumWG); - getContext()->logger.warning( - "Skip checking private memory of kernel <{}>", - GetKernelName(Kernel)); - } else { - getContext()->logger.info( - "ShadowMemory(Private, WorkGroup{}, {} - {})", NumWG, - (void *)LaunchInfo.Data.Host.PrivateShadowOffset, - (void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd); - } + // Write shadow memory offset for private memory + if (getOptions().DetectPrivates) 
{ + if (DeviceInfo->Shadow->AllocPrivateShadow( + Queue, NumWG, LaunchInfo.Data.Host.PrivateShadowOffset, + LaunchInfo.Data.Host.PrivateShadowOffsetEnd) != + UR_RESULT_SUCCESS) { + getContext()->logger.warning( + "Failed to allocate shadow memory for private " + "memory, maybe the number of workgroup ({}) is too " + "large", + NumWG); + getContext()->logger.warning( + "Skip checking private memory of kernel <{}>", + GetKernelName(Kernel)); + } else { + getContext()->logger.info( + "ShadowMemory(Private, WorkGroup{}, {} - {})", NumWG, + (void *)LaunchInfo.Data.Host.PrivateShadowOffset, + (void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd); } + } - // Write local arguments info - if (!KernelInfo->LocalArgs.empty()) { - std::vector LocalArgsInfo; - for (auto [ArgIndex, ArgInfo] : KernelInfo->LocalArgs) { - LocalArgsInfo.push_back(ArgInfo); - getContext()->logger.debug( - "local_args (argIndex={}, size={}, sizeWithRZ={})", - ArgIndex, ArgInfo.Size, ArgInfo.SizeWithRedZone); - } - UR_CALL(LaunchInfo.Data.importLocalArgsInfo(Queue, LocalArgsInfo)); + // Write local arguments info + if (!KernelInfo->LocalArgs.empty()) { + std::vector LocalArgsInfo; + for (auto [ArgIndex, ArgInfo] : KernelInfo->LocalArgs) { + LocalArgsInfo.push_back(ArgInfo); + getContext()->logger.debug( + "local_args (argIndex={}, size={}, sizeWithRZ={})", ArgIndex, + ArgInfo.Size, ArgInfo.SizeWithRedZone); } + UR_CALL(LaunchInfo.Data.importLocalArgsInfo(Queue, LocalArgsInfo)); + } - // sync asan runtime data to device side - UR_CALL(LaunchInfo.Data.syncToDevice(Queue)); + // sync asan runtime data to device side + UR_CALL(LaunchInfo.Data.syncToDevice(Queue)); - getContext()->logger.debug( - "launch_info {} (numLocalArgs={}, localArgs={})", - (void *)LaunchInfo.Data.getDevicePtr(), - LaunchInfo.Data.Host.NumLocalArgs, - (void *)LaunchInfo.Data.Host.LocalArgs); - } while (false); + getContext()->logger.debug("launch_info {} (numLocalArgs={}, localArgs={})", + (void *)LaunchInfo.Data.getDevicePtr(), + 
LaunchInfo.Data.Host.NumLocalArgs, + (void *)LaunchInfo.Data.Host.LocalArgs); return UR_RESULT_SUCCESS; } diff --git a/source/loader/layers/sanitizer/asan/asan_shadow.hpp b/source/loader/layers/sanitizer/asan/asan_shadow.hpp index 48054378fe..76abb7e35c 100644 --- a/source/loader/layers/sanitizer/asan/asan_shadow.hpp +++ b/source/loader/layers/sanitizer/asan/asan_shadow.hpp @@ -12,7 +12,7 @@ #pragma once -#include "asan/asan_allocator.hpp" +#include "asan_allocator.hpp" #include "sanitizer_common/sanitizer_libdevice.hpp" #include @@ -134,7 +134,7 @@ struct ShadowMemoryPVC final : public ShadowMemoryGPU { size_t GetShadowSize() override { return 0x180000000000ULL; } }; -/// Shadow Memory layout of GPU PVC device +/// Shadow Memory layout of GPU DG2 device /// /// USM Allocation Range (48 bits) /// Host/Shared USM : 0x0000_0000_0000_0000 ~ 0x0000_7fff_ffff_ffff diff --git a/source/loader/layers/sanitizer/msan/msan_allocator.cpp b/source/loader/layers/sanitizer/msan/msan_allocator.cpp new file mode 100644 index 0000000000..e0213c26b5 --- /dev/null +++ b/source/loader/layers/sanitizer/msan/msan_allocator.cpp @@ -0,0 +1,26 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file msan_allocator.cpp + * + */ + +#include "msan_allocator.hpp" +#include "ur_sanitizer_layer.hpp" + +namespace ur_sanitizer_layer { +namespace msan { + +void MsanAllocInfo::print() { + getContext()->logger.info("AllocInfo(Alloc=[{}-{}), AllocSize={})", + (void *)AllocBegin, + (void *)(AllocBegin + AllocSize), AllocSize); +} + +} // namespace msan +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/msan/msan_allocator.hpp b/source/loader/layers/sanitizer/msan/msan_allocator.hpp new file mode 100644 index 0000000000..32b85e6945 --- /dev/null +++ b/source/loader/layers/sanitizer/msan/msan_allocator.hpp @@ -0,0 +1,41 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file msan_allocator.hpp + * + */ + +#pragma once + +#include "sanitizer_common/sanitizer_allocator.hpp" +#include "sanitizer_common/sanitizer_common.hpp" +#include "sanitizer_common/sanitizer_stacktrace.hpp" + +namespace ur_sanitizer_layer { +namespace msan { + +/// Bookkeeping record for one allocation: the half-open address range +/// [AllocBegin, AllocBegin + AllocSize), the context/device handles stored +/// with it, and the stack traces kept for allocation and release. +struct MsanAllocInfo { + uptr AllocBegin = 0; + size_t AllocSize = 0; + + bool IsReleased = false; + + ur_context_handle_t Context = nullptr; + ur_device_handle_t Device = nullptr; + + StackTrace AllocStack; + StackTrace ReleaseStack; + + /// Logs the allocation range and size at info level. + void print(); +}; + +/// Allocation records keyed by an address (presumably AllocBegin; confirm at +/// the insertion site). The template arguments were missing, leaving the +/// ill-formed `std::map>`. +using MsanAllocationMap = std::map<uptr, std::shared_ptr<MsanAllocInfo>>; +using MsanAllocationIterator = MsanAllocationMap::iterator; + +} // namespace msan +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/msan/msan_buffer.cpp b/source/loader/layers/sanitizer/msan/msan_buffer.cpp new file mode 100644 index 0000000000..66ebb10326 --- /dev/null +++ b/source/loader/layers/sanitizer/msan/msan_buffer.cpp @@ -0,0 +1,204 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the 
Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file msan_buffer.cpp + * + */ + +#include "msan_buffer.hpp" +#include "msan_interceptor.hpp" +#include "sanitizer_common/sanitizer_utils.hpp" +#include "ur_sanitizer_layer.hpp" + +namespace ur_sanitizer_layer { +namespace msan { + +/// Copies a 3D region between two USM pointers by enqueuing one 2D copy per +/// slice, then enqueues a wait on all of those copies that signals `Event`. +/// A row/slice pitch of 0 means "tightly packed" and is derived from Region. +/// Returns the first non-success result of any enqueue call. +ur_result_t EnqueueMemCopyRectHelper( + ur_queue_handle_t Queue, char *pSrc, char *pDst, ur_rect_offset_t SrcOffset, + ur_rect_offset_t DstOffset, ur_rect_region_t Region, size_t SrcRowPitch, + size_t SrcSlicePitch, size_t DstRowPitch, size_t DstSlicePitch, + bool Blocking, uint32_t NumEventsInWaitList, + const ur_event_handle_t *EventWaitList, ur_event_handle_t *Event) { + // If user doesn't determine src/dst row pitch and slice pitch, just use + // region for it. + if (SrcRowPitch == 0) { + SrcRowPitch = Region.width; + } + + if (SrcSlicePitch == 0) { + SrcSlicePitch = SrcRowPitch * Region.height; + } + + if (DstRowPitch == 0) { + DstRowPitch = Region.width; + } + + if (DstSlicePitch == 0) { + DstSlicePitch = DstRowPitch * Region.height; + } + + // Calculate the src and dst addresses that actually will be copied. + char *SrcOrigin = pSrc + SrcOffset.x + SrcRowPitch * SrcOffset.y + + SrcSlicePitch * SrcOffset.z; + char *DstOrigin = pDst + DstOffset.x + DstRowPitch * DstOffset.y + + DstSlicePitch * DstOffset.z; + + std::vector<ur_event_handle_t> Events; + Events.reserve(Region.depth); + // The per-slice events are owned by this helper. Release them on every exit + // path (including the early returns hidden inside UR_CALL) so the handles + // are not leaked. NOTE(review): this relies on the usual rule that wait-list + // events only need to stay alive until the enqueue call returns - confirm + // for all adapters. + struct EventReleaser { + std::vector<ur_event_handle_t> &Owned; + ~EventReleaser() { + for (ur_event_handle_t OwnedEvent : Owned) { + getContext()->urDdiTable.Event.pfnRelease(OwnedEvent); + } + } + } Releaser{Events}; + // For now, USM doesn't support 3D memory copy operation, so we can only + // loop call 2D memory copy function to implement it. 
+ for (size_t i = 0; i < Region.depth; i++) { + ur_event_handle_t NewEvent{}; + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy2D( + Queue, Blocking, DstOrigin + (i * DstSlicePitch), DstRowPitch, + SrcOrigin + (i * SrcSlicePitch), SrcRowPitch, Region.width, + Region.height, NumEventsInWaitList, EventWaitList, &NewEvent)); + + Events.push_back(NewEvent); + } + + // The wait-list length parameter is 32-bit; make the narrowing explicit. + UR_CALL(getContext()->urDdiTable.Enqueue.pfnEventsWait( + Queue, static_cast<uint32_t>(Events.size()), Events.data(), Event)); + + return UR_RESULT_SUCCESS; +} + +ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { + // Sub-buffers don't maintain own allocations but rely on parent buffer. + if (SubBuffer) { + UR_CALL(SubBuffer->Parent->getHandle(Device, Handle)); + Handle += SubBuffer->Origin; + return UR_RESULT_SUCCESS; + } + + // Device may be null, we follow the L0 adapter's practice to use the first + // device + if (!Device) { + auto Devices = GetDevices(Context); + assert(Devices.size() > 0 && "Devices should not be empty"); + Device = Devices[0]; + } + assert((void *)Device != nullptr && "Device cannot be nullptr"); + + std::scoped_lock Guard(Mutex); + auto &Allocation = Allocations[Device]; + ur_result_t URes = UR_RESULT_SUCCESS; + if (!Allocation) { + ur_usm_desc_t USMDesc{}; + USMDesc.align = getAlignment(); + ur_usm_pool_handle_t Pool{}; + URes = getMsanInterceptor()->allocateMemory( + Context, Device, &USMDesc, Pool, Size, + ur_cast(&Allocation)); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error( + "Failed to allocate {} bytes memory for buffer {}", Size, this); + return URes; + } + + if (HostPtr) { + ManagedQueue Queue(Context, Device); + URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, true, Allocation, HostPtr, Size, 0, nullptr, nullptr); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error( + "Failed to copy {} bytes data from host " + "pointer {} to buffer {}", + Size, HostPtr, this); + return URes; + } + } + } + + Handle = Allocation; + + if 
(!LastSyncedDevice.hDevice) { + LastSyncedDevice = MemBuffer::Device_t{Device, Handle}; + return URes; + } + + // If the device required to allocate memory is not the previous one, we + // need to do data migration. + if (Device != LastSyncedDevice.hDevice) { + auto &HostAllocation = Allocations[nullptr]; + if (!HostAllocation) { + ur_usm_desc_t USMDesc{}; + USMDesc.align = getAlignment(); + ur_usm_pool_handle_t Pool{}; + URes = getMsanInterceptor()->allocateMemory( + Context, nullptr, &USMDesc, Pool, Size, + ur_cast(&HostAllocation)); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error("Failed to allocate {} bytes host " + "USM for buffer {} migration", + Size, this); + return URes; + } + } + + // Copy data from last synced device to host + { + ManagedQueue Queue(Context, LastSyncedDevice.hDevice); + URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, true, HostAllocation, LastSyncedDevice.MemHandle, Size, + 0, nullptr, nullptr); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error( + "Failed to migrate memory buffer data"); + return URes; + } + } + + // Sync data back to device + { + ManagedQueue Queue(Context, Device); + URes = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, true, Allocation, HostAllocation, Size, 0, nullptr, + nullptr); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error( + "Failed to migrate memory buffer data"); + return URes; + } + } + } + + LastSyncedDevice = MemBuffer::Device_t{Device, Handle}; + + return URes; +} + +ur_result_t MemBuffer::free() { + for (const auto &[_, Ptr] : Allocations) { + ur_result_t URes = getContext()->urDdiTable.USM.pfnFree(Context, Ptr); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error("Failed to free buffer handle {}", Ptr); + return URes; + } + } + Allocations.clear(); + return UR_RESULT_SUCCESS; +} + +size_t MemBuffer::getAlignment() { + // Choose an alignment that is at most 128 and is the next power of 2 + // for sizes less than 128. 
+ // TODO: If we don't set the alignment size explicitly, the device will + // usually choose a very large size (more than 1k). Then sanitizer will + // allocate extra unnecessary memory. Not sure if this will impact + // performance. + // + // Returns the smallest power of 2 strictly greater than Size, capped at + // 128 (Size == 0 yields 1; any Size >= 64 yields 128). A doubling loop is + // used instead of `63 - __builtin_clzl(Size)`: that builtin is undefined + // for Size == 0, assumes `long` is 64 bits wide, and the following shift + // could reach 64 for very large sizes. + constexpr size_t MaxAlignment = 128; + size_t Alignment = 1; + while (Alignment <= Size && Alignment < MaxAlignment) { + Alignment <<= 1; + } + return Alignment; +} + +} // namespace msan +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/msan/msan_buffer.hpp b/source/loader/layers/sanitizer/msan/msan_buffer.hpp new file mode 100644 index 0000000000..e953ac3e66 --- /dev/null +++ b/source/loader/layers/sanitizer/msan/msan_buffer.hpp @@ -0,0 +1,82 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file msan_buffer.hpp + * + */ + +#pragma once + +#include +#include +#include + +#include "ur/ur.hpp" + +namespace ur_sanitizer_layer { +namespace msan { + +struct MemBuffer { + // Buffer constructor + MemBuffer(ur_context_handle_t Context, size_t Size, char *HostPtr) + : Context(Context), Size(Size), HostPtr(HostPtr) {} + + // Sub-buffer constructor + MemBuffer(std::shared_ptr Parent, size_t Origin, size_t Size) + : Context(Parent->Context), Size(Size), SubBuffer{{Parent, Origin}} {} + + ur_result_t getHandle(ur_device_handle_t Device, char *&Handle); + + ur_result_t free(); + + size_t getAlignment(); + + std::unordered_map Allocations; + + enum AccessMode { UNKNOWN, READ_WRITE, READ_ONLY, WRITE_ONLY }; + + struct Mapping { + size_t Offset; + size_t Size; + }; + + std::unordered_map Mappings; + + ur_context_handle_t Context; + + struct Device_t { + ur_device_handle_t hDevice; + char *MemHandle; + }; + Device_t LastSyncedDevice{}; + + size_t Size; + + char *HostPtr{}; + + struct SubBuffer_t { + std::shared_ptr Parent; + size_t Origin; + 
}; + + std::optional SubBuffer; + + std::atomic RefCount = 1; + + ur_shared_mutex Mutex; +}; + +ur_result_t EnqueueMemCopyRectHelper( + ur_queue_handle_t Queue, char *pSrc, char *pDst, ur_rect_offset_t SrcOffset, + ur_rect_offset_t DstOffset, ur_rect_region_t Region, size_t SrcRowPitch, + size_t SrcSlicePitch, size_t DstRowPitch, size_t DstSlicePitch, + bool Blocking, uint32_t NumEventsInWaitList, + const ur_event_handle_t *EventWaitList, ur_event_handle_t *Event); + +} // namespace msan +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/msan/msan_ddi.cpp b/source/loader/layers/sanitizer/msan/msan_ddi.cpp new file mode 100644 index 0000000000..87438a1f99 --- /dev/null +++ b/source/loader/layers/sanitizer/msan/msan_ddi.cpp @@ -0,0 +1,1528 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file msan_ddi.cpp + * + */ + +#include "msan_ddi.hpp" +#include "msan_interceptor.hpp" +#include "sanitizer_common/sanitizer_utils.hpp" +#include "ur_sanitizer_layer.hpp" + +#include + +namespace ur_sanitizer_layer { +namespace msan { + +namespace { + +ur_result_t setupContext(ur_context_handle_t Context, uint32_t numDevices, + const ur_device_handle_t *phDevices) { + std::shared_ptr CI; + UR_CALL(getMsanInterceptor()->insertContext(Context, CI)); + for (uint32_t i = 0; i < numDevices; ++i) { + auto hDevice = phDevices[i]; + std::shared_ptr DI; + UR_CALL(getMsanInterceptor()->insertDevice(hDevice, DI)); + DI->Type = GetDeviceType(Context, hDevice); + if (DI->Type == DeviceType::UNKNOWN) { + getContext()->logger.error("Unsupport device"); + return UR_RESULT_ERROR_INVALID_DEVICE; + } + getContext()->logger.info( + "DeviceInfo {} (Type={}, IsSupportSharedSystemUSM={})", + (void *)DI->Handle, ToString(DI->Type), + DI->IsSupportSharedSystemUSM); + 
getContext()->logger.info("Add {} into context {}", (void *)DI->Handle, + (void *)Context); + if (!DI->Shadow) { + UR_CALL(DI->allocShadowMemory(Context)); + } + CI->DeviceList.emplace_back(hDevice); + CI->AllocInfosMap[hDevice]; + } + return UR_RESULT_SUCCESS; +} + +bool isInstrumentedKernel(ur_kernel_handle_t hKernel) { + auto hProgram = GetProgram(hKernel); + auto PI = getMsanInterceptor()->getProgramInfo(hProgram); + return PI->isKernelInstrumented(hKernel); +} + +} // namespace + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urAdapterGet +/// +/// Sets the NEO debug environment variables below, forwards to the underlying +/// urAdapterGet, and asks the MSAN interceptor to hold every adapter handle +/// that was actually written to phAdapters. +ur_result_t urAdapterGet( + uint32_t + NumEntries, ///< [in] the number of adapters to be added to phAdapters. + ///< If phAdapters is not NULL, then NumEntries should be greater than + ///< zero, otherwise ::UR_RESULT_ERROR_INVALID_SIZE, + ///< will be returned. + ur_adapter_handle_t * + phAdapters, ///< [out][optional][range(0, NumEntries)] array of handle of adapters. + ///< If NumEntries is less than the number of adapters available, then + ///< ::urAdapterGet shall only retrieve that number of platforms. + uint32_t * + pNumAdapters ///< [out][optional] returns the total number of adapters available. +) { + auto pfnAdapterGet = getContext()->urDdiTable.Global.pfnAdapterGet; + + // FIXME: This is a W/A to disable heap extended for MSAN so that we can reserve large VA of GPU. + setenv("NEOReadDebugKeys", "1", 1); + setenv("AllocateHostAllocationsInHeapExtendedHost", "0", 1); + setenv("UseHighAlignmentForHeapExtended", "0", 1); + + ur_result_t result = pfnAdapterGet(NumEntries, phAdapters, pNumAdapters); + if (result == UR_RESULT_SUCCESS && phAdapters) { + // *pNumAdapters is the total number of adapters available, which may + // exceed the NumEntries slots actually written to phAdapters, so the + // loop bound is clamped to NumEntries to avoid reading past the array. + const uint32_t NumAdapters = + (pNumAdapters && *pNumAdapters < NumEntries) ? *pNumAdapters + 
: NumEntries; + for (uint32_t i = 0; i < NumAdapters; ++i) { + UR_CALL(getMsanInterceptor()->holdAdapter(phAdapters[i])); + } + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urUSMDeviceAlloc +ur_result_t urUSMDeviceAlloc( + ur_context_handle_t hContext, ///< [in] handle of the context object + ur_device_handle_t hDevice, ///< [in] handle of the device object + const ur_usm_desc_t + *pUSMDesc, ///< [in][optional] USM memory allocation descriptor + ur_usm_pool_handle_t + pool, ///< [in][optional] Pointer to a pool created using urUSMPoolCreate + size_t + size, ///< [in] size in bytes of the USM memory object to be allocated + void **ppMem ///< [out] pointer to USM device memory object +) { + getContext()->logger.debug("==== urUSMDeviceAlloc"); + + return getMsanInterceptor()->allocateMemory(hContext, hDevice, pUSMDesc, + pool, size, ppMem); +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramCreateWithIL +ur_result_t urProgramCreateWithIL( + ur_context_handle_t hContext, ///< [in] handle of the context instance + const void *pIL, ///< [in] pointer to IL binary. + size_t length, ///< [in] length of `pIL` in bytes. + const ur_program_properties_t * + pProperties, ///< [in][optional] pointer to program creation properties. + ur_program_handle_t + *phProgram ///< [out] pointer to handle of program object created. 
+) { + auto pfnProgramCreateWithIL = + getContext()->urDdiTable.Program.pfnCreateWithIL; + + getContext()->logger.debug("==== urProgramCreateWithIL"); + + UR_CALL( + pfnProgramCreateWithIL(hContext, pIL, length, pProperties, phProgram)); + UR_CALL(getMsanInterceptor()->insertProgram(*phProgram)); + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramCreateWithBinary +ur_result_t urProgramCreateWithBinary( + ur_context_handle_t hContext, ///< [in] handle of the context instance + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] a pointer to a list of device handles. The + ///< binaries are loaded for devices specified in this list. + size_t * + pLengths, ///< [in][range(0, numDevices)] array of sizes of program binaries + ///< specified by `pBinaries` (in bytes). + const uint8_t ** + ppBinaries, ///< [in][range(0, numDevices)] pointer to program binaries to be loaded + ///< for devices specified by `phDevices`. + const ur_program_properties_t * + pProperties, ///< [in][optional] pointer to program creation properties. + ur_program_handle_t + *phProgram ///< [out] pointer to handle of Program object created. +) { + auto pfnProgramCreateWithBinary = + getContext()->urDdiTable.Program.pfnCreateWithBinary; + + getContext()->logger.debug("==== urProgramCreateWithBinary"); + + UR_CALL(pfnProgramCreateWithBinary(hContext, numDevices, phDevices, + pLengths, ppBinaries, pProperties, + phProgram)); + UR_CALL(getMsanInterceptor()->insertProgram(*phProgram)); + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramCreateWithNativeHandle +ur_result_t urProgramCreateWithNativeHandle( + ur_native_handle_t + hNativeProgram, ///< [in][nocheck] the native handle of the program. 
+ ur_context_handle_t hContext, ///< [in] handle of the context instance + const ur_program_native_properties_t * + pProperties, ///< [in][optional] pointer to native program properties struct. + ur_program_handle_t * + phProgram ///< [out] pointer to the handle of the program object created. +) { + auto pfnProgramCreateWithNativeHandle = + getContext()->urDdiTable.Program.pfnCreateWithNativeHandle; + + getContext()->logger.debug("==== urProgramCreateWithNativeHandle"); + + UR_CALL(pfnProgramCreateWithNativeHandle(hNativeProgram, hContext, + pProperties, phProgram)); + UR_CALL(getMsanInterceptor()->insertProgram(*phProgram)); + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramRetain +ur_result_t +urProgramRetain(ur_program_handle_t + hProgram ///< [in][retain] handle for the Program to retain +) { + auto pfnRetain = getContext()->urDdiTable.Program.pfnRetain; + + getContext()->logger.debug("==== urProgramRetain"); + + UR_CALL(pfnRetain(hProgram)); + + auto ProgramInfo = getMsanInterceptor()->getProgramInfo(hProgram); + UR_ASSERT(ProgramInfo != nullptr, UR_RESULT_ERROR_INVALID_VALUE); + ProgramInfo->RefCount++; + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramBuild +ur_result_t urProgramBuild( + ur_context_handle_t hContext, ///< [in] handle of the context object + ur_program_handle_t hProgram, ///< [in] handle of the program object + const char *pOptions ///< [in] string of build options +) { + auto pfnProgramBuild = getContext()->urDdiTable.Program.pfnBuild; + + getContext()->logger.debug("==== urProgramBuild"); + + UR_CALL(pfnProgramBuild(hContext, hProgram, pOptions)); + + UR_CALL(getMsanInterceptor()->registerProgram(hProgram)); + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// 
+/// @brief Intercept function for urProgramBuildExp +ur_result_t urProgramBuildExp( + ur_program_handle_t hProgram, ///< [in] Handle of the program to build. + uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + const char * + pOptions ///< [in][optional] pointer to build options null-terminated string. +) { + auto pfnBuildExp = getContext()->urDdiTable.ProgramExp.pfnBuildExp; + + getContext()->logger.debug("==== urProgramBuildExp"); + + UR_CALL(pfnBuildExp(hProgram, numDevices, phDevices, pOptions)); + UR_CALL(getMsanInterceptor()->registerProgram(hProgram)); + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramLink +ur_result_t urProgramLink( + ur_context_handle_t hContext, ///< [in] handle of the context instance. + uint32_t count, ///< [in] number of program handles in `phPrograms`. + const ur_program_handle_t * + phPrograms, ///< [in][range(0, count)] pointer to array of program handles. + const char * + pOptions, ///< [in][optional] pointer to linker options null-terminated string. + ur_program_handle_t + *phProgram ///< [out] pointer to handle of program object created. +) { + auto pfnProgramLink = getContext()->urDdiTable.Program.pfnLink; + + getContext()->logger.debug("==== urProgramLink"); + + UR_CALL(pfnProgramLink(hContext, count, phPrograms, pOptions, phProgram)); + + UR_CALL(getMsanInterceptor()->insertProgram(*phProgram)); + UR_CALL(getMsanInterceptor()->registerProgram(*phProgram)); + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramLinkExp +ur_result_t urProgramLinkExp( + ur_context_handle_t hContext, ///< [in] handle of the context instance. 
+ uint32_t numDevices, ///< [in] number of devices + ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] pointer to array of device handles + uint32_t count, ///< [in] number of program handles in `phPrograms`. + const ur_program_handle_t * + phPrograms, ///< [in][range(0, count)] pointer to array of program handles. + const char * + pOptions, ///< [in][optional] pointer to linker options null-terminated string. + ur_program_handle_t + *phProgram ///< [out] pointer to handle of program object created. +) { + auto pfnProgramLinkExp = getContext()->urDdiTable.ProgramExp.pfnLinkExp; + + getContext()->logger.debug("==== urProgramLinkExp"); + + UR_CALL(pfnProgramLinkExp(hContext, numDevices, phDevices, count, + phPrograms, pOptions, phProgram)); + + UR_CALL(getMsanInterceptor()->insertProgram(*phProgram)); + UR_CALL(getMsanInterceptor()->registerProgram(*phProgram)); + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urProgramRelease +ur_result_t urProgramRelease( + ur_program_handle_t + hProgram ///< [in][release] handle for the Program to release +) { + auto pfnProgramRelease = getContext()->urDdiTable.Program.pfnRelease; + + getContext()->logger.debug("==== urProgramRelease"); + + UR_CALL(pfnProgramRelease(hProgram)); + + auto ProgramInfo = getMsanInterceptor()->getProgramInfo(hProgram); + UR_ASSERT(ProgramInfo != nullptr, UR_RESULT_ERROR_INVALID_VALUE); + if (--ProgramInfo->RefCount == 0) { + UR_CALL(getMsanInterceptor()->unregisterProgram(hProgram)); + UR_CALL(getMsanInterceptor()->eraseProgram(hProgram)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueKernelLaunch +ur_result_t urEnqueueKernelLaunch( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + 
uint32_t + workDim, ///< [in] number of dimensions, from 1 to 3, to specify the global and + ///< work-group work-items + const size_t * + pGlobalWorkOffset, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< offset used to calculate the global ID of a work-item + const size_t * + pGlobalWorkSize, ///< [in] pointer to an array of workDim unsigned values that specify the + ///< number of global work-items in workDim that will execute the kernel + ///< function + const size_t * + pLocalWorkSize, ///< [in][optional] pointer to an array of workDim unsigned values that + ///< specify the number of local work-items forming a work-group that will + ///< execute the kernel function. + ///< If nullptr, the runtime implementation will choose the work-group + ///< size. + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before the kernel execution. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that no wait + ///< event. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< kernel execution instance. 
+) { + auto pfnKernelLaunch = getContext()->urDdiTable.Enqueue.pfnKernelLaunch; + + getContext()->logger.debug("==== urEnqueueKernelLaunch"); + + if (!isInstrumentedKernel(hKernel)) { + return pfnKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset, + pGlobalWorkSize, pLocalWorkSize, + numEventsInWaitList, phEventWaitList, phEvent); + } + + USMLaunchInfo LaunchInfo(GetContext(hQueue), GetDevice(hQueue), + pGlobalWorkSize, pLocalWorkSize, pGlobalWorkOffset, + workDim); + UR_CALL(LaunchInfo.initialize()); + + UR_CALL(getMsanInterceptor()->preLaunchKernel(hKernel, hQueue, LaunchInfo)); + + ur_event_handle_t hEvent{}; + ur_result_t result = + pfnKernelLaunch(hQueue, hKernel, workDim, pGlobalWorkOffset, + pGlobalWorkSize, LaunchInfo.LocalWorkSize.data(), + numEventsInWaitList, phEventWaitList, &hEvent); + + if (result == UR_RESULT_SUCCESS) { + UR_CALL(getMsanInterceptor()->postLaunchKernel(hKernel, hQueue, + LaunchInfo)); + } + + if (phEvent) { + *phEvent = hEvent; + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urContextCreate +ur_result_t urContextCreate( + uint32_t numDevices, ///< [in] the number of devices given in phDevices + const ur_device_handle_t + *phDevices, ///< [in][range(0, numDevices)] array of handle of devices. + const ur_context_properties_t * + pProperties, ///< [in][optional] pointer to context creation properties. 
+ ur_context_handle_t + *phContext ///< [out] pointer to handle of context object created +) { + auto pfnCreate = getContext()->urDdiTable.Context.pfnCreate; + + getContext()->logger.debug("==== urContextCreate"); + + ur_result_t result = + pfnCreate(numDevices, phDevices, pProperties, phContext); + + if (result == UR_RESULT_SUCCESS) { + UR_CALL(setupContext(*phContext, numDevices, phDevices)); + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urContextCreateWithNativeHandle +ur_result_t urContextCreateWithNativeHandle( + ur_native_handle_t + hNativeContext, ///< [in][nocheck] the native handle of the getContext()-> + ur_adapter_handle_t hAdapter, + uint32_t numDevices, ///< [in] number of devices associated with the context + const ur_device_handle_t * + phDevices, ///< [in][range(0, numDevices)] list of devices associated with the context + const ur_context_native_properties_t * + pProperties, ///< [in][optional] pointer to native context properties struct + ur_context_handle_t * + phContext ///< [out] pointer to the handle of the context object created. +) { + auto pfnCreateWithNativeHandle = + getContext()->urDdiTable.Context.pfnCreateWithNativeHandle; + + getContext()->logger.debug("==== urContextCreateWithNativeHandle"); + + ur_result_t result = + pfnCreateWithNativeHandle(hNativeContext, hAdapter, numDevices, + phDevices, pProperties, phContext); + + if (result == UR_RESULT_SUCCESS) { + UR_CALL(setupContext(*phContext, numDevices, phDevices)); + } + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urContextRetain +ur_result_t urContextRetain( + ur_context_handle_t + hContext ///< [in] handle of the context to get a reference of. 
+) { + auto pfnRetain = getContext()->urDdiTable.Context.pfnRetain; + + getContext()->logger.debug("==== urContextRetain"); + + UR_CALL(pfnRetain(hContext)); + + auto ContextInfo = getMsanInterceptor()->getContextInfo(hContext); + UR_ASSERT(ContextInfo != nullptr, UR_RESULT_ERROR_INVALID_VALUE); + ContextInfo->RefCount++; + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urContextRelease +ur_result_t urContextRelease( + ur_context_handle_t hContext ///< [in] handle of the context to release. +) { + auto pfnRelease = getContext()->urDdiTable.Context.pfnRelease; + + getContext()->logger.debug("==== urContextRelease"); + + UR_CALL(pfnRelease(hContext)); + + auto ContextInfo = getMsanInterceptor()->getContextInfo(hContext); + UR_ASSERT(ContextInfo != nullptr, UR_RESULT_ERROR_INVALID_VALUE); + if (--ContextInfo->RefCount == 0) { + UR_CALL(getMsanInterceptor()->eraseContext(hContext)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urMemBufferCreate +ur_result_t urMemBufferCreate( + ur_context_handle_t hContext, ///< [in] handle of the context object + ur_mem_flags_t flags, ///< [in] allocation and usage information flags + size_t size, ///< [in] size in bytes of the memory object to be allocated + const ur_buffer_properties_t + *pProperties, ///< [in][optional] pointer to buffer creation properties + ur_mem_handle_t + *phBuffer ///< [out] pointer to handle of the memory buffer created +) { + if (nullptr == phBuffer) { + return UR_RESULT_ERROR_INVALID_NULL_POINTER; + } + + getContext()->logger.debug("==== urMemBufferCreate"); + + void *Host = nullptr; + if (pProperties) { + Host = pProperties->pHost; + } + + char *hostPtrOrNull = (flags & UR_MEM_FLAG_USE_HOST_POINTER) + ? 
ur_cast(Host) + : nullptr; + + std::shared_ptr pMemBuffer = + std::make_shared(hContext, size, hostPtrOrNull); + + if (Host && (flags & UR_MEM_FLAG_ALLOC_COPY_HOST_POINTER)) { + std::shared_ptr CtxInfo = + getMsanInterceptor()->getContextInfo(hContext); + for (const auto &hDevice : CtxInfo->DeviceList) { + ManagedQueue InternalQueue(hContext, hDevice); + char *Handle = nullptr; + UR_CALL(pMemBuffer->getHandle(hDevice, Handle)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + InternalQueue, true, Handle, Host, size, 0, nullptr, nullptr)); + } + } + + ur_result_t result = getMsanInterceptor()->insertMemBuffer(pMemBuffer); + *phBuffer = ur_cast(pMemBuffer.get()); + + return result; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urMemGetInfo +ur_result_t urMemGetInfo( + ur_mem_handle_t + hMemory, ///< [in] handle to the memory object being queried. + ur_mem_info_t propName, ///< [in] type of the info to retrieve. + size_t + propSize, ///< [in] the number of bytes of memory pointed to by pPropValue. + void * + pPropValue, ///< [out][optional][typename(propName, propSize)] array of bytes holding + ///< the info. + ///< If propSize is less than the real number of bytes needed to return + ///< the info then the ::UR_RESULT_ERROR_INVALID_SIZE error is returned and + ///< pPropValue is not used. + size_t * + pPropSizeRet ///< [out][optional] pointer to the actual size in bytes of the queried propName. 
+) { + auto pfnGetInfo = getContext()->urDdiTable.Mem.pfnGetInfo; + + getContext()->logger.debug("==== urMemGetInfo"); + + if (auto MemBuffer = getMsanInterceptor()->getMemBuffer(hMemory)) { + UrReturnHelper ReturnValue(propSize, pPropValue, pPropSizeRet); + switch (propName) { + case UR_MEM_INFO_CONTEXT: { + return ReturnValue(MemBuffer->Context); + } + case UR_MEM_INFO_SIZE: { + return ReturnValue(size_t{MemBuffer->Size}); + } + default: { + return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION; + } + } + } else { + UR_CALL( + pfnGetInfo(hMemory, propName, propSize, pPropValue, pPropSizeRet)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urMemRetain +ur_result_t urMemRetain( + ur_mem_handle_t hMem ///< [in] handle of the memory object to get access +) { + auto pfnRetain = getContext()->urDdiTable.Mem.pfnRetain; + + getContext()->logger.debug("==== urMemRetain"); + + if (auto MemBuffer = getMsanInterceptor()->getMemBuffer(hMem)) { + MemBuffer->RefCount++; + } else { + UR_CALL(pfnRetain(hMem)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urMemRelease +ur_result_t urMemRelease( + ur_mem_handle_t hMem ///< [in] handle of the memory object to release +) { + auto pfnRelease = getContext()->urDdiTable.Mem.pfnRelease; + + getContext()->logger.debug("==== urMemRelease"); + + if (auto MemBuffer = getMsanInterceptor()->getMemBuffer(hMem)) { + if (--MemBuffer->RefCount != 0) { + return UR_RESULT_SUCCESS; + } + UR_CALL(MemBuffer->free()); + UR_CALL(getMsanInterceptor()->eraseMemBuffer(hMem)); + } else { + UR_CALL(pfnRelease(hMem)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urMemBufferPartition +ur_result_t urMemBufferPartition( + ur_mem_handle_t + 
hBuffer, ///< [in] handle of the buffer object to allocate from + ur_mem_flags_t flags, ///< [in] allocation and usage information flags + ur_buffer_create_type_t bufferCreateType, ///< [in] buffer creation type + const ur_buffer_region_t + *pRegion, ///< [in] pointer to buffer create region information + ur_mem_handle_t + *phMem ///< [out] pointer to the handle of sub buffer created +) { + auto pfnBufferPartition = getContext()->urDdiTable.Mem.pfnBufferPartition; + + getContext()->logger.debug("==== urMemBufferPartition"); + + if (auto ParentBuffer = getMsanInterceptor()->getMemBuffer(hBuffer)) { + if (ParentBuffer->Size < (pRegion->origin + pRegion->size)) { + return UR_RESULT_ERROR_INVALID_BUFFER_SIZE; + } + std::shared_ptr SubBuffer = std::make_shared( + ParentBuffer, pRegion->origin, pRegion->size); + UR_CALL(getMsanInterceptor()->insertMemBuffer(SubBuffer)); + *phMem = reinterpret_cast(SubBuffer.get()); + } else { + UR_CALL(pfnBufferPartition(hBuffer, flags, bufferCreateType, pRegion, + phMem)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urMemGetNativeHandle +ur_result_t urMemGetNativeHandle( + ur_mem_handle_t hMem, ///< [in] handle of the mem. + ur_device_handle_t hDevice, + ur_native_handle_t + *phNativeMem ///< [out] a pointer to the native handle of the mem. 
+) { + auto pfnGetNativeHandle = getContext()->urDdiTable.Mem.pfnGetNativeHandle; + + getContext()->logger.debug("==== urMemGetNativeHandle"); + + if (auto MemBuffer = getMsanInterceptor()->getMemBuffer(hMem)) { + char *Handle = nullptr; + UR_CALL(MemBuffer->getHandle(hDevice, Handle)); + *phNativeMem = ur_cast(Handle); + } else { + UR_CALL(pfnGetNativeHandle(hMem, hDevice, phNativeMem)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueMemBufferRead +ur_result_t urEnqueueMemBufferRead( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) + size_t offset, ///< [in] offset in bytes in the buffer object + size_t size, ///< [in] size in bytes of data being read + void *pDst, ///< [in] pointer to host memory where data is to be read into + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that this + ///< command does not wait on any event to complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. 
+) { + auto pfnMemBufferRead = getContext()->urDdiTable.Enqueue.pfnMemBufferRead; + + getContext()->logger.debug("==== urEnqueueMemBufferRead"); + + if (auto MemBuffer = getMsanInterceptor()->getMemBuffer(hBuffer)) { + ur_device_handle_t Device = GetDevice(hQueue); + char *pSrc = nullptr; + UR_CALL(MemBuffer->getHandle(Device, pSrc)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + hQueue, blockingRead, pDst, pSrc + offset, size, + numEventsInWaitList, phEventWaitList, phEvent)); + } else { + UR_CALL(pfnMemBufferRead(hQueue, hBuffer, blockingRead, offset, size, + pDst, numEventsInWaitList, phEventWaitList, + phEvent)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueMemBufferWrite +ur_result_t urEnqueueMemBufferWrite( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + bool + blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) + size_t offset, ///< [in] offset in bytes in the buffer object + size_t size, ///< [in] size in bytes of data being written + const void + *pSrc, ///< [in] pointer to host memory where data is to be written from + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that this + ///< command does not wait on any event to complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. 
+) { + auto pfnMemBufferWrite = getContext()->urDdiTable.Enqueue.pfnMemBufferWrite; + + getContext()->logger.debug("==== urEnqueueMemBufferWrite"); + + if (auto MemBuffer = getMsanInterceptor()->getMemBuffer(hBuffer)) { + ur_device_handle_t Device = GetDevice(hQueue); + char *pDst = nullptr; + UR_CALL(MemBuffer->getHandle(Device, pDst)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + hQueue, blockingWrite, pDst + offset, pSrc, size, + numEventsInWaitList, phEventWaitList, phEvent)); + } else { + UR_CALL(pfnMemBufferWrite(hQueue, hBuffer, blockingWrite, offset, size, + pSrc, numEventsInWaitList, phEventWaitList, + phEvent)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueMemBufferReadRect +ur_result_t urEnqueueMemBufferReadRect( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object + bool blockingRead, ///< [in] indicates blocking (true), non-blocking (false) + ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer + ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region + ur_rect_region_t + region, ///< [in] 3D rectangular region descriptor: width, height, depth + size_t + bufferRowPitch, ///< [in] length of each row in bytes in the buffer object + size_t + bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being read + size_t + hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed by + ///< dst + size_t + hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + ///< pointed by dst + void *pDst, ///< [in] pointer to host memory where data is to be read into + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, 
numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that this + ///< command does not wait on any event to complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. +) { + auto pfnMemBufferReadRect = + getContext()->urDdiTable.Enqueue.pfnMemBufferReadRect; + + getContext()->logger.debug("==== urEnqueueMemBufferReadRect"); + + if (auto MemBuffer = getMsanInterceptor()->getMemBuffer(hBuffer)) { + char *SrcHandle = nullptr; + ur_device_handle_t Device = GetDevice(hQueue); + UR_CALL(MemBuffer->getHandle(Device, SrcHandle)); + + UR_CALL(EnqueueMemCopyRectHelper( + hQueue, SrcHandle, ur_cast(pDst), bufferOrigin, hostOrigin, + region, bufferRowPitch, bufferSlicePitch, hostRowPitch, + hostSlicePitch, blockingRead, numEventsInWaitList, phEventWaitList, + phEvent)); + } else { + UR_CALL(pfnMemBufferReadRect( + hQueue, hBuffer, blockingRead, bufferOrigin, hostOrigin, region, + bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, + pDst, numEventsInWaitList, phEventWaitList, phEvent)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueMemBufferWriteRect +ur_result_t urEnqueueMemBufferWriteRect( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBuffer, ///< [in][bounds(bufferOrigin, region)] handle of the buffer object + bool + blockingWrite, ///< [in] indicates blocking (true), non-blocking (false) + ur_rect_offset_t bufferOrigin, ///< [in] 3D offset in the buffer + ur_rect_offset_t hostOrigin, ///< [in] 3D offset in the host region + ur_rect_region_t + region, ///< [in] 3D rectangular region descriptor: width, height, depth + size_t + bufferRowPitch, ///< [in] length of each row in bytes in the buffer object 
+ size_t + bufferSlicePitch, ///< [in] length of each 2D slice in bytes in the buffer object being + ///< written + size_t + hostRowPitch, ///< [in] length of each row in bytes in the host memory region pointed by + ///< src + size_t + hostSlicePitch, ///< [in] length of each 2D slice in bytes in the host memory region + ///< pointed by src + void + *pSrc, ///< [in] pointer to host memory where data is to be written from + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] points to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that this + ///< command does not wait on any event to complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. +) { + auto pfnMemBufferWriteRect = + getContext()->urDdiTable.Enqueue.pfnMemBufferWriteRect; + + getContext()->logger.debug("==== urEnqueueMemBufferWriteRect"); + + if (auto MemBuffer = getMsanInterceptor()->getMemBuffer(hBuffer)) { + char *DstHandle = nullptr; + ur_device_handle_t Device = GetDevice(hQueue); + UR_CALL(MemBuffer->getHandle(Device, DstHandle)); + + UR_CALL(EnqueueMemCopyRectHelper( + hQueue, ur_cast(pSrc), DstHandle, hostOrigin, bufferOrigin, + region, hostRowPitch, hostSlicePitch, bufferRowPitch, + bufferSlicePitch, blockingWrite, numEventsInWaitList, + phEventWaitList, phEvent)); + } else { + UR_CALL(pfnMemBufferWriteRect( + hQueue, hBuffer, blockingWrite, bufferOrigin, hostOrigin, region, + bufferRowPitch, bufferSlicePitch, hostRowPitch, hostSlicePitch, + pSrc, numEventsInWaitList, phEventWaitList, phEvent)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueMemBufferCopy +ur_result_t 
urEnqueueMemBufferCopy( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOffset, size)] handle of the src buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOffset, size)] handle of the dest buffer object + size_t srcOffset, ///< [in] offset into hBufferSrc to begin copying from + size_t dstOffset, ///< [in] offset info hBufferDst to begin copying into + size_t size, ///< [in] size in bytes of data being copied + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that this + ///< command does not wait on any event to complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. 
+) { + auto pfnMemBufferCopy = getContext()->urDdiTable.Enqueue.pfnMemBufferCopy; + + getContext()->logger.debug("==== urEnqueueMemBufferCopy"); + + auto SrcBuffer = getMsanInterceptor()->getMemBuffer(hBufferSrc); + auto DstBuffer = getMsanInterceptor()->getMemBuffer(hBufferDst); + + UR_ASSERT((SrcBuffer && DstBuffer) || (!SrcBuffer && !DstBuffer), + UR_RESULT_ERROR_INVALID_MEM_OBJECT); + + if (SrcBuffer && DstBuffer) { + ur_device_handle_t Device = GetDevice(hQueue); + char *SrcHandle = nullptr; + UR_CALL(SrcBuffer->getHandle(Device, SrcHandle)); + + char *DstHandle = nullptr; + UR_CALL(DstBuffer->getHandle(Device, DstHandle)); + + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + hQueue, false, DstHandle + dstOffset, SrcHandle + srcOffset, size, + numEventsInWaitList, phEventWaitList, phEvent)); + } else { + UR_CALL(pfnMemBufferCopy(hQueue, hBufferSrc, hBufferDst, srcOffset, + dstOffset, size, numEventsInWaitList, + phEventWaitList, phEvent)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueMemBufferCopyRect +ur_result_t urEnqueueMemBufferCopyRect( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBufferSrc, ///< [in][bounds(srcOrigin, region)] handle of the source buffer object + ur_mem_handle_t + hBufferDst, ///< [in][bounds(dstOrigin, region)] handle of the dest buffer object + ur_rect_offset_t srcOrigin, ///< [in] 3D offset in the source buffer + ur_rect_offset_t dstOrigin, ///< [in] 3D offset in the destination buffer + ur_rect_region_t + region, ///< [in] source 3D rectangular region descriptor: width, height, depth + size_t + srcRowPitch, ///< [in] length of each row in bytes in the source buffer object + size_t + srcSlicePitch, ///< [in] length of each 2D slice in bytes in the source buffer object + size_t + dstRowPitch, ///< [in] length of each row in bytes in the destination buffer object + size_t 
+ dstSlicePitch, ///< [in] length of each 2D slice in bytes in the destination buffer object + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that this + ///< command does not wait on any event to complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. +) { + auto pfnMemBufferCopyRect = + getContext()->urDdiTable.Enqueue.pfnMemBufferCopyRect; + + getContext()->logger.debug("==== urEnqueueMemBufferCopyRect"); + + auto SrcBuffer = getMsanInterceptor()->getMemBuffer(hBufferSrc); + auto DstBuffer = getMsanInterceptor()->getMemBuffer(hBufferDst); + + UR_ASSERT((SrcBuffer && DstBuffer) || (!SrcBuffer && !DstBuffer), + UR_RESULT_ERROR_INVALID_MEM_OBJECT); + + if (SrcBuffer && DstBuffer) { + ur_device_handle_t Device = GetDevice(hQueue); + char *SrcHandle = nullptr; + UR_CALL(SrcBuffer->getHandle(Device, SrcHandle)); + + char *DstHandle = nullptr; + UR_CALL(DstBuffer->getHandle(Device, DstHandle)); + + UR_CALL(EnqueueMemCopyRectHelper( + hQueue, SrcHandle, DstHandle, srcOrigin, dstOrigin, region, + srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, false, + numEventsInWaitList, phEventWaitList, phEvent)); + } else { + UR_CALL(pfnMemBufferCopyRect( + hQueue, hBufferSrc, hBufferDst, srcOrigin, dstOrigin, region, + srcRowPitch, srcSlicePitch, dstRowPitch, dstSlicePitch, + numEventsInWaitList, phEventWaitList, phEvent)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueMemBufferFill +ur_result_t urEnqueueMemBufferFill( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t 
+ hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + const void *pPattern, ///< [in] pointer to the fill pattern + size_t patternSize, ///< [in] size in bytes of the pattern + size_t offset, ///< [in] offset into the buffer + size_t size, ///< [in] fill size in bytes, must be a multiple of patternSize + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that this + ///< command does not wait on any event to complete. + ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. +) { + auto pfnMemBufferFill = getContext()->urDdiTable.Enqueue.pfnMemBufferFill; + + getContext()->logger.debug("==== urEnqueueMemBufferFill"); + + if (auto MemBuffer = getMsanInterceptor()->getMemBuffer(hBuffer)) { + char *Handle = nullptr; + ur_device_handle_t Device = GetDevice(hQueue); + UR_CALL(MemBuffer->getHandle(Device, Handle)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMFill( + hQueue, Handle + offset, patternSize, pPattern, size, + numEventsInWaitList, phEventWaitList, phEvent)); + } else { + UR_CALL(pfnMemBufferFill(hQueue, hBuffer, pPattern, patternSize, offset, + size, numEventsInWaitList, phEventWaitList, + phEvent)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueMemBufferMap +ur_result_t urEnqueueMemBufferMap( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hBuffer, ///< [in][bounds(offset, size)] handle of the buffer object + bool blockingMap, ///< [in] indicates blocking (true), non-blocking (false) + ur_map_flags_t mapFlags, ///< [in] flags for read, 
write, readwrite mapping + size_t offset, ///< [in] offset in bytes of the buffer region being mapped + size_t size, ///< [in] size in bytes of the buffer region being mapped + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that this + ///< command does not wait on any event to complete. + ur_event_handle_t * + phEvent, ///< [out][optional] return an event object that identifies this particular + ///< command instance. + void **ppRetMap ///< [out] return mapped pointer. TODO: move it before + ///< numEventsInWaitList? +) { + auto pfnMemBufferMap = getContext()->urDdiTable.Enqueue.pfnMemBufferMap; + + getContext()->logger.debug("==== urEnqueueMemBufferMap"); + + if (auto MemBuffer = getMsanInterceptor()->getMemBuffer(hBuffer)) { + + // Translate the host access mode info. + MemBuffer::AccessMode AccessMode = MemBuffer::UNKNOWN; + if (mapFlags & UR_MAP_FLAG_WRITE_INVALIDATE_REGION) { + AccessMode = MemBuffer::WRITE_ONLY; + } else { + if (mapFlags & UR_MAP_FLAG_READ) { + AccessMode = MemBuffer::READ_ONLY; + if (mapFlags & UR_MAP_FLAG_WRITE) { + AccessMode = MemBuffer::READ_WRITE; + } + } else if (mapFlags & UR_MAP_FLAG_WRITE) { + AccessMode = MemBuffer::WRITE_ONLY; + } + } + + UR_ASSERT(AccessMode != MemBuffer::UNKNOWN, + UR_RESULT_ERROR_INVALID_ARGUMENT); + + ur_device_handle_t Device = GetDevice(hQueue); + // If the buffer used host pointer, then we just reuse it. If not, we + // need to manually allocate a new host USM. 
+ if (MemBuffer->HostPtr) { + *ppRetMap = MemBuffer->HostPtr + offset; + } else { + ur_context_handle_t Context = GetContext(hQueue); + ur_usm_desc_t USMDesc{}; + USMDesc.align = MemBuffer->getAlignment(); + ur_usm_pool_handle_t Pool{}; + UR_CALL(getContext()->urDdiTable.USM.pfnHostAlloc( + Context, &USMDesc, Pool, size, ppRetMap)); + } + + // Actually, if the access mode is write only, we don't need to do this + // copy. However, in that way, we cannot generate a event to user. So, + // we'll aways do copy here. + char *SrcHandle = nullptr; + UR_CALL(MemBuffer->getHandle(Device, SrcHandle)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + hQueue, blockingMap, *ppRetMap, SrcHandle + offset, size, + numEventsInWaitList, phEventWaitList, phEvent)); + + { + std::scoped_lock Guard(MemBuffer->Mutex); + UR_ASSERT(MemBuffer->Mappings.find(*ppRetMap) == + MemBuffer->Mappings.end(), + UR_RESULT_ERROR_INVALID_VALUE); + MemBuffer->Mappings[*ppRetMap] = {offset, size}; + } + } else { + UR_CALL(pfnMemBufferMap(hQueue, hBuffer, blockingMap, mapFlags, offset, + size, numEventsInWaitList, phEventWaitList, + phEvent, ppRetMap)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urEnqueueMemUnmap +ur_result_t urEnqueueMemUnmap( + ur_queue_handle_t hQueue, ///< [in] handle of the queue object + ur_mem_handle_t + hMem, ///< [in] handle of the memory (buffer or image) object + void *pMappedPtr, ///< [in] mapped host address + uint32_t numEventsInWaitList, ///< [in] size of the event wait list + const ur_event_handle_t * + phEventWaitList, ///< [in][optional][range(0, numEventsInWaitList)] pointer to a list of + ///< events that must be complete before this command can be executed. + ///< If nullptr, the numEventsInWaitList must be 0, indicating that this + ///< command does not wait on any event to complete. 
+ ur_event_handle_t * + phEvent ///< [out][optional] return an event object that identifies this particular + ///< command instance. +) { + auto pfnMemUnmap = getContext()->urDdiTable.Enqueue.pfnMemUnmap; + + getContext()->logger.debug("==== urEnqueueMemUnmap"); + + if (auto MemBuffer = getMsanInterceptor()->getMemBuffer(hMem)) { + MemBuffer::Mapping Mapping{}; + { + std::scoped_lock Guard(MemBuffer->Mutex); + auto It = MemBuffer->Mappings.find(pMappedPtr); + UR_ASSERT(It != MemBuffer->Mappings.end(), + UR_RESULT_ERROR_INVALID_VALUE); + Mapping = It->second; + MemBuffer->Mappings.erase(It); + } + + // Write back mapping memory data to device and release mapping memory + // if we allocated a host USM. But for now, UR doesn't support event + // call back, we can only do blocking copy here. + char *DstHandle = nullptr; + ur_context_handle_t Context = GetContext(hQueue); + ur_device_handle_t Device = GetDevice(hQueue); + UR_CALL(MemBuffer->getHandle(Device, DstHandle)); + UR_CALL(getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + hQueue, true, DstHandle + Mapping.Offset, pMappedPtr, Mapping.Size, + numEventsInWaitList, phEventWaitList, phEvent)); + + if (!MemBuffer->HostPtr) { + UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, pMappedPtr)); + } + } else { + UR_CALL(pfnMemUnmap(hQueue, hMem, pMappedPtr, numEventsInWaitList, + phEventWaitList, phEvent)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urKernelCreate +ur_result_t urKernelCreate( + ur_program_handle_t hProgram, ///< [in] handle of the program instance + const char *pKernelName, ///< [in] pointer to null-terminated string. + ur_kernel_handle_t + *phKernel ///< [out] pointer to handle of kernel object created. 
+) { + auto pfnCreate = getContext()->urDdiTable.Kernel.pfnCreate; + + getContext()->logger.debug("==== urKernelCreate"); + + UR_CALL(pfnCreate(hProgram, pKernelName, phKernel)); + if (isInstrumentedKernel(*phKernel)) { + UR_CALL(getMsanInterceptor()->insertKernel(*phKernel)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urKernelRetain +ur_result_t urKernelRetain( + ur_kernel_handle_t hKernel ///< [in] handle for the Kernel to retain +) { + auto pfnRetain = getContext()->urDdiTable.Kernel.pfnRetain; + + getContext()->logger.debug("==== urKernelRetain"); + + UR_CALL(pfnRetain(hKernel)); + + auto KernelInfo = getMsanInterceptor()->getKernelInfo(hKernel); + if (KernelInfo) { + KernelInfo->RefCount++; + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urKernelRelease +ur_result_t urKernelRelease( + ur_kernel_handle_t hKernel ///< [in] handle for the Kernel to release +) { + auto pfnRelease = getContext()->urDdiTable.Kernel.pfnRelease; + + getContext()->logger.debug("==== urKernelRelease"); + UR_CALL(pfnRelease(hKernel)); + + auto KernelInfo = getMsanInterceptor()->getKernelInfo(hKernel); + if (KernelInfo) { + if (--KernelInfo->RefCount == 0) { + UR_CALL(getMsanInterceptor()->eraseKernel(hKernel)); + } + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urKernelSetArgValue +ur_result_t urKernelSetArgValue( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t argIndex, ///< [in] argument index in range [0, num args - 1] + size_t argSize, ///< [in] size of argument type + const ur_kernel_arg_value_properties_t + *pProperties, ///< [in][optional] pointer to value properties. 
+ const void + *pArgValue ///< [in] argument value represented as matching arg type. +) { + auto pfnSetArgValue = getContext()->urDdiTable.Kernel.pfnSetArgValue; + + getContext()->logger.debug("==== urKernelSetArgValue"); + + std::shared_ptr MemBuffer; + std::shared_ptr KernelInfo; + if (argSize == sizeof(ur_mem_handle_t) && + (MemBuffer = getMsanInterceptor()->getMemBuffer( + *ur_cast(pArgValue))) && + (KernelInfo = getMsanInterceptor()->getKernelInfo(hKernel))) { + std::scoped_lock Guard(KernelInfo->Mutex); + KernelInfo->BufferArgs[argIndex] = std::move(MemBuffer); + } else { + UR_CALL( + pfnSetArgValue(hKernel, argIndex, argSize, pProperties, pArgValue)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Intercept function for urKernelSetArgMemObj +ur_result_t urKernelSetArgMemObj( + ur_kernel_handle_t hKernel, ///< [in] handle of the kernel object + uint32_t argIndex, ///< [in] argument index in range [0, num args - 1] + const ur_kernel_arg_mem_obj_properties_t + *pProperties, ///< [in][optional] pointer to Memory object properties. + ur_mem_handle_t hArgValue ///< [in][optional] handle of Memory object. 
+) { + auto pfnSetArgMemObj = getContext()->urDdiTable.Kernel.pfnSetArgMemObj; + + getContext()->logger.debug("==== urKernelSetArgMemObj"); + + std::shared_ptr MemBuffer; + std::shared_ptr KernelInfo; + if ((MemBuffer = getMsanInterceptor()->getMemBuffer(hArgValue)) && + (KernelInfo = getMsanInterceptor()->getKernelInfo(hKernel))) { + std::scoped_lock Guard(KernelInfo->Mutex); + KernelInfo->BufferArgs[argIndex] = std::move(MemBuffer); + } else { + UR_CALL(pfnSetArgMemObj(hKernel, argIndex, pProperties, hArgValue)); + } + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's Global table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +ur_result_t urGetGlobalProcAddrTable( + ur_global_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnAdapterGet = ur_sanitizer_layer::msan::urAdapterGet; + + return result; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's Context table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +ur_result_t urGetContextProcAddrTable( + ur_context_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnCreate = ur_sanitizer_layer::msan::urContextCreate; + pDdiTable->pfnRetain = ur_sanitizer_layer::msan::urContextRetain; + pDdiTable->pfnRelease = ur_sanitizer_layer::msan::urContextRelease; + + pDdiTable->pfnCreateWithNativeHandle = + ur_sanitizer_layer::msan::urContextCreateWithNativeHandle; + + return result; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief 
Exported function for filling application's Program table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +ur_result_t urGetProgramProcAddrTable( + ur_program_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + pDdiTable->pfnCreateWithIL = + ur_sanitizer_layer::msan::urProgramCreateWithIL; + pDdiTable->pfnCreateWithBinary = + ur_sanitizer_layer::msan::urProgramCreateWithBinary; + pDdiTable->pfnCreateWithNativeHandle = + ur_sanitizer_layer::msan::urProgramCreateWithNativeHandle; + pDdiTable->pfnBuild = ur_sanitizer_layer::msan::urProgramBuild; + pDdiTable->pfnLink = ur_sanitizer_layer::msan::urProgramLink; + pDdiTable->pfnRetain = ur_sanitizer_layer::msan::urProgramRetain; + pDdiTable->pfnRelease = ur_sanitizer_layer::msan::urProgramRelease; + + return UR_RESULT_SUCCESS; +} + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's Kernel table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +ur_result_t urGetKernelProcAddrTable( + ur_kernel_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnCreate = ur_sanitizer_layer::msan::urKernelCreate; + pDdiTable->pfnRetain = ur_sanitizer_layer::msan::urKernelRetain; + pDdiTable->pfnRelease = ur_sanitizer_layer::msan::urKernelRelease; + pDdiTable->pfnSetArgValue = ur_sanitizer_layer::msan::urKernelSetArgValue; + pDdiTable->pfnSetArgMemObj = ur_sanitizer_layer::msan::urKernelSetArgMemObj; + + return result; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's Mem table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - 
::UR_RESULT_ERROR_INVALID_NULL_POINTER +ur_result_t urGetMemProcAddrTable( + ur_mem_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnBufferCreate = ur_sanitizer_layer::msan::urMemBufferCreate; + pDdiTable->pfnRetain = ur_sanitizer_layer::msan::urMemRetain; + pDdiTable->pfnRelease = ur_sanitizer_layer::msan::urMemRelease; + pDdiTable->pfnBufferPartition = + ur_sanitizer_layer::msan::urMemBufferPartition; + pDdiTable->pfnGetNativeHandle = + ur_sanitizer_layer::msan::urMemGetNativeHandle; + pDdiTable->pfnGetInfo = ur_sanitizer_layer::msan::urMemGetInfo; + + return result; +} +/// @brief Exported function for filling application's ProgramExp table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +ur_result_t urGetProgramExpProcAddrTable( + ur_program_exp_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnBuildExp = ur_sanitizer_layer::msan::urProgramBuildExp; + pDdiTable->pfnLinkExp = ur_sanitizer_layer::msan::urProgramLinkExp; + + return result; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's Enqueue table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +ur_result_t urGetEnqueueProcAddrTable( + ur_enqueue_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnMemBufferRead = + ur_sanitizer_layer::msan::urEnqueueMemBufferRead; + pDdiTable->pfnMemBufferWrite = + ur_sanitizer_layer::msan::urEnqueueMemBufferWrite; + pDdiTable->pfnMemBufferReadRect = + ur_sanitizer_layer::msan::urEnqueueMemBufferReadRect; + pDdiTable->pfnMemBufferWriteRect = + 
ur_sanitizer_layer::msan::urEnqueueMemBufferWriteRect; + pDdiTable->pfnMemBufferCopy = + ur_sanitizer_layer::msan::urEnqueueMemBufferCopy; + pDdiTable->pfnMemBufferCopyRect = + ur_sanitizer_layer::msan::urEnqueueMemBufferCopyRect; + pDdiTable->pfnMemBufferFill = + ur_sanitizer_layer::msan::urEnqueueMemBufferFill; + pDdiTable->pfnMemBufferMap = + ur_sanitizer_layer::msan::urEnqueueMemBufferMap; + pDdiTable->pfnMemUnmap = ur_sanitizer_layer::msan::urEnqueueMemUnmap; + pDdiTable->pfnKernelLaunch = + ur_sanitizer_layer::msan::urEnqueueKernelLaunch; + + return result; +} +/////////////////////////////////////////////////////////////////////////////// +/// @brief Exported function for filling application's USM table +/// with current process' addresses +/// +/// @returns +/// - ::UR_RESULT_SUCCESS +/// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER +ur_result_t urGetUSMProcAddrTable( + ur_usm_dditable_t + *pDdiTable ///< [in,out] pointer to table of DDI function pointers +) { + ur_result_t result = UR_RESULT_SUCCESS; + + pDdiTable->pfnDeviceAlloc = ur_sanitizer_layer::msan::urUSMDeviceAlloc; + + return result; +} + +ur_result_t urCheckVersion(ur_api_version_t version) { + if (UR_MAJOR_VERSION(ur_sanitizer_layer::getContext()->version) != + UR_MAJOR_VERSION(version) || + UR_MINOR_VERSION(ur_sanitizer_layer::getContext()->version) > + UR_MINOR_VERSION(version)) { + return UR_RESULT_ERROR_UNSUPPORTED_VERSION; + } + return UR_RESULT_SUCCESS; +} + +} // namespace msan + +ur_result_t initMsanDDITable(ur_dditable_t *dditable) { + ur_result_t result = UR_RESULT_SUCCESS; + + getContext()->logger.always("==== DeviceSanitizer: MSAN"); + + if (UR_RESULT_SUCCESS == result) { + result = + ur_sanitizer_layer::msan::urCheckVersion(UR_API_VERSION_CURRENT); + } + + if (UR_RESULT_SUCCESS == result) { + result = ur_sanitizer_layer::msan::urGetGlobalProcAddrTable( + &dditable->Global); + } + + if (UR_RESULT_SUCCESS == result) { + result = ur_sanitizer_layer::msan::urGetContextProcAddrTable( + 
&dditable->Context); + } + + if (UR_RESULT_SUCCESS == result) { + result = ur_sanitizer_layer::msan::urGetKernelProcAddrTable( + &dditable->Kernel); + } + + if (UR_RESULT_SUCCESS == result) { + result = ur_sanitizer_layer::msan::urGetProgramProcAddrTable( + &dditable->Program); + } + + if (UR_RESULT_SUCCESS == result) { + result = ur_sanitizer_layer::msan::urGetKernelProcAddrTable( + &dditable->Kernel); + } + + if (UR_RESULT_SUCCESS == result) { + result = + ur_sanitizer_layer::msan::urGetMemProcAddrTable(&dditable->Mem); + } + + if (UR_RESULT_SUCCESS == result) { + result = ur_sanitizer_layer::msan::urGetProgramExpProcAddrTable( + &dditable->ProgramExp); + } + + if (UR_RESULT_SUCCESS == result) { + result = ur_sanitizer_layer::msan::urGetEnqueueProcAddrTable( + &dditable->Enqueue); + } + + if (UR_RESULT_SUCCESS == result) { + result = + ur_sanitizer_layer::msan::urGetUSMProcAddrTable(&dditable->USM); + } + + if (result != UR_RESULT_SUCCESS) { + getContext()->logger.error("Initialize MSAN DDI table failed: {}", + result); + } + + return result; +} + +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/msan/msan_ddi.hpp b/source/loader/layers/sanitizer/msan/msan_ddi.hpp new file mode 100644 index 0000000000..0e0bc84803 --- /dev/null +++ b/source/loader/layers/sanitizer/msan/msan_ddi.hpp @@ -0,0 +1,22 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file msan_ddi.hpp + * + */ + +#include "ur_ddi.h" + +namespace ur_sanitizer_layer { + +void initMsanInterceptor(); +void destroyMsanInterceptor(); + +ur_result_t initMsanDDITable(ur_dditable_t *dditable); + +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/msan/msan_interceptor.cpp b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp new file mode 100644 index 0000000000..30a2e07359 --- /dev/null +++ b/source/loader/layers/sanitizer/msan/msan_interceptor.cpp @@ -0,0 +1,490 @@ +//===----------------------------------------------------------------------===// +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file msan_interceptor.cpp + * + */ + +#include "msan_interceptor.hpp" +#include "msan_ddi.hpp" +#include "msan_options.hpp" +#include "msan_report.hpp" +#include "msan_shadow.hpp" +#include "sanitizer_common/sanitizer_stacktrace.hpp" +#include "sanitizer_common/sanitizer_utils.hpp" +#include "ur_sanitizer_layer.hpp" + +namespace ur_sanitizer_layer { +namespace msan { + +MsanInterceptor::MsanInterceptor() {} + +MsanInterceptor::~MsanInterceptor() { + // We must release these objects before releasing adapters, since + // they may use the adapter in their destructor + for (const auto &[_, DeviceInfo] : m_DeviceMap) { + DeviceInfo->Shadow->Destory(); + } + + m_MemBufferMap.clear(); + m_AllocationMap.clear(); + m_KernelMap.clear(); + m_ContextMap.clear(); + + for (auto Adapter : m_Adapters) { + getContext()->urDdiTable.Global.pfnAdapterRelease(Adapter); + } +} + +ur_result_t MsanInterceptor::allocateMemory(ur_context_handle_t Context, + ur_device_handle_t Device, + const ur_usm_desc_t *Properties, + ur_usm_pool_handle_t Pool, + size_t Size, void **ResultPtr) { + + 
auto ContextInfo = getContextInfo(Context); + std::shared_ptr DeviceInfo = + Device ? getDeviceInfo(Device) : nullptr; + + void *Allocated = nullptr; + + UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( + Context, Device, Properties, Pool, Size, &Allocated)); + + *ResultPtr = Allocated; + + auto AI = + std::make_shared(MsanAllocInfo{(uptr)Allocated, + Size, + false, + Context, + Device, + GetCurrentBacktrace(), + {}}); + + AI->print(); + + // For updating shadow memory + ContextInfo->insertAllocInfo({Device}, AI); + + // For memory release + { + std::scoped_lock Guard(m_AllocationMapMutex); + m_AllocationMap.emplace(AI->AllocBegin, std::move(AI)); + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t MsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, + ur_queue_handle_t Queue, + USMLaunchInfo &LaunchInfo) { + auto Context = GetContext(Queue); + auto Device = GetDevice(Queue); + auto ContextInfo = getContextInfo(Context); + auto DeviceInfo = getDeviceInfo(Device); + + ManagedQueue InternalQueue(Context, Device); + if (!InternalQueue) { + getContext()->logger.error("Failed to create internal queue"); + return UR_RESULT_ERROR_INVALID_QUEUE; + } + + UR_CALL(prepareLaunch(DeviceInfo, InternalQueue, Kernel, LaunchInfo)); + + UR_CALL(updateShadowMemory(ContextInfo, DeviceInfo, InternalQueue)); + + return UR_RESULT_SUCCESS; +} + +ur_result_t MsanInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel, + ur_queue_handle_t Queue, + USMLaunchInfo &LaunchInfo) { + // FIXME: We must use block operation here, until we support urEventSetCallback + auto Result = getContext()->urDdiTable.Queue.pfnFinish(Queue); + + if (Result == UR_RESULT_SUCCESS) { + const auto &Report = LaunchInfo.Data->Report; + + if (!Report.Flag) { + return Result; + } + + ReportUsesUninitializedValue(LaunchInfo.Data->Report, Kernel); + + exitWithErrors(); + } + + return Result; +} + +ur_result_t +MsanInterceptor::enqueueAllocInfo(std::shared_ptr &DeviceInfo, + ur_queue_handle_t Queue, + 
std::shared_ptr &AI) { + return DeviceInfo->Shadow->EnqueuePoisonShadow(Queue, AI->AllocBegin, + AI->AllocSize, 0xff); +} + +ur_result_t +MsanInterceptor::updateShadowMemory(std::shared_ptr &ContextInfo, + std::shared_ptr &DeviceInfo, + ur_queue_handle_t Queue) { + auto &AllocInfos = ContextInfo->AllocInfosMap[DeviceInfo->Handle]; + std::scoped_lock Guard(AllocInfos.Mutex); + + for (auto &AI : AllocInfos.List) { + UR_CALL(enqueueAllocInfo(DeviceInfo, Queue, AI)); + } + AllocInfos.List.clear(); + + return UR_RESULT_SUCCESS; +} + +ur_result_t MsanInterceptor::registerProgram(ur_program_handle_t Program) { + ur_result_t Result = UR_RESULT_SUCCESS; + + getContext()->logger.info("registerSpirKernels"); + Result = registerSpirKernels(Program); + if (Result != UR_RESULT_SUCCESS) { + return Result; + } + + return Result; +} + +ur_result_t MsanInterceptor::unregisterProgram(ur_program_handle_t) { + return UR_RESULT_SUCCESS; +} + +ur_result_t MsanInterceptor::registerSpirKernels(ur_program_handle_t Program) { + auto Context = GetContext(Program); + std::vector Devices = GetDevices(Program); + + for (auto Device : Devices) { + size_t MetadataSize; + void *MetadataPtr; + ur_result_t Result = + getContext()->urDdiTable.Program.pfnGetGlobalVariablePointer( + Device, Program, kSPIR_MsanSpirKernelMetadata, &MetadataSize, + &MetadataPtr); + if (Result != UR_RESULT_SUCCESS) { + getContext()->logger.error( + "Can't get the pointer of <{}> under device {}: {}", + kSPIR_MsanSpirKernelMetadata, (void *)Device, Result); + return Result; + } + + const uint64_t NumOfSpirKernel = MetadataSize / sizeof(SpirKernelInfo); + assert((MetadataSize % sizeof(SpirKernelInfo) == 0) && + "SpirKernelMetadata size is not correct"); + + ManagedQueue Queue(Context, Device); + + std::vector SKInfo(NumOfSpirKernel); + Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, true, &SKInfo[0], MetadataPtr, + sizeof(SpirKernelInfo) * NumOfSpirKernel, 0, nullptr, nullptr); + if (Result != 
UR_RESULT_SUCCESS) { + getContext()->logger.error("Can't read the value of <{}>: {}", + kSPIR_MsanSpirKernelMetadata, Result); + return Result; + } + + auto PI = getProgramInfo(Program); + for (const auto &SKI : SKInfo) { + if (SKI.Size == 0) { + continue; + } + std::vector KernelNameV(SKI.Size); + Result = getContext()->urDdiTable.Enqueue.pfnUSMMemcpy( + Queue, true, KernelNameV.data(), (void *)SKI.KernelName, + sizeof(char) * SKI.Size, 0, nullptr, nullptr); + if (Result != UR_RESULT_SUCCESS) { + getContext()->logger.error("Can't read kernel name: {}", + Result); + return Result; + } + + std::string KernelName = + std::string(KernelNameV.begin(), KernelNameV.end()); + + getContext()->logger.info( + "SpirKernel(name='{}', isInstrumented={})", KernelName, true); + + PI->InstrumentedKernels.insert(KernelName); + } + getContext()->logger.info("Number of sanitized kernel: {}", + PI->InstrumentedKernels.size()); + } + + return UR_RESULT_SUCCESS; +} + +ur_result_t MsanInterceptor::insertContext(ur_context_handle_t Context, + std::shared_ptr &CI) { + std::scoped_lock Guard(m_ContextMapMutex); + + if (m_ContextMap.find(Context) != m_ContextMap.end()) { + CI = m_ContextMap.at(Context); + return UR_RESULT_SUCCESS; + } + + CI = std::make_shared(Context); + + // Don't move CI, since it's a return value as well + m_ContextMap.emplace(Context, CI); + + return UR_RESULT_SUCCESS; +} + +ur_result_t MsanInterceptor::eraseContext(ur_context_handle_t Context) { + std::scoped_lock Guard(m_ContextMapMutex); + assert(m_ContextMap.find(Context) != m_ContextMap.end()); + m_ContextMap.erase(Context); + // TODO: Remove devices in each context + return UR_RESULT_SUCCESS; +} + +ur_result_t MsanInterceptor::insertDevice(ur_device_handle_t Device, + std::shared_ptr &DI) { + std::scoped_lock Guard(m_DeviceMapMutex); + + if (m_DeviceMap.find(Device) != m_DeviceMap.end()) { + DI = m_DeviceMap.at(Device); + return UR_RESULT_SUCCESS; + } + + DI = std::make_shared(Device); + + 
DI->IsSupportSharedSystemUSM = GetDeviceUSMCapability( + Device, UR_DEVICE_INFO_USM_SYSTEM_SHARED_SUPPORT); + + // Query alignment + UR_CALL(getContext()->urDdiTable.Device.pfnGetInfo( + Device, UR_DEVICE_INFO_MEM_BASE_ADDR_ALIGN, sizeof(DI->Alignment), + &DI->Alignment, nullptr)); + + // Don't move DI, since it's a return value as well + m_DeviceMap.emplace(Device, DI); + + return UR_RESULT_SUCCESS; +} + +ur_result_t MsanInterceptor::eraseDevice(ur_device_handle_t Device) { + std::scoped_lock Guard(m_DeviceMapMutex); + assert(m_DeviceMap.find(Device) != m_DeviceMap.end()); + m_DeviceMap.erase(Device); + // TODO: Remove devices in each context + return UR_RESULT_SUCCESS; +} + +ur_result_t MsanInterceptor::insertProgram(ur_program_handle_t Program) { + std::scoped_lock Guard(m_ProgramMapMutex); + if (m_ProgramMap.find(Program) != m_ProgramMap.end()) { + return UR_RESULT_SUCCESS; + } + m_ProgramMap.emplace(Program, std::make_shared(Program)); + return UR_RESULT_SUCCESS; +} + +ur_result_t MsanInterceptor::eraseProgram(ur_program_handle_t Program) { + std::scoped_lock Guard(m_ProgramMapMutex); + assert(m_ProgramMap.find(Program) != m_ProgramMap.end()); + m_ProgramMap.erase(Program); + return UR_RESULT_SUCCESS; +} + +ur_result_t MsanInterceptor::insertKernel(ur_kernel_handle_t Kernel) { + std::scoped_lock Guard(m_KernelMapMutex); + if (m_KernelMap.find(Kernel) != m_KernelMap.end()) { + return UR_RESULT_SUCCESS; + } + m_KernelMap.emplace(Kernel, std::make_shared(Kernel)); + return UR_RESULT_SUCCESS; +} + +ur_result_t MsanInterceptor::eraseKernel(ur_kernel_handle_t Kernel) { + std::scoped_lock Guard(m_KernelMapMutex); + assert(m_KernelMap.find(Kernel) != m_KernelMap.end()); + m_KernelMap.erase(Kernel); + return UR_RESULT_SUCCESS; +} + +ur_result_t +MsanInterceptor::insertMemBuffer(std::shared_ptr MemBuffer) { + std::scoped_lock Guard(m_MemBufferMapMutex); + assert(m_MemBufferMap.find(ur_cast(MemBuffer.get())) == + m_MemBufferMap.end()); + 
m_MemBufferMap.emplace(reinterpret_cast(MemBuffer.get()), + MemBuffer); + return UR_RESULT_SUCCESS; +} + +ur_result_t MsanInterceptor::eraseMemBuffer(ur_mem_handle_t MemHandle) { + std::scoped_lock Guard(m_MemBufferMapMutex); + assert(m_MemBufferMap.find(MemHandle) != m_MemBufferMap.end()); + m_MemBufferMap.erase(MemHandle); + return UR_RESULT_SUCCESS; +} + +std::shared_ptr +MsanInterceptor::getMemBuffer(ur_mem_handle_t MemHandle) { + std::shared_lock Guard(m_MemBufferMapMutex); + if (m_MemBufferMap.find(MemHandle) != m_MemBufferMap.end()) { + return m_MemBufferMap[MemHandle]; + } + return nullptr; +} + +ur_result_t MsanInterceptor::prepareLaunch( + std::shared_ptr &DeviceInfo, ur_queue_handle_t Queue, + ur_kernel_handle_t Kernel, USMLaunchInfo &LaunchInfo) { + auto Program = GetProgram(Kernel); + + auto EnqueueWriteGlobal = + [&Queue, &Program](const char *Name, const void *Value, size_t Size) { + auto Result = + getContext()->urDdiTable.Enqueue.pfnDeviceGlobalVariableWrite( + Queue, Program, Name, false, Size, 0, Value, 0, nullptr, + nullptr); + if (Result != UR_RESULT_SUCCESS) { + getContext()->logger.error( + "Failed to write device global \"{}\": {}", Name, Result); + return Result; + } + return UR_RESULT_SUCCESS; + }; + + // Set membuffer arguments + auto KernelInfo = getKernelInfo(Kernel); + assert(KernelInfo && "Kernel must be instrumented"); + + for (const auto &[ArgIndex, MemBuffer] : KernelInfo->BufferArgs) { + char *ArgPointer = nullptr; + UR_CALL(MemBuffer->getHandle(DeviceInfo->Handle, ArgPointer)); + ur_result_t URes = getContext()->urDdiTable.Kernel.pfnSetArgPointer( + Kernel, ArgIndex, nullptr, ArgPointer); + if (URes != UR_RESULT_SUCCESS) { + getContext()->logger.error( + "Failed to set buffer {} as the {} arg to kernel {}: {}", + ur_cast(MemBuffer.get()), ArgIndex, Kernel, + URes); + } + } + + // Set LaunchInfo + LaunchInfo.Data->GlobalShadowOffset = DeviceInfo->Shadow->ShadowBegin; + LaunchInfo.Data->GlobalShadowOffsetEnd = 
DeviceInfo->Shadow->ShadowEnd; + LaunchInfo.Data->DeviceTy = DeviceInfo->Type; + LaunchInfo.Data->Debug = getOptions().Debug ? 1 : 0; + + getContext()->logger.info( + "launch_info {} (GlobalShadow={}, Device={}, Debug={})", + (void *)LaunchInfo.Data, LaunchInfo.Data->GlobalShadowOffset, + ToString(LaunchInfo.Data->DeviceTy), LaunchInfo.Data->Debug); + + UR_CALL( + EnqueueWriteGlobal("__MsanLaunchInfo", &LaunchInfo.Data, sizeof(uptr))); + + return UR_RESULT_SUCCESS; +} + +std::optional +MsanInterceptor::findAllocInfoByAddress(uptr Address) { + std::shared_lock Guard(m_AllocationMapMutex); + auto It = m_AllocationMap.upper_bound(Address); + if (It == m_AllocationMap.begin()) { + return std::optional{}; + } + --It; + // Make sure we got the right MsanAllocInfo + assert(Address >= It->second->AllocBegin && + Address < It->second->AllocBegin + It->second->AllocSize && + "Wrong MsanAllocInfo for the address"); + return It; +} + +std::vector +MsanInterceptor::findAllocInfoByContext(ur_context_handle_t Context) { + std::shared_lock Guard(m_AllocationMapMutex); + std::vector AllocInfos; + for (auto It = m_AllocationMap.begin(); It != m_AllocationMap.end(); It++) { + const auto &[_, AI] = *It; + if (AI->Context == Context) { + AllocInfos.emplace_back(It); + } + } + return AllocInfos; +} + +ur_result_t DeviceInfo::allocShadowMemory(ur_context_handle_t Context) { + Shadow = GetMsanShadowMemory(Context, Handle, Type); + assert(Shadow && "Failed to get shadow memory"); + UR_CALL(Shadow->Setup()); + getContext()->logger.info("ShadowMemory(Global): {} - {}", + (void *)Shadow->ShadowBegin, + (void *)Shadow->ShadowEnd); + return UR_RESULT_SUCCESS; +} + +bool ProgramInfo::isKernelInstrumented(ur_kernel_handle_t Kernel) const { + const auto Name = GetKernelName(Kernel); + return InstrumentedKernels.find(Name) != InstrumentedKernels.end(); +} + +ContextInfo::~ContextInfo() { + [[maybe_unused]] auto Result = + getContext()->urDdiTable.Context.pfnRelease(Handle); + assert(Result == 
UR_RESULT_SUCCESS); +} + +ur_result_t USMLaunchInfo::initialize() { + UR_CALL(getContext()->urDdiTable.Context.pfnRetain(Context)); + UR_CALL(getContext()->urDdiTable.Device.pfnRetain(Device)); + UR_CALL(getContext()->urDdiTable.USM.pfnSharedAlloc( + Context, Device, nullptr, nullptr, sizeof(MsanLaunchInfo), + (void **)&Data)); + *Data = MsanLaunchInfo{}; + return UR_RESULT_SUCCESS; +} + +USMLaunchInfo::~USMLaunchInfo() { + [[maybe_unused]] ur_result_t Result; + if (Data) { + Result = getContext()->urDdiTable.USM.pfnFree(Context, (void *)Data); + assert(Result == UR_RESULT_SUCCESS); + } + Result = getContext()->urDdiTable.Context.pfnRelease(Context); + assert(Result == UR_RESULT_SUCCESS); + Result = getContext()->urDdiTable.Device.pfnRelease(Device); + assert(Result == UR_RESULT_SUCCESS); +} + +} // namespace msan + +using namespace msan; + +static MsanInterceptor *interceptor; + +MsanInterceptor *getMsanInterceptor() { return interceptor; } + +void initMsanInterceptor() { + if (interceptor) { + return; + } + interceptor = new MsanInterceptor(); +} + +void destroyMsanInterceptor() { + delete interceptor; + interceptor = nullptr; +} + +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/msan/msan_interceptor.hpp b/source/loader/layers/sanitizer/msan/msan_interceptor.hpp new file mode 100644 index 0000000000..80dbf389a4 --- /dev/null +++ b/source/loader/layers/sanitizer/msan/msan_interceptor.hpp @@ -0,0 +1,323 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file msan_interceptor.hpp + * + */ + +#pragma once + +#include "msan_allocator.hpp" +#include "msan_buffer.hpp" +#include "msan_libdevice.hpp" +#include "msan_options.hpp" +#include "msan_shadow.hpp" +#include "sanitizer_common/sanitizer_common.hpp" +#include "ur_sanitizer_layer.hpp" + +#include +#include +#include +#include +#include + +namespace ur_sanitizer_layer { +namespace msan { + +struct AllocInfoList { + std::vector> List; + ur_shared_mutex Mutex; +}; + +struct DeviceInfo { + ur_device_handle_t Handle; + + DeviceType Type = DeviceType::UNKNOWN; + size_t Alignment = 0; + std::shared_ptr Shadow; + + // Device features + bool IsSupportSharedSystemUSM = false; + + // Device handles are special and alive in the whole process lifetime, + // so we needn't retain&release here. + explicit DeviceInfo(ur_device_handle_t Device) : Handle(Device) {} + + ur_result_t allocShadowMemory(ur_context_handle_t Context); +}; + +struct QueueInfo { + ur_queue_handle_t Handle; + + // lock this mutex if following fields are accessed + ur_shared_mutex Mutex; + ur_event_handle_t LastEvent; + + explicit QueueInfo(ur_queue_handle_t Queue) + : Handle(Queue), LastEvent(nullptr) { + [[maybe_unused]] auto Result = + getContext()->urDdiTable.Queue.pfnRetain(Queue); + assert(Result == UR_RESULT_SUCCESS); + } + + ~QueueInfo() { + [[maybe_unused]] auto Result = + getContext()->urDdiTable.Queue.pfnRelease(Handle); + assert(Result == UR_RESULT_SUCCESS); + } +}; + +struct KernelInfo { + ur_kernel_handle_t Handle; + std::atomic RefCount = 1; + + // lock this mutex if following fields are accessed + ur_shared_mutex Mutex; + std::unordered_map> BufferArgs; + + explicit KernelInfo(ur_kernel_handle_t Kernel) : Handle(Kernel) { + [[maybe_unused]] auto Result = + getContext()->urDdiTable.Kernel.pfnRetain(Kernel); + assert(Result == UR_RESULT_SUCCESS); + } + + ~KernelInfo() { + [[maybe_unused]] auto Result = + 
getContext()->urDdiTable.Kernel.pfnRelease(Handle); + assert(Result == UR_RESULT_SUCCESS); + } +}; + +struct ProgramInfo { + ur_program_handle_t Handle; + std::atomic RefCount = 1; + + // Program is built only once, so we don't need to lock it + std::unordered_set InstrumentedKernels; + + explicit ProgramInfo(ur_program_handle_t Program) : Handle(Program) { + [[maybe_unused]] auto Result = + getContext()->urDdiTable.Program.pfnRetain(Handle); + assert(Result == UR_RESULT_SUCCESS); + } + + ~ProgramInfo() { + [[maybe_unused]] auto Result = + getContext()->urDdiTable.Program.pfnRelease(Handle); + assert(Result == UR_RESULT_SUCCESS); + } + + bool isKernelInstrumented(ur_kernel_handle_t Kernel) const; +}; + +struct ContextInfo { + ur_context_handle_t Handle; + std::atomic RefCount = 1; + + std::vector DeviceList; + std::unordered_map AllocInfosMap; + + explicit ContextInfo(ur_context_handle_t Context) : Handle(Context) { + [[maybe_unused]] auto Result = + getContext()->urDdiTable.Context.pfnRetain(Context); + assert(Result == UR_RESULT_SUCCESS); + } + + ~ContextInfo(); + + void insertAllocInfo(const std::vector &Devices, + std::shared_ptr &AI) { + for (auto Device : Devices) { + auto &AllocInfos = AllocInfosMap[Device]; + std::scoped_lock Guard(AllocInfos.Mutex); + AllocInfos.List.emplace_back(AI); + } + } +}; + +struct USMLaunchInfo { + MsanLaunchInfo *Data = nullptr; + + ur_context_handle_t Context = nullptr; + ur_device_handle_t Device = nullptr; + const size_t *GlobalWorkSize = nullptr; + const size_t *GlobalWorkOffset = nullptr; + std::vector LocalWorkSize; + uint32_t WorkDim = 0; + + USMLaunchInfo(ur_context_handle_t Context, ur_device_handle_t Device, + const size_t *GlobalWorkSize, const size_t *LocalWorkSize, + const size_t *GlobalWorkOffset, uint32_t WorkDim) + : Context(Context), Device(Device), GlobalWorkSize(GlobalWorkSize), + GlobalWorkOffset(GlobalWorkOffset), WorkDim(WorkDim) { + if (LocalWorkSize) { + this->LocalWorkSize = + std::vector(LocalWorkSize, 
LocalWorkSize + WorkDim); + } + } + ~USMLaunchInfo(); + + ur_result_t initialize(); +}; + +struct SpirKernelInfo { + uptr KernelName; + uptr Size; +}; + +class MsanInterceptor { + public: + explicit MsanInterceptor(); + + ~MsanInterceptor(); + + ur_result_t allocateMemory(ur_context_handle_t Context, + ur_device_handle_t Device, + const ur_usm_desc_t *Properties, + ur_usm_pool_handle_t Pool, size_t Size, + void **ResultPtr); + + ur_result_t registerProgram(ur_program_handle_t Program); + ur_result_t unregisterProgram(ur_program_handle_t Program); + + ur_result_t preLaunchKernel(ur_kernel_handle_t Kernel, + ur_queue_handle_t Queue, + msan::USMLaunchInfo &LaunchInfo); + ur_result_t postLaunchKernel(ur_kernel_handle_t Kernel, + ur_queue_handle_t Queue, + msan::USMLaunchInfo &LaunchInfo); + + ur_result_t insertContext(ur_context_handle_t Context, + std::shared_ptr &CI); + ur_result_t eraseContext(ur_context_handle_t Context); + + ur_result_t insertDevice(ur_device_handle_t Device, + std::shared_ptr &CI); + ur_result_t eraseDevice(ur_device_handle_t Device); + + ur_result_t insertProgram(ur_program_handle_t Program); + ur_result_t eraseProgram(ur_program_handle_t Program); + + ur_result_t insertKernel(ur_kernel_handle_t Kernel); + ur_result_t eraseKernel(ur_kernel_handle_t Kernel); + + ur_result_t insertMemBuffer(std::shared_ptr MemBuffer); + ur_result_t eraseMemBuffer(ur_mem_handle_t MemHandle); + std::shared_ptr getMemBuffer(ur_mem_handle_t MemHandle); + + ur_result_t holdAdapter(ur_adapter_handle_t Adapter) { + std::scoped_lock Guard(m_AdaptersMutex); + if (m_Adapters.find(Adapter) != m_Adapters.end()) { + return UR_RESULT_SUCCESS; + } + UR_CALL(getContext()->urDdiTable.Global.pfnAdapterRetain(Adapter)); + m_Adapters.insert(Adapter); + return UR_RESULT_SUCCESS; + } + + std::optional findAllocInfoByAddress(uptr Address); + + std::vector + findAllocInfoByContext(ur_context_handle_t Context); + + std::shared_ptr + getContextInfo(ur_context_handle_t Context) { + 
std::shared_lock Guard(m_ContextMapMutex); + assert(m_ContextMap.find(Context) != m_ContextMap.end()); + return m_ContextMap[Context]; + } + + std::shared_ptr getDeviceInfo(ur_device_handle_t Device) { + std::shared_lock Guard(m_DeviceMapMutex); + assert(m_DeviceMap.find(Device) != m_DeviceMap.end()); + return m_DeviceMap[Device]; + } + + std::shared_ptr + getProgramInfo(ur_program_handle_t Program) { + std::shared_lock Guard(m_ProgramMapMutex); + assert(m_ProgramMap.find(Program) != m_ProgramMap.end()); + return m_ProgramMap[Program]; + } + + std::shared_ptr getKernelInfo(ur_kernel_handle_t Kernel) { + std::shared_lock Guard(m_KernelMapMutex); + if (m_KernelMap.find(Kernel) != m_KernelMap.end()) { + return m_KernelMap[Kernel]; + } + return nullptr; + } + + const MsanOptions &getOptions() { return m_Options; } + + void exitWithErrors() { + m_NormalExit = false; + exit(1); + } + + bool isNormalExit() { return m_NormalExit; } + + private: + ur_result_t + updateShadowMemory(std::shared_ptr &ContextInfo, + std::shared_ptr &DeviceInfo, + ur_queue_handle_t Queue); + + ur_result_t enqueueAllocInfo(std::shared_ptr &DeviceInfo, + ur_queue_handle_t Queue, + std::shared_ptr &AI); + + /// Initialize Global Variables & Kernel Name at first Launch + ur_result_t prepareLaunch(std::shared_ptr &DeviceInfo, + ur_queue_handle_t Queue, + ur_kernel_handle_t Kernel, + msan::USMLaunchInfo &LaunchInfo); + + ur_result_t + allocShadowMemory(ur_context_handle_t Context, + std::shared_ptr &DeviceInfo); + + ur_result_t registerSpirKernels(ur_program_handle_t Program); + + private: + std::unordered_map> + m_ContextMap; + ur_shared_mutex m_ContextMapMutex; + std::unordered_map> + m_DeviceMap; + ur_shared_mutex m_DeviceMapMutex; + + std::unordered_map> + m_ProgramMap; + ur_shared_mutex m_ProgramMapMutex; + + std::unordered_map> + m_KernelMap; + ur_shared_mutex m_KernelMapMutex; + + std::unordered_map> + m_MemBufferMap; + ur_shared_mutex m_MemBufferMapMutex; + + /// Assumption: all USM chunks are 
allocated in one VA + MsanAllocationMap m_AllocationMap; + ur_shared_mutex m_AllocationMapMutex; + + MsanOptions m_Options; + + std::unordered_set m_Adapters; + ur_shared_mutex m_AdaptersMutex; + + bool m_NormalExit = true; +}; + +} // namespace msan + +msan::MsanInterceptor *getMsanInterceptor(); + +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/msan/msan_libdevice.hpp b/source/loader/layers/sanitizer/msan/msan_libdevice.hpp new file mode 100644 index 0000000000..cd05cfa38c --- /dev/null +++ b/source/loader/layers/sanitizer/msan/msan_libdevice.hpp @@ -0,0 +1,66 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file msan_libdevice.hpp + * + */ + +#pragma once + +#include "sanitizer_common/sanitizer_libdevice.hpp" + +#if !defined(__SPIR__) && !defined(__SPIRV__) +namespace ur_sanitizer_layer { +#endif // !__SPIR__ && !__SPIRV__ + +struct MsanErrorReport { + int Flag = 0; + + char File[256 + 1] = {}; + char Func[256 + 1] = {}; + + int32_t Line = 0; + + uint64_t GID0 = 0; + uint64_t GID1 = 0; + uint64_t GID2 = 0; + + uint64_t LID0 = 0; + uint64_t LID1 = 0; + uint64_t LID2 = 0; + + uint32_t AccessSize = 0; + ErrorType ErrorTy = ErrorType::UNKNOWN; +}; + +struct MsanLocalArgsInfo { + uint64_t Size = 0; + uint64_t SizeWithRedZone = 0; +}; + +struct MsanLaunchInfo { + uintptr_t GlobalShadowOffset = 0; + uintptr_t GlobalShadowOffsetEnd = 0; + + DeviceType DeviceTy = DeviceType::UNKNOWN; + uint32_t Debug = 0; + uint32_t IsRecover = 0; + + MsanErrorReport Report; +}; + +// Based on the observation, only the last 24 bits of the address of the private +// variable have changed +constexpr std::size_t MSAN_PRIVATE_SIZE = 0xffffffULL + 1; + +constexpr auto kSPIR_MsanDeviceGlobalMetadata = "__MsanDeviceGlobalMetadata"; +constexpr auto kSPIR_MsanSpirKernelMetadata = 
"__MsanKernelMetadata"; + +#if !defined(__SPIR__) && !defined(__SPIRV__) +} // namespace ur_sanitizer_layer +#endif // !__SPIR__ && !__SPIRV__ diff --git a/source/loader/layers/sanitizer/msan/msan_options.cpp b/source/loader/layers/sanitizer/msan/msan_options.cpp new file mode 100644 index 0000000000..b6ab9484da --- /dev/null +++ b/source/loader/layers/sanitizer/msan/msan_options.cpp @@ -0,0 +1,90 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file msan_options.cpp + * + */ + +#include "msan_options.hpp" + +#include "ur/ur.hpp" +#include "ur_sanitizer_layer.hpp" + +#include +#include +#include + +namespace ur_sanitizer_layer { +namespace msan { + +MsanOptions::MsanOptions() { + std::optional OptionsEnvMap; + try { + OptionsEnvMap = getenv_to_map("UR_LAYER_MSAN_OPTIONS"); + } catch (const std::invalid_argument &e) { + std::stringstream SS; + SS << "[ERROR]: "; + SS << e.what(); + getContext()->logger.always(SS.str().c_str()); + die("Sanitizer failed to parse options.\n"); + } + + if (!OptionsEnvMap.has_value()) { + return; + } + + const char *TrueStrings[] = {"1", "true"}; + const char *FalseStrings[] = {"0", "false"}; + + auto InplaceToLower = [](std::string &S) { + std::transform(S.begin(), S.end(), S.begin(), + [](unsigned char C) { return std::tolower(C); }); + }; + auto IsTrue = [&](const std::string &S) { + return std::any_of(std::begin(TrueStrings), std::end(TrueStrings), + [&](const char *CS) { return S == CS; }); + }; + auto IsFalse = [&](const std::string &S) { + return std::any_of(std::begin(FalseStrings), std::end(FalseStrings), + [&](const char *CS) { return S == CS; }); + }; + + auto SetBoolOption = [&](const std::string &Name, bool &Opt) { + auto KV = OptionsEnvMap->find(Name); + if (KV != OptionsEnvMap->end()) { + auto Value = KV->second.front(); + 
InplaceToLower(Value); + if (IsTrue(Value)) { + Opt = true; + } else if (IsFalse(Value)) { + Opt = false; + } else { + std::stringstream SS; + SS << "\"" << Name << "\" is set to \"" << Value + << "\", which is not an valid setting. "; + SS << "Acceptable input are: for enable, use:"; + for (auto &S : TrueStrings) { + SS << " \"" << S << "\""; + } + SS << "; "; + SS << "for disable, use:"; + for (auto &S : FalseStrings) { + SS << " \"" << S << "\""; + } + SS << "."; + getContext()->logger.error(SS.str().c_str()); + die("Sanitizer failed to parse options.\n"); + } + } + }; + + SetBoolOption("debug", Debug); +} + +} // namespace msan +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/msan/msan_options.hpp b/source/loader/layers/sanitizer/msan/msan_options.hpp new file mode 100644 index 0000000000..94b1e2c31e --- /dev/null +++ b/source/loader/layers/sanitizer/msan/msan_options.hpp @@ -0,0 +1,27 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. + * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file msan_options.hpp + * + */ + +#pragma once + +#include + +namespace ur_sanitizer_layer { +namespace msan { + +struct MsanOptions { + bool Debug = false; + + explicit MsanOptions(); +}; + +} // namespace msan +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/msan/msan_report.cpp b/source/loader/layers/sanitizer/msan/msan_report.cpp new file mode 100644 index 0000000000..c6f33a4c93 --- /dev/null +++ b/source/loader/layers/sanitizer/msan/msan_report.cpp @@ -0,0 +1,43 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * @file msan_report.cpp
+ *
+ */
+
+#include "msan_report.hpp"
+#include "msan_libdevice.hpp"
+
+#include "sanitizer_common/sanitizer_common.hpp"
+#include "sanitizer_common/sanitizer_utils.hpp"
+#include "ur_sanitizer_layer.hpp"
+
+namespace ur_sanitizer_layer {
+namespace msan {
+
+void ReportUsesUninitializedValue(const MsanErrorReport &Report,
+                                  ur_kernel_handle_t Kernel) {
+    // Demangle the kernel name so the report shows a readable identifier.
+    const auto KernelName = DemangleName(GetKernelName(Kernel));
+    // Use the recorded source location; an empty field prints as "".
+    const char *SourceFile = Report.File[0] != '\0' ? Report.File : "";
+    const char *SourceFunc = Report.Func[0] != '\0' ? Report.Func : "";
+    auto &Logger = getContext()->logger;
+
+    Logger.always(
+        "====WARNING: DeviceSanitizer: use-of-uninitialized-value");
+    Logger.always(
+        "use of size {} at kernel <{}> LID({}, {}, {}) GID({}, "
+        "{}, {})",
+        Report.AccessSize, KernelName.c_str(), Report.LID0, Report.LID1,
+        Report.LID2, Report.GID0, Report.GID1, Report.GID2);
+    Logger.always(" #0 {} {}:{}", SourceFunc, SourceFile, Report.Line);
+}
+
+} // namespace msan
+} // namespace ur_sanitizer_layer
diff --git a/source/loader/layers/sanitizer/msan/msan_report.hpp b/source/loader/layers/sanitizer/msan/msan_report.hpp
new file mode 100644
index 0000000000..a8d7ac2324
--- /dev/null
+++ b/source/loader/layers/sanitizer/msan/msan_report.hpp
@@ -0,0 +1,27 @@
+/*
+ *
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See LICENSE.TXT
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * @file msan_report.hpp
+ *
+ */
+
+#pragma once
+
+#include "ur_api.h"
+
+namespace ur_sanitizer_layer {
+
+struct MsanErrorReport;
+
+namespace msan {
+
+void ReportUsesUninitializedValue(const MsanErrorReport &Report,
+                                  ur_kernel_handle_t Kernel);
+
+} // namespace msan
+} // namespace ur_sanitizer_layer
diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.cpp b/source/loader/layers/sanitizer/msan/msan_shadow.cpp
new file mode 100644
index 0000000000..add9813db6
--- /dev/null
+++ b/source/loader/layers/sanitizer/msan/msan_shadow.cpp
@@ -0,0 +1,327 @@
+/*
+ *
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See LICENSE.TXT
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * @file msan_shadow.cpp
+ *
+ */
+
+#include "msan_shadow.hpp"
+#include "msan_interceptor.hpp"
+#include "sanitizer_common/sanitizer_utils.hpp"
+#include "ur_api.h"
+#include "ur_sanitizer_layer.hpp"
+
+namespace ur_sanitizer_layer {
+namespace msan {
+
+#define CPU_SHADOW1_BEGIN 0x010000000000ULL
+#define CPU_SHADOW1_END 0x100000000000ULL
+#define CPU_SHADOW2_BEGIN 0x200000000000ULL
+#define CPU_SHADOW2_END 0x300000000000ULL
+#define CPU_SHADOW3_BEGIN 0x500000000000ULL
+#define CPU_SHADOW3_END 0x510000000000ULL
+
+#define CPU_SHADOW_MASK 0x500000000000ULL
+
+/// Returns the shadow memory object that serves devices of the given type.
+///
+/// One object per supported device type is created on first use and shared
+/// by every later caller; it keeps the Context and Device of that first call.
+/// Logs an error and returns nullptr for any other device type.
+std::shared_ptr<MsanShadowMemory>
+GetMsanShadowMemory(ur_context_handle_t Context, ur_device_handle_t Device,
+                    DeviceType Type) {
+    if (Type == DeviceType::CPU) {
+        static std::shared_ptr<MsanShadowMemory> ShadowCPU =
+            std::make_shared<MsanShadowMemoryCPU>(Context, Device);
+        return ShadowCPU;
+    } else if (Type == DeviceType::GPU_PVC) {
+        static std::shared_ptr<MsanShadowMemory> ShadowPVC =
+            std::make_shared<MsanShadowMemoryPVC>(Context, Device);
+        return ShadowPVC;
+    } else if (Type == DeviceType::GPU_DG2) {
+        static std::shared_ptr<MsanShadowMemory> ShadowDG2 =
+            std::make_shared<MsanShadowMemoryDG2>(Context, Device);
+        return ShadowDG2;
+    } else {
+        getContext()->logger.error("Unsupport device type");
+        return nullptr;
+    }
+}
+
+/// Maps the three CPU shadow regions and turns the two gaps between them
+/// into protected ranges.
+///
+/// The mapping is attempted once; every later call returns the result of
+/// that first attempt.
+ur_result_t MsanShadowMemoryCPU::Setup() {
+    static ur_result_t Result = [this]() {
+        // The Linux helpers map with MAP_FIXED, so success means "returned the
+        // requested address". Checking for that (instead of `== 0`) also
+        // catches mmap's failure value, MAP_FAILED, which is not 0. The
+        // region bounds are page aligned, so the helpers do not adjust them.
+        auto MapShadow = [](uptr Begin, uptr End) {
+            return MmapFixedNoReserve(Begin, End - Begin) == Begin;
+        };
+        auto ProtectGap = [](uptr Begin, uptr End) {
+            return ProtectMemoryRange(Begin, End - Begin) == Begin;
+        };
+
+        if (!MapShadow(CPU_SHADOW1_BEGIN, CPU_SHADOW1_END) ||
+            !ProtectGap(CPU_SHADOW1_END, CPU_SHADOW2_BEGIN) ||
+            !MapShadow(CPU_SHADOW2_BEGIN, CPU_SHADOW2_END) ||
+            !ProtectGap(CPU_SHADOW2_END, CPU_SHADOW3_BEGIN) ||
+            !MapShadow(CPU_SHADOW3_BEGIN, CPU_SHADOW3_END)) {
+            return UR_RESULT_ERROR_OUT_OF_HOST_MEMORY;
+        }
+        ShadowBegin = CPU_SHADOW1_BEGIN;
+        ShadowEnd = CPU_SHADOW3_END;
+        DontCoredumpRange(ShadowBegin, ShadowEnd - ShadowBegin);
+        return UR_RESULT_SUCCESS;
+    }();
+    return Result;
+}
+
+/// Unmaps the shadow regions and the protected gaps created by Setup().
+///
+/// Does nothing when no shadow memory is recorded. The unmapping itself is
+/// attempted once; later calls return the result of that first attempt.
+ur_result_t MsanShadowMemoryCPU::Destory() {
+    if (ShadowBegin == 0 && ShadowEnd == 0) {
+        return UR_RESULT_SUCCESS;
+    }
+    static ur_result_t Result = [this]() {
+        // Munmap() returns true on success, so the first range that fails to
+        // unmap stops the teardown with an error.
+        if (!Munmap(CPU_SHADOW1_BEGIN, CPU_SHADOW1_END - CPU_SHADOW1_BEGIN) ||
+            !Munmap(CPU_SHADOW1_END, CPU_SHADOW2_BEGIN - CPU_SHADOW1_END) ||
+            !Munmap(CPU_SHADOW2_BEGIN, CPU_SHADOW2_END - CPU_SHADOW2_BEGIN) ||
+            !Munmap(CPU_SHADOW2_END, CPU_SHADOW3_BEGIN - CPU_SHADOW2_END) ||
+            !Munmap(CPU_SHADOW3_BEGIN, CPU_SHADOW3_END - CPU_SHADOW3_BEGIN)) {
+            return UR_RESULT_ERROR_UNKNOWN;
+        }
+        ShadowBegin = ShadowEnd = 0;
+        return UR_RESULT_SUCCESS;
+    }();
+    return Result;
+}
+
+/// Translates an application address into its shadow address.
+uptr MsanShadowMemoryCPU::MemToShadow(uptr Ptr) {
+    return Ptr ^ CPU_SHADOW_MASK;
+}
+
+/// Fills the shadow bytes of [Ptr, Ptr + Size) with Value.
+///
+/// The shadow is ordinary host memory here, so it is written synchronously
+/// with memset and the queue argument is not used.
+ur_result_t MsanShadowMemoryCPU::EnqueuePoisonShadow(ur_queue_handle_t,
+                                                     uptr Ptr, uptr Size,
+                                                     u8 Value) {
+    if (Size == 0) {
+        return UR_RESULT_SUCCESS;
+    }
+
+    uptr ShadowBegin = MemToShadow(Ptr);
+    uptr ShadowEnd = MemToShadow(Ptr + Size - 1);
+    assert(ShadowBegin <= ShadowEnd);
+    getContext()->logger.debug(
+        "EnqueuePoisonShadow(addr={}, count={}, value={})", (void *)ShadowBegin,
+        ShadowEnd - ShadowBegin + 1, (void *)(size_t)Value);
+    memset((void *)ShadowBegin, Value, ShadowEnd - ShadowBegin + 1);
+
+    return UR_RESULT_SUCCESS;
+}
+
+/// Reserves the virtual address range used as shadow memory on the GPU.
+///
+/// NOTE(review): the cached result belongs to this function, not to the
+/// object, so it is shared by every MsanShadowMemoryGPU instance -- confirm
+/// that at most one GPU shadow object is ever set up per process.
+ur_result_t MsanShadowMemoryGPU::Setup() {
+    // Currently, Level-Zero doesn't create independent VAs for each contexts, if we reserve
+    // shadow memory for each contexts, this will cause out-of-resource error when user uses
+    // multiple contexts. Therefore, we just create one shadow memory here.
+    static ur_result_t Result = [this]() {
+        size_t ShadowSize = GetShadowSize();
+        // TODO: Protect Bad Zone
+        auto Result = getContext()->urDdiTable.VirtualMem.pfnReserve(
+            Context, nullptr, ShadowSize, (void **)&ShadowBegin);
+        if (Result != UR_RESULT_SUCCESS) {
+            // Report the failure to the caller instead of pretending that
+            // shadow memory exists.
+            getContext()->logger.error("urVirtualMemReserve(): {}", Result);
+            return Result;
+        }
+        ShadowEnd = ShadowBegin + ShadowSize;
+        // Retain the context which reserves shadow memory
+        getContext()->urDdiTable.Context.pfnRetain(Context);
+
+        // Set shadow memory for null pointer
+        // NOTE(review): the queue is created but nothing is enqueued on it.
+        ManagedQueue Queue(Context, Device);
+        return UR_RESULT_SUCCESS;
+    }();
+    return Result;
+}
+
+/// Frees the reserved shadow range and drops the context reference taken by
+/// Setup(). Does nothing when no range was reserved.
+ur_result_t MsanShadowMemoryGPU::Destory() {
+    if (ShadowBegin == 0) {
+        return UR_RESULT_SUCCESS;
+    }
+    static ur_result_t Result = [this]() {
+        auto Result = getContext()->urDdiTable.VirtualMem.pfnFree(
+            Context, (const void *)ShadowBegin, GetShadowSize());
+        getContext()->urDdiTable.Context.pfnRelease(Context);
+        return Result;
+    }();
+    return Result;
+}
+
+/// Fills the shadow bytes of [Ptr, Ptr + Size) with Value.
+///
+/// Shadow pages are backed lazily: each page touched by the range gets
+/// physical memory mapped and zeroed the first time it is needed. Unless Ptr
+/// is null, the allocation containing Ptr is recorded as a user of every
+/// touched page so that ReleaseShadow() knows when a page can be unmapped.
+ur_result_t MsanShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue,
+                                                     uptr Ptr, uptr Size,
+                                                     u8 Value) {
+    if (Size == 0) {
+        return UR_RESULT_SUCCESS;
+    }
+
+    uptr ShadowBegin = MemToShadow(Ptr);
+    uptr ShadowEnd = MemToShadow(Ptr + Size - 1);
+    assert(ShadowBegin <= ShadowEnd);
+    {
+        static const size_t PageSize =
+            GetVirtualMemGranularity(Context, Device);
+
+        ur_physical_mem_properties_t Desc{
+            UR_STRUCTURE_TYPE_PHYSICAL_MEM_PROPERTIES, nullptr, 0};
+
+        // Make sure [Ptr, Ptr + Size] is mapped to physical memory
+        for (auto MappedPtr = RoundDownTo(ShadowBegin, PageSize);
+             MappedPtr <= ShadowEnd; MappedPtr += PageSize) {
+            std::scoped_lock Guard(VirtualMemMapsMutex);
+            if (VirtualMemMaps.find(MappedPtr) == VirtualMemMaps.end()) {
+                ur_physical_mem_handle_t PhysicalMem{};
+                auto URes = getContext()->urDdiTable.PhysicalMem.pfnCreate(
+                    Context, Device, PageSize, &Desc, &PhysicalMem);
+                if (URes != UR_RESULT_SUCCESS) {
+                    getContext()->logger.error("urPhysicalMemCreate(): {}",
+                                               URes);
+                    return URes;
+                }
+
+                URes = getContext()->urDdiTable.VirtualMem.pfnMap(
+                    Context, (void *)MappedPtr, PageSize, PhysicalMem, 0,
+                    UR_VIRTUAL_MEM_ACCESS_FLAG_READ_WRITE);
+                if (URes != UR_RESULT_SUCCESS) {
+                    getContext()->logger.error("urVirtualMemMap({}, {}): {}",
+                                               (void *)MappedPtr, PageSize,
+                                               URes);
+                    return URes;
+                }
+
+                getContext()->logger.debug("urVirtualMemMap: {} ~ {}",
+                                           (void *)MappedPtr,
+                                           (void *)(MappedPtr + PageSize - 1));
+
+                // Initialize to zero
+                URes = EnqueueUSMBlockingSet(Queue, (void *)MappedPtr, 0,
+                                             PageSize);
+                if (URes != UR_RESULT_SUCCESS) {
+                    getContext()->logger.error("EnqueueUSMBlockingSet(): {}",
+                                               URes);
+                    return URes;
+                }
+
+                VirtualMemMaps[MappedPtr].first = PhysicalMem;
+            }
+
+            // We don't need to record virtual memory map for null pointer,
+            // since it doesn't have an alloc info.
+            if (Ptr == 0) {
+                continue;
+            }
+
+            auto AllocInfoIt =
+                getMsanInterceptor()->findAllocInfoByAddress(Ptr);
+            assert(AllocInfoIt);
+            VirtualMemMaps[MappedPtr].second.insert((*AllocInfoIt)->second);
+        }
+    }
+
+    auto URes = EnqueueUSMBlockingSet(Queue, (void *)ShadowBegin, Value,
+                                      ShadowEnd - ShadowBegin + 1);
+    getContext()->logger.debug(
+        "EnqueuePoisonShadow (addr={}, count={}, value={}): {}",
+        (void *)ShadowBegin, ShadowEnd - ShadowBegin + 1, (void *)(size_t)Value,
+        URes);
+    if (URes != UR_RESULT_SUCCESS) {
+        getContext()->logger.error("EnqueueUSMBlockingSet(): {}", URes);
+        return URes;
+    }
+
+    return UR_RESULT_SUCCESS;
+}
+
+/// Removes AI from the users of every shadow page covering the allocation
+/// and unmaps the pages that no longer have any user.
+ur_result_t
+MsanShadowMemoryGPU::ReleaseShadow(std::shared_ptr<MsanAllocInfo> AI) {
+    uptr ShadowBegin = MemToShadow(AI->AllocBegin);
+    uptr ShadowEnd = MemToShadow(AI->AllocBegin + AI->AllocSize);
+    assert(ShadowBegin <= ShadowEnd);
+
+    static const size_t PageSize = GetVirtualMemGranularity(Context, Device);
+
+    for (auto MappedPtr = RoundDownTo(ShadowBegin, PageSize);
+         MappedPtr <= ShadowEnd; MappedPtr += PageSize) {
+        std::scoped_lock Guard(VirtualMemMapsMutex);
+        auto MapIt = VirtualMemMaps.find(MappedPtr);
+        if (MapIt == VirtualMemMaps.end()) {
+            continue;
+        }
+        auto &[PhysMem, PageUsers] = MapIt->second;
+        PageUsers.erase(AI);
+        if (!PageUsers.empty()) {
+            continue;
+        }
+        UR_CALL(getContext()->urDdiTable.VirtualMem.pfnUnmap(
+            Context, (void *)MappedPtr, PageSize));
+        UR_CALL(getContext()->urDdiTable.PhysicalMem.pfnRelease(PhysMem));
+        getContext()->logger.debug("urVirtualMemUnmap: {} ~ {}",
+                                   (void *)MappedPtr,
+                                   (void *)(MappedPtr + PageSize - 1));
+        // Forget the page: a stale entry would make EnqueuePoisonShadow()
+        // treat the now unmapped page as still backed by physical memory.
+        VirtualMemMaps.erase(MapIt);
+    }
+
+    return UR_RESULT_SUCCESS;
+}
+
+/// Maps a PVC USM address to its shadow address inside the reserved range.
+uptr MsanShadowMemoryPVC::MemToShadow(uptr Ptr) {
+    assert(Ptr & 0xFF00000000000000ULL && "Ptr must be device USM");
+    return ShadowBegin + (Ptr & 0x3FFF'FFFF'FFFFULL);
+}
+
+/// Maps a DG2 USM address to its shadow address inside the reserved range.
+uptr MsanShadowMemoryDG2::MemToShadow(uptr Ptr) {
+    assert(Ptr & 0xFFFF000000000000ULL && "Ptr must be device USM");
+    return ShadowBegin + (Ptr & 0x3FFF'FFFF'FFFFULL);
+}
+
+} // namespace
msan
+} // namespace ur_sanitizer_layer
diff --git a/source/loader/layers/sanitizer/msan/msan_shadow.hpp b/source/loader/layers/sanitizer/msan/msan_shadow.hpp
new file mode 100644
index 0000000000..de13683cbc
--- /dev/null
+++ b/source/loader/layers/sanitizer/msan/msan_shadow.hpp
@@ -0,0 +1,144 @@
+/*
+ *
+ * Copyright (C) 2024 Intel Corporation
+ *
+ * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See LICENSE.TXT
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ * @file msan_shadow.hpp
+ *
+ */
+
+#pragma once
+
+#include "msan_allocator.hpp"
+#include "sanitizer_common/sanitizer_libdevice.hpp"
+
+#include
+
+namespace ur_sanitizer_layer {
+namespace msan {
+/// Interface of the shadow memory that the MSan layer keeps per device type.
+struct MsanShadowMemory {
+    MsanShadowMemory(ur_context_handle_t Context, ur_device_handle_t Device)
+        : Context(Context), Device(Device) {}
+
+    virtual ~MsanShadowMemory() {}
+    /// Creates the shadow region and records it in ShadowBegin/ShadowEnd.
+    virtual ur_result_t Setup() = 0;
+    /// Releases what Setup() created.
+    virtual ur_result_t Destory() = 0;
+    /// Translates an application address into the matching shadow address.
+    virtual uptr MemToShadow(uptr Ptr) = 0;
+    /// Sets the shadow of [Ptr, Ptr + Size) to Value.
+    virtual ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr,
+                                            uptr Size, u8 Value) = 0;
+    /// Hook for dropping per-allocation shadow state; a no-op by default.
+    virtual ur_result_t ReleaseShadow(std::shared_ptr) {
+        return UR_RESULT_SUCCESS;
+    }
+    /// Context passed to the constructor.
+    ur_context_handle_t Context{};
+    /// Device passed to the constructor.
+    ur_device_handle_t Device{};
+    /// Start of the shadow region; stays 0 until Setup() fills it in.
+    uptr ShadowBegin = 0;
+    /// End of the shadow region; stays 0 until Setup() fills it in.
+    uptr ShadowEnd = 0;
+};
+
+/// Shadow Memory layout of CPU device
+///
+/// 0x000000000000 ~ 0x010000000000 "app-1"
+/// 0x010000000000 ~ 0x100000000000 "shadow-2"
+/// 0x100000000000 ~ 0x110000000000 "invalid"
+/// 0x110000000000 ~ 0x200000000000 "origin-2"
+/// 0x200000000000 ~ 0x300000000000 "shadow-3"
+/// 0x300000000000 ~ 0x400000000000 "origin-3"
+/// 0x400000000000 ~ 0x500000000000 "invalid"
+/// 0x500000000000 ~ 0x510000000000 "shadow-1"
+/// 0x510000000000 ~ 0x600000000000 "app-2"
+/// 0x600000000000 ~ 0x610000000000 "origin-1"
+/// 0x610000000000 ~ 0x700000000000 "invalid"
+/// 0x700000000000 ~ 0x740000000000 "allocator"
+/// 0x740000000000 ~ 0x800000000000
"app-3"
+///
+struct MsanShadowMemoryCPU final : public MsanShadowMemory {
+    MsanShadowMemoryCPU(ur_context_handle_t Context, ur_device_handle_t Device)
+        : MsanShadowMemory(Context, Device) {}
+
+    ur_result_t Setup() override;
+
+    ur_result_t Destory() override;
+
+    uptr MemToShadow(uptr Ptr) override;
+
+    ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr,
+                                    uptr Size, u8 Value) override;
+};
+/// Shadow memory kept in a reserved device virtual address range.
+struct MsanShadowMemoryGPU : public MsanShadowMemory {
+    MsanShadowMemoryGPU(ur_context_handle_t Context, ur_device_handle_t Device)
+        : MsanShadowMemory(Context, Device) {}
+
+    ur_result_t Setup() override;
+
+    ur_result_t Destory() override;
+    ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr,
+                                    uptr Size, u8 Value) override final;
+
+    ur_result_t ReleaseShadow(std::shared_ptr AI) override final;
+    /// Size in bytes of the virtual address range reserved by Setup().
+    virtual size_t GetShadowSize() = 0;
+    /// Guards VirtualMemMaps.
+    ur_mutex VirtualMemMapsMutex;
+    /// Per mapped shadow page: its physical memory and the allocations using it.
+    std::unordered_map<
+        uptr, std::pair>>>
+        VirtualMemMaps;
+};
+
+/// Shadow Memory layout of GPU PVC device
+///
+/// USM Allocation Range (56 bits)
+/// Host USM : 0x0000_0000_0000_0000 ~ 0x00ff_ffff_ffff_ffff
+/// Shared USM : 0x0000_0000_0000_0000 ~ 0x0000_7fff_ffff_ffff
+/// Device USM : 0xff00_0000_0000_0000 ~ 0xff00_ffff_ffff_ffff
+///
+/// USM Allocation Range (AllocateHostAllocationsInHeapExtendedHost=0)
+/// Host USM : 0x0000_0000_0000_0000 ~ 0x0000_7fff_ffff_ffff
+/// Shared USM : 0x0000_0000_0000_0000 ~ 0x0000_7fff_ffff_ffff
+/// Device USM : 0xff00_0000_0000_0000 ~ 0xff00_ffff_ffff_ffff
+///
+struct MsanShadowMemoryPVC final : public MsanShadowMemoryGPU {
+    MsanShadowMemoryPVC(ur_context_handle_t Context, ur_device_handle_t Device)
+        : MsanShadowMemoryGPU(Context, Device) {}
+
+    uptr MemToShadow(uptr Ptr) override;
+
+    size_t GetShadowSize() override { return 0x8000'0000'0000ULL; }
+};
+
+/// Shadow Memory layout of GPU DG2 device
+///
+/// USM Allocation Range (48 bits)
+/// Host/Shared USM : 0x0000_0000_0000_0000 ~ 0x0000_7fff_ffff_ffff
+/// Device USM :
0xffff_8000_0000_0000 ~ 0xffff_ffff_ffff_ffff +/// +struct MsanShadowMemoryDG2 final : public MsanShadowMemoryGPU { + MsanShadowMemoryDG2(ur_context_handle_t Context, ur_device_handle_t Device) + : MsanShadowMemoryGPU(Context, Device) {} + + uptr MemToShadow(uptr Ptr) override; + + size_t GetShadowSize() override { return 0x4000'0000'0000ULL; } /* 2^46 bytes = 64 TiB */ +}; + +std::shared_ptr /* NOTE(review): the template argument of std::shared_ptr is missing in this text; restore it from the upstream header */ +GetMsanShadowMemory(ur_context_handle_t Context, ur_device_handle_t Device, + DeviceType Type); + +} // namespace msan +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/sanitizer_common/linux/sanitizer_utils.cpp b/source/loader/layers/sanitizer/sanitizer_common/linux/sanitizer_utils.cpp index 380482ff84..df64a72ed7 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/linux/sanitizer_utils.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/linux/sanitizer_utils.cpp @@ -26,6 +26,15 @@ namespace ur_sanitizer_layer { bool IsInASanContext() { return (void *)__asan_init != nullptr; } +uptr MmapFixedNoReserve(uptr Addr, uptr Size) { /* anonymous private read/write mapping placed exactly at Addr (MAP_FIXED) with MAP_NORESERVE; returns the raw mmap() result, so callers have to compare it against MAP_FAILED themselves */ + Size = RoundUpTo(Size, EXEC_PAGESIZE); + Addr = RoundDownTo(Addr, EXEC_PAGESIZE); /* NOTE(review): Size was rounded before Addr moved down, so an unaligned Addr can leave the tail of the requested range unmapped; assumes RoundUpTo/RoundDownTo align to the given granularity, confirm callers pass aligned addresses */ + void *P = + mmap((void *)Addr, Size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE | MAP_ANONYMOUS, -1, 0); + return (uptr)P; +} + uptr MmapNoReserve(uptr Addr, uptr Size) { Size = RoundUpTo(Size, EXEC_PAGESIZE); Addr = RoundDownTo(Addr, EXEC_PAGESIZE); @@ -36,6 +45,15 @@ uptr MmapNoReserve(uptr Addr, uptr Size) { bool Munmap(uptr Addr, uptr Size) { return munmap((void *)Addr, Size) == 0; } +uptr ProtectMemoryRange(uptr Addr, uptr Size) { /* maps the range as PROT_NONE at the fixed address, so any access to it faults; same rounding and raw mmap() return value as MmapFixedNoReserve */ + Size = RoundUpTo(Size, EXEC_PAGESIZE); + Addr = RoundDownTo(Addr, EXEC_PAGESIZE); + void *P = + mmap((void *)Addr, Size, PROT_NONE, + MAP_PRIVATE | MAP_FIXED | MAP_NORESERVE | MAP_ANONYMOUS, -1, 0); /* per mmap(2), MAP_FIXED discards whatever was mapped in this range before */ + return (uptr)P; +} + bool DontCoredumpRange(uptr Addr, uptr Size) { Size = RoundUpTo(Size, EXEC_PAGESIZE); Addr = RoundDownTo(Addr, EXEC_PAGESIZE); diff --git 
a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_common.hpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_common.hpp index 147bd23be3..c23dc15f7e 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_common.hpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_common.hpp @@ -139,8 +139,10 @@ struct SourceInfo { bool IsInASanContext(); +uptr MmapFixedNoReserve(uptr Addr, uptr Size); /* Linux implementation: anonymous read/write mapping with MAP_FIXED and MAP_NORESERVE at the page-rounded Addr; returns the raw mmap() result */ uptr MmapNoReserve(uptr Addr, uptr Size); bool Munmap(uptr Addr, uptr Size); +uptr ProtectMemoryRange(uptr Addr, uptr Size); /* Linux implementation: remaps the page-rounded range as PROT_NONE with MAP_FIXED; returns the raw mmap() result */ bool DontCoredumpRange(uptr Addr, uptr Size); void *GetMemFunctionPointer(const char *); diff --git a/source/loader/layers/sanitizer/ur_sanddi.cpp b/source/loader/layers/sanitizer/ur_sanddi.cpp new file mode 100644 index 0000000000..59764645f9 --- /dev/null +++ b/source/loader/layers/sanitizer/ur_sanddi.cpp @@ -0,0 +1,54 @@ +/* + * + * Copyright (C) 2024 Intel Corporation + * + * Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
+ * See LICENSE.TXT + * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + * + * @file ur_sanddi.cpp + * + */ + +#include "asan/asan_ddi.hpp" +#include "msan/msan_ddi.hpp" +#include "ur_sanitizer_layer.hpp" + +namespace ur_sanitizer_layer { + +ur_result_t context_t::init(ur_dditable_t *dditable, + const std::set &enabledLayerNames, /* NOTE(review): the template argument of std::set is missing in this text; restore it from the upstream source */ + [[maybe_unused]] codeloc_data codelocData) { /* picks at most one sanitizer from the enabled layer names, then runs its interceptor init and DDI-table init; returns success untouched when none or both are requested */ + bool asanEnabled = enabledLayerNames.count("UR_LAYER_ASAN"); + bool msanEnabled = enabledLayerNames.count("UR_LAYER_MSAN"); + + if (asanEnabled && msanEnabled) { + getContext()->logger.warning( + "Enabling ASAN and MSAN at the same time is not " + "supported."); + return UR_RESULT_SUCCESS; /* both requested: only the warning is emitted, enabledType and urDdiTable stay as they were */ + } else if (asanEnabled) { + enabledType = SanitizerType::AddressSanitizer; + } else if (msanEnabled) { + enabledType = SanitizerType::MemorySanitizer; + } else { + return UR_RESULT_SUCCESS; /* neither sanitizer layer requested */ + } + + urDdiTable = *dditable; /* copy taken before init*DDITable receives dditable; presumably keeps the original entry points for the layer to call, confirm in the DDI code */ + + switch (enabledType) { + case SanitizerType::AddressSanitizer: + initAsanInterceptor(); + return initAsanDDITable(dditable); + case SanitizerType::MemorySanitizer: + initMsanInterceptor(); + return initMsanDDITable(dditable); + default: /* not reached: the branches above either returned or set one of the two handled values */ + break; + } + + return UR_RESULT_SUCCESS; +} + +} // namespace ur_sanitizer_layer diff --git a/source/loader/layers/sanitizer/ur_sanitizer_layer.cpp b/source/loader/layers/sanitizer/ur_sanitizer_layer.cpp index d1e00c640c..d0a172f5e5 100644 --- a/source/loader/layers/sanitizer/ur_sanitizer_layer.cpp +++ b/source/loader/layers/sanitizer/ur_sanitizer_layer.cpp @@ -12,6 +12,7 @@ #include "ur_sanitizer_layer.hpp" #include "asan/asan_ddi.hpp" +#include "msan/msan_ddi.hpp" namespace ur_sanitizer_layer { context_t *getContext() { return context_t::get_direct(); } @@ -26,6 +27,9 @@ ur_result_t context_t::tearDown() { case SanitizerType::AddressSanitizer: destroyAsanInterceptor(); break; + case SanitizerType::MemorySanitizer: /* MSAN counterpart of the ASAN case above */ + destroyMsanInterceptor(); + break; default: break; }