From 29e5519535d5661d6b528b072ab8d2ba3c68323c Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Thu, 12 Sep 2024 13:56:19 +0100 Subject: [PATCH 1/6] Set default visibility to hidden in Rel. builds --- cmake/helpers.cmake | 1 + examples/collector/CMakeLists.txt | 4 ++-- include/ur_api.h | 10 +++++++++- scripts/core/common.yml | 6 ++++++ test/layers/tracing/CMakeLists.txt | 4 ++-- tools/urtrace/CMakeLists.txt | 4 ++-- 6 files changed, 22 insertions(+), 7 deletions(-) diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index 24cb6f8e54..6a5700da8b 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -70,6 +70,7 @@ function(add_ur_target_compile_options name) ) if (CMAKE_BUILD_TYPE STREQUAL "Release") target_compile_definitions(${name} PRIVATE -D_FORTIFY_SOURCE=2) + target_compile_options(${name} PRIVATE -fvisibility=hidden) endif() if(UR_DEVELOPER_MODE) target_compile_options(${name} PRIVATE diff --git a/examples/collector/CMakeLists.txt b/examples/collector/CMakeLists.txt index 5fe484d0b8..6dd112aae0 100644 --- a/examples/collector/CMakeLists.txt +++ b/examples/collector/CMakeLists.txt @@ -17,6 +17,6 @@ target_link_libraries(${TARGET_NAME} PRIVATE ${TARGET_XPTI}) target_include_directories(${TARGET_NAME} PRIVATE ${xpti_SOURCE_DIR}/include) if(MSVC) - target_compile_definitions(${TARGET_NAME} PRIVATE - XPTI_STATIC_LIBRARY XPTI_CALLBACK_API_EXPORTS) + target_compile_definitions(${TARGET_NAME} PRIVATE XPTI_STATIC_LIBRARY) endif() +target_compile_definitions(${TARGET_NAME} PRIVATE XPTI_CALLBACK_API_EXPORTS) diff --git a/include/ur_api.h b/include/ur_api.h index a707d40a3f..e75793f3d2 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -332,9 +332,17 @@ typedef enum ur_structure_type_t { #if defined(_WIN32) /// @brief Microsoft-specific dllexport storage-class attribute #define UR_APIEXPORT __declspec(dllexport) +#endif // defined(_WIN32) +#endif // UR_APIEXPORT + +/////////////////////////////////////////////////////////////////////////////// +#ifndef UR_APIEXPORT +#if __GNUC__ >= 4 +/// @brief GCC-specific dllexport storage-class attribute +#define UR_APIEXPORT __attribute__((visibility("default"))) #else #define UR_APIEXPORT -#endif // defined(_WIN32) +#endif // __GNUC__ >= 4 #endif // UR_APIEXPORT /////////////////////////////////////////////////////////////////////////////// diff --git a/scripts/core/common.yml b/scripts/core/common.yml index d06333eb07..5df4a7c04e 100644 --- a/scripts/core/common.yml +++ b/scripts/core/common.yml @@ -39,6 +39,12 @@ desc: "Microsoft-specific dllexport storage-class attribute" condition: "defined(_WIN32)" name: $X_APIEXPORT value: __declspec(dllexport) +--- #-------------------------------------------------------------------------- +type: macro +desc: "GCC-specific dllexport storage-class attribute" +condition: "__GNUC__ >= 4" +name: $X_APIEXPORT +value: __attribute__ ((visibility ("default"))) altvalue: "" --- #-------------------------------------------------------------------------- type: macro diff --git a/test/layers/tracing/CMakeLists.txt b/test/layers/tracing/CMakeLists.txt index 969e4318b1..c09f2eafb1 100644 --- a/test/layers/tracing/CMakeLists.txt +++ b/test/layers/tracing/CMakeLists.txt @@ -15,9 +15,9 @@ target_link_libraries(test_collector PRIVATE ${TARGET_XPTI}) target_include_directories(test_collector PRIVATE ${xpti_SOURCE_DIR}/include) if(MSVC) - target_compile_definitions(test_collector PRIVATE - XPTI_STATIC_LIBRARY XPTI_CALLBACK_API_EXPORTS) + target_compile_definitions(test_collector PRIVATE XPTI_STATIC_LIBRARY) endif() +target_compile_definitions(test_collector PRIVATE XPTI_CALLBACK_API_EXPORTS) function(set_tracing_test_props target_name collector_name) set_tests_properties(${target_name} PROPERTIES diff --git a/tools/urtrace/CMakeLists.txt b/tools/urtrace/CMakeLists.txt index 085f361223..9b385606ea 100644 --- a/tools/urtrace/CMakeLists.txt +++ b/tools/urtrace/CMakeLists.txt @@ -17,9 +17,9 @@ target_link_libraries(${TARGET_NAME} PRIVATE ${TARGET_XPTI} ${PROJECT_NAME}::com target_include_directories(${TARGET_NAME} PRIVATE ${xpti_SOURCE_DIR}/include) if(MSVC) - target_compile_definitions(${TARGET_NAME} PRIVATE - XPTI_STATIC_LIBRARY XPTI_CALLBACK_API_EXPORTS) + target_compile_definitions(${TARGET_NAME} PRIVATE XPTI_STATIC_LIBRARY) endif() +target_compile_definitions(${TARGET_NAME} PRIVATE XPTI_CALLBACK_API_EXPORTS) set(UR_TRACE_CLI_BIN ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/urtrace) From d7ac3ddbe95dc941da3553bd448c9d5f2e5ac075 Mon Sep 17 00:00:00 2001 From: Martin Morrison-Grant Date: Thu, 5 Sep 2024 17:35:54 +0100 Subject: [PATCH 2/6] [Loader] Supress system errors when loading adapters on Windows. --- source/loader/ur_loader.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/source/loader/ur_loader.cpp b/source/loader/ur_loader.cpp index f2b43f2725..97fd0019a1 100644 --- a/source/loader/ur_loader.cpp +++ b/source/loader/ur_loader.cpp @@ -17,6 +17,19 @@ namespace ur_loader { context_t *getContext() { return context_t::get_direct(); } ur_result_t context_t::init() { +#ifdef _WIN32 + // Suppress system errors. + // Tells the system to not display the critical-error-handler message box. + // Instead, the system sends the error to the calling process. + // This is crucial for graceful handling of plugins that couldn't be + // loaded, e.g. due to missing native run-times. + // Sometimes affects L0 or the unified runtime. + // TODO: add reporting in case of an error. + // NOTE: we restore the old mode to not affect user app behavior. + // See https://github.com/intel/llvm/blob/sycl/sycl/ur_win_proxy_loader/ur_win_proxy_loader.cpp (preloadLibraries()) + UINT SavedMode = SetErrorMode(SEM_FAILCRITICALERRORS); +#endif + #ifdef UR_STATIC_ADAPTER_LEVEL_ZERO // If the adapters were force loaded, it means the user wants to use // a specific adapter library. Don't load any static adapters. @@ -35,6 +48,10 @@ ur_result_t context_t::init() { } } } +#ifdef _WIN32 + // Restore system error handling. + (void)SetErrorMode(SavedMode); +#endif forceIntercept = getenv_tobool("UR_ENABLE_LOADER_INTERCEPT"); From 53abe070457de7885a8b511b0cbd992d21958aeb Mon Sep 17 00:00:00 2001 From: Martin Grant Date: Fri, 13 Sep 2024 17:19:03 +0100 Subject: [PATCH 3/6] Apply suggestions from code review Co-authored-by: aarongreig --- source/loader/ur_loader.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/source/loader/ur_loader.cpp b/source/loader/ur_loader.cpp index 97fd0019a1..e5a2bdb34e 100644 --- a/source/loader/ur_loader.cpp +++ b/source/loader/ur_loader.cpp @@ -21,9 +21,8 @@ ur_result_t context_t::init() { // Suppress system errors. // Tells the system to not display the critical-error-handler message box. // Instead, the system sends the error to the calling process. - // This is crucial for graceful handling of plugins that couldn't be + // This is crucial for graceful handling of adapters that couldn't be // loaded, e.g. due to missing native run-times. - // Sometimes affects L0 or the unified runtime. // TODO: add reporting in case of an error. // NOTE: we restore the old mode to not affect user app behavior. // See https://github.com/intel/llvm/blob/sycl/sycl/ur_win_proxy_loader/ur_win_proxy_loader.cpp (preloadLibraries()) From a8a492a6f9be5b09e8436865a6efd237a046264e Mon Sep 17 00:00:00 2001 From: "Neil R. Spruit" Date: Mon, 16 Sep 2024 08:58:40 -0700 Subject: [PATCH 4/6] [L0] Fix urEnqueueDeviceGlobalVariableWrite to use device specific modules - Fixed urEnqueueDeviceGlobalVariableWrite to check the map for a device specific module for the device used by the queue if it exists, otherwise use the ZeModule in the Program handle. Signed-off-by: Neil R. Spruit --- source/adapters/level_zero/kernel.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index 8e627f3ade..9c638d53f6 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -494,11 +494,19 @@ ur_result_t urEnqueueDeviceGlobalVariableWrite( ) { std::scoped_lock lock(Queue->Mutex); + ze_module_handle_t ZeModule{}; + auto It = Program->ZeModuleMap.find(Queue->Device->ZeDevice); + if (It != Program->ZeModuleMap.end()) { + ZeModule = It->second; + } else { + ZeModule = Program->ZeModule; + } + // Find global variable pointer size_t GlobalVarSize = 0; void *GlobalVarPtr = nullptr; ZE2UR_CALL(zeModuleGetGlobalPointer, - (Program->ZeModule, Name, &GlobalVarSize, &GlobalVarPtr)); + (ZeModule, Name, &GlobalVarSize, &GlobalVarPtr)); if (GlobalVarSize < Offset + Count) { setErrorMessage("Write device global variable is out of range.", UR_RESULT_ERROR_INVALID_VALUE, From bd5ed375014e9131605713997f8ec13b8eb8e6df Mon Sep 17 00:00:00 2001 From: "Neil R. Spruit" Date: Mon, 16 Sep 2024 11:02:24 -0700 Subject: [PATCH 5/6] [L0] Fix Counter Based Event Default signal given placeholder - When a counter based event is created and is expected to be already signalled, then those events either need to be labelled as Completed==true or they need to be signalled on the device vs the host. - Now, all places where host signal was skipped, those events are marked "Completed" such that those events will be skipped in the waitlist and in the case of the multi device, the event will be signalled on the device as part of the execute with the previous waitlist. Signed-off-by: Neil R. Spruit --- source/adapters/level_zero/event.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 01ea6efb91..f155fd3c82 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -1546,8 +1546,13 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( ZE2UR_CALL(zeCommandListAppendWaitOnEvents, (ZeCommandList, 1u, &EventList[I]->ZeEvent)); - if (!MultiDeviceEvent->CounterBasedEventsEnabled) + if (!MultiDeviceEvent->CounterBasedEventsEnabled) { ZE2UR_CALL(zeEventHostSignal, (MultiDeviceZeEvent)); + } else { + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (ZeCommandList, MultiDeviceZeEvent)); + } + MultiDeviceEvent->Completed = true; UR_CALL(Queue->executeCommandList(CommandList, /* IsBlocking */ false, /* OkToBatchCommand */ true)); From 3f128d09d7f7832f90f2bbbc3d4465a5d6083f29 Mon Sep 17 00:00:00 2001 From: "Neil R. Spruit" Date: Thu, 12 Sep 2024 12:27:18 -0700 Subject: [PATCH 6/6] [L0] Fix urEnqueueEventsWaitWithBarrier for driver in order lists - Fix urEnqueueEventsWaitWithBarrier to avoid appending a wait on events given a waitlist, but driver in order lists. - Driver in order lists gurantees this implicitly and is not needed. - fix immediate command list cache entry to also set the in order flag. Signed-off-by: Neil R. Spruit --- source/adapters/level_zero/context.cpp | 2 ++ source/adapters/level_zero/event.cpp | 4 +++- source/adapters/level_zero/queue.cpp | 4 +++- 3 files changed, 8 insertions(+), 2 deletions(-) diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index a8eaaa2317..296e3e98d5 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -530,6 +530,8 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( counterBasedExt.flags = ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE; } + logger::debug("ze_event_pool_desc_t counter based flags set to: {}", + counterBasedExt.flags); ZeEventPoolDesc.pNext = &counterBasedExt; } diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 01ea6efb91..822f7e4f5d 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -196,7 +196,9 @@ ur_result_t urEnqueueEventsWaitWithBarrier( // if (Queue->isInOrderQueue() && InOrderBarrierBySignal && !Queue->isProfilingEnabled()) { - if (EventWaitList.Length) { + // If we are using driver in order lists, then append wait on events + // is unnecessary and we can signal the event created. + if (EventWaitList.Length && !CmdList->second.IsInOrderList) { ZE2UR_CALL(zeCommandListAppendWaitOnEvents, (CmdList->first, EventWaitList.Length, EventWaitList.ZeEventList)); diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 9757dad74f..978547df10 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -2386,6 +2386,7 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() { ZeCommandQueueDesc.ordinal = QueueOrdinal; ZeCommandQueueDesc.index = QueueIndex; ZeCommandQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + bool isInOrderList = false; const char *Priority = "Normal"; if (Queue->isPriorityLow()) { ZeCommandQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW; @@ -2401,6 +2402,7 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() { } if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue()) { + isInOrderList = true; ZeCommandQueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_IN_ORDER; } @@ -2449,7 +2451,7 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() { ZeCommandList, ur_command_list_info_t( nullptr, true, false, nullptr, ZeCommandQueueDesc, - Queue->useCompletionBatching(), true, false, true)}) + Queue->useCompletionBatching(), true, isInOrderList, true)}) .first; return ImmCmdLists[Index];