diff --git a/cmake/helpers.cmake b/cmake/helpers.cmake index 24cb6f8e54..6a5700da8b 100644 --- a/cmake/helpers.cmake +++ b/cmake/helpers.cmake @@ -70,6 +70,7 @@ function(add_ur_target_compile_options name) ) if (CMAKE_BUILD_TYPE STREQUAL "Release") target_compile_definitions(${name} PRIVATE -D_FORTIFY_SOURCE=2) + target_compile_options(${name} PRIVATE -fvisibility=hidden) endif() if(UR_DEVELOPER_MODE) target_compile_options(${name} PRIVATE diff --git a/examples/collector/CMakeLists.txt b/examples/collector/CMakeLists.txt index 5fe484d0b8..6dd112aae0 100644 --- a/examples/collector/CMakeLists.txt +++ b/examples/collector/CMakeLists.txt @@ -17,6 +17,6 @@ target_link_libraries(${TARGET_NAME} PRIVATE ${TARGET_XPTI}) target_include_directories(${TARGET_NAME} PRIVATE ${xpti_SOURCE_DIR}/include) if(MSVC) - target_compile_definitions(${TARGET_NAME} PRIVATE - XPTI_STATIC_LIBRARY XPTI_CALLBACK_API_EXPORTS) + target_compile_definitions(${TARGET_NAME} PRIVATE XPTI_STATIC_LIBRARY) endif() +target_compile_definitions(${TARGET_NAME} PRIVATE XPTI_CALLBACK_API_EXPORTS) diff --git a/include/ur_api.h b/include/ur_api.h index a707d40a3f..e75793f3d2 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -332,9 +332,17 @@ typedef enum ur_structure_type_t { #if defined(_WIN32) /// @brief Microsoft-specific dllexport storage-class attribute #define UR_APIEXPORT __declspec(dllexport) +#endif // defined(_WIN32) +#endif // UR_APIEXPORT + +/////////////////////////////////////////////////////////////////////////////// +#ifndef UR_APIEXPORT +#if __GNUC__ >= 4 +/// @brief GCC-specific dllexport storage-class attribute +#define UR_APIEXPORT __attribute__((visibility("default"))) #else #define UR_APIEXPORT -#endif // defined(_WIN32) +#endif // __GNUC__ >= 4 #endif // UR_APIEXPORT /////////////////////////////////////////////////////////////////////////////// diff --git a/scripts/core/common.yml b/scripts/core/common.yml index d06333eb07..5df4a7c04e 100644 --- a/scripts/core/common.yml +++ b/scripts/core/common.yml @@ -39,6 +39,12 @@ desc: "Microsoft-specific dllexport storage-class attribute" condition: "defined(_WIN32)" name: $X_APIEXPORT value: __declspec(dllexport) +--- #-------------------------------------------------------------------------- +type: macro +desc: "GCC-specific dllexport storage-class attribute" +condition: "__GNUC__ >= 4" +name: $X_APIEXPORT +value: __attribute__ ((visibility ("default"))) altvalue: "" --- #-------------------------------------------------------------------------- type: macro diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index a8eaaa2317..296e3e98d5 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -530,6 +530,8 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool( counterBasedExt.flags = ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE; } + logger::debug("ze_event_pool_desc_t counter based flags set to: {}", + counterBasedExt.flags); ZeEventPoolDesc.pNext = &counterBasedExt; } diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 01ea6efb91..f58db37753 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -196,7 +196,9 @@ ur_result_t urEnqueueEventsWaitWithBarrier( // if (Queue->isInOrderQueue() && InOrderBarrierBySignal && !Queue->isProfilingEnabled()) { - if (EventWaitList.Length) { + // If we are using driver in order lists, then append wait on events + // is unnecessary and we can signal the event created. + if (EventWaitList.Length && !CmdList->second.IsInOrderList) { ZE2UR_CALL(zeCommandListAppendWaitOnEvents, (CmdList->first, EventWaitList.Length, EventWaitList.ZeEventList)); @@ -1546,8 +1548,13 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( ZE2UR_CALL(zeCommandListAppendWaitOnEvents, (ZeCommandList, 1u, &EventList[I]->ZeEvent)); - if (!MultiDeviceEvent->CounterBasedEventsEnabled) + if (!MultiDeviceEvent->CounterBasedEventsEnabled) { ZE2UR_CALL(zeEventHostSignal, (MultiDeviceZeEvent)); + } else { + ZE2UR_CALL(zeCommandListAppendSignalEvent, + (ZeCommandList, MultiDeviceZeEvent)); + } + MultiDeviceEvent->Completed = true; UR_CALL(Queue->executeCommandList(CommandList, /* IsBlocking */ false, /* OkToBatchCommand */ true)); diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index 8e627f3ade..9c638d53f6 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -494,11 +494,19 @@ ur_result_t urEnqueueDeviceGlobalVariableWrite( ) { std::scoped_lock lock(Queue->Mutex); + ze_module_handle_t ZeModule{}; + auto It = Program->ZeModuleMap.find(Queue->Device->ZeDevice); + if (It != Program->ZeModuleMap.end()) { + ZeModule = It->second; + } else { + ZeModule = Program->ZeModule; + } + // Find global variable pointer size_t GlobalVarSize = 0; void *GlobalVarPtr = nullptr; ZE2UR_CALL(zeModuleGetGlobalPointer, - (Program->ZeModule, Name, &GlobalVarSize, &GlobalVarPtr)); + (ZeModule, Name, &GlobalVarSize, &GlobalVarPtr)); if (GlobalVarSize < Offset + Count) { setErrorMessage("Write device global variable is out of range.", UR_RESULT_ERROR_INVALID_VALUE, diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 9757dad74f..978547df10 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -2386,6 +2386,7 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() { ZeCommandQueueDesc.ordinal = QueueOrdinal; ZeCommandQueueDesc.index = QueueIndex; ZeCommandQueueDesc.mode = ZE_COMMAND_QUEUE_MODE_ASYNCHRONOUS; + bool isInOrderList = false; const char *Priority = "Normal"; if (Queue->isPriorityLow()) { ZeCommandQueueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_PRIORITY_LOW; @@ -2401,6 +2402,7 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() { } if (Queue->Device->useDriverInOrderLists() && Queue->isInOrderQueue()) { + isInOrderList = true; ZeCommandQueueDesc.flags |= ZE_COMMAND_QUEUE_FLAG_IN_ORDER; } @@ -2449,7 +2451,7 @@ ur_command_list_ptr_t &ur_queue_handle_t_::ur_queue_group_t::getImmCmdList() { ZeCommandList, ur_command_list_info_t( nullptr, true, false, nullptr, ZeCommandQueueDesc, - Queue->useCompletionBatching(), true, false, true)}) + Queue->useCompletionBatching(), true, isInOrderList, true)}) .first; return ImmCmdLists[Index]; diff --git a/source/loader/ur_loader.cpp b/source/loader/ur_loader.cpp index f2b43f2725..e5a2bdb34e 100644 --- a/source/loader/ur_loader.cpp +++ b/source/loader/ur_loader.cpp @@ -17,6 +17,18 @@ namespace ur_loader { context_t *getContext() { return context_t::get_direct(); } ur_result_t context_t::init() { +#ifdef _WIN32 + // Suppress system errors. + // Tells the system to not display the critical-error-handler message box. + // Instead, the system sends the error to the calling process. + // This is crucial for graceful handling of adapters that couldn't be + // loaded, e.g. due to missing native run-times. + // TODO: add reporting in case of an error. + // NOTE: we restore the old mode to not affect user app behavior. + // See https://github.com/intel/llvm/blob/sycl/sycl/ur_win_proxy_loader/ur_win_proxy_loader.cpp (preloadLibraries()) + UINT SavedMode = SetErrorMode(SEM_FAILCRITICALERRORS); +#endif + #ifdef UR_STATIC_ADAPTER_LEVEL_ZERO // If the adapters were force loaded, it means the user wants to use // a specific adapter library. Don't load any static adapters. @@ -35,6 +47,10 @@ ur_result_t context_t::init() { } } } +#ifdef _WIN32 + // Restore system error handling. + (void)SetErrorMode(SavedMode); +#endif forceIntercept = getenv_tobool("UR_ENABLE_LOADER_INTERCEPT"); diff --git a/test/layers/tracing/CMakeLists.txt b/test/layers/tracing/CMakeLists.txt index 969e4318b1..c09f2eafb1 100644 --- a/test/layers/tracing/CMakeLists.txt +++ b/test/layers/tracing/CMakeLists.txt @@ -15,9 +15,9 @@ target_link_libraries(test_collector PRIVATE ${TARGET_XPTI}) target_include_directories(test_collector PRIVATE ${xpti_SOURCE_DIR}/include) if(MSVC) - target_compile_definitions(test_collector PRIVATE - XPTI_STATIC_LIBRARY XPTI_CALLBACK_API_EXPORTS) + target_compile_definitions(test_collector PRIVATE XPTI_STATIC_LIBRARY) endif() +target_compile_definitions(test_collector PRIVATE XPTI_CALLBACK_API_EXPORTS) function(set_tracing_test_props target_name collector_name) set_tests_properties(${target_name} PROPERTIES diff --git a/tools/urtrace/CMakeLists.txt b/tools/urtrace/CMakeLists.txt index 085f361223..9b385606ea 100644 --- a/tools/urtrace/CMakeLists.txt +++ b/tools/urtrace/CMakeLists.txt @@ -17,9 +17,9 @@ target_link_libraries(${TARGET_NAME} PRIVATE ${TARGET_XPTI} ${PROJECT_NAME}::com target_include_directories(${TARGET_NAME} PRIVATE ${xpti_SOURCE_DIR}/include) if(MSVC) - target_compile_definitions(${TARGET_NAME} PRIVATE - XPTI_STATIC_LIBRARY XPTI_CALLBACK_API_EXPORTS) + target_compile_definitions(${TARGET_NAME} PRIVATE XPTI_STATIC_LIBRARY) endif() +target_compile_definitions(${TARGET_NAME} PRIVATE XPTI_CALLBACK_API_EXPORTS) set(UR_TRACE_CLI_BIN ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/urtrace)