From 0a6b9a3c65edd96dc15b417ca08ab9d7bce97b8b Mon Sep 17 00:00:00 2001
From: Omar Ahmed
Date: Wed, 29 May 2024 11:47:25 +0100
Subject: [PATCH 1/4] Merge pull request #1666 from igchor/fix_l0_test

Fix level zero specific test
---
Review notes (ignored by git am):
 * The line structure and indentation of this series were lost in extraction
   and have been rebuilt from the hunk line counts. Confirm context
   whitespace against the tree, or apply with --ignore-whitespace.
 * Text inside angle brackets (template arguments, the TARGET_FILE generator
   expression) had been stripped and was restored from usage; confirm.
 * Fixed the comment typo "seprate" -> "separate" in the new CMakeLists.txt.

 test/adapters/level_zero/CMakeLists.txt       | 59 +++++++++++++++++++
 .../adapters/level_zero/event_cache_tests.cpp | 22 +++++--
 2 files changed, 75 insertions(+), 6 deletions(-)
 create mode 100644 test/adapters/level_zero/CMakeLists.txt

diff --git a/test/adapters/level_zero/CMakeLists.txt b/test/adapters/level_zero/CMakeLists.txt
new file mode 100644
index 0000000000..2757501a96
--- /dev/null
+++ b/test/adapters/level_zero/CMakeLists.txt
@@ -0,0 +1,59 @@
+# Copyright (C) 2024 Intel Corporation
+# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+# See LICENSE.TXT
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+if(NOT UR_DPCXX)
+    # Tests that require kernels can't be used if we aren't generating
+    # device binaries
+    message(WARNING
+        "UR_DPCXX is not defined, skipping some adapter tests for level_zero")
+else()
+    add_adapter_test(level_zero
+        FIXTURE KERNELS
+        SOURCES
+            urProgramLink.cpp
+            urKernelCreateWithNativeHandle.cpp
+        ENVIRONMENT
+            "UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero>\""
+    )
+    # TODO: valgrind tests require very new environment.
+    # Enable once all L0 runners are updated.
+    # add_adapter_memcheck_test(level_zero
+    #     ENVIRONMENT
+    #         "UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero>\""
+    # )
+
+    target_link_libraries(test-adapter-level_zero PRIVATE
+        LevelZeroLoader
+        LevelZeroLoader-Headers
+    )
+
+    target_include_directories(test-adapter-level_zero PRIVATE
+        ${PROJECT_SOURCE_DIR}/source
+        ${PROJECT_SOURCE_DIR}/source/adapters/level_zero
+        LevelZeroLoader-Headers
+    )
+
+    add_dependencies(test-adapter-level_zero
+        generate_device_binaries kernel_names_header)
+endif()
+
+if(NOT WIN32)
+    # Make L0 use CallMap from a separate shared lib so that we can access the map
+    # from the tests. This only seems to work on linux
+    add_library(zeCallMap SHARED zeCallMap.cpp)
+    target_compile_definitions(ur_adapter_level_zero PRIVATE UR_L0_CALL_COUNT_IN_TESTS)
+    target_link_libraries(ur_adapter_level_zero PRIVATE zeCallMap)
+
+    add_adapter_test(level_zero_ze_calls
+        FIXTURE DEVICES
+        SOURCES
+            event_cache_tests.cpp
+        ENVIRONMENT
+            "UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero>\""
+            "UR_L0_LEAKS_DEBUG=1"
+    )
+
+    target_link_libraries(test-adapter-level_zero_ze_calls PRIVATE zeCallMap)
+endif()
diff --git a/test/adapters/level_zero/event_cache_tests.cpp b/test/adapters/level_zero/event_cache_tests.cpp
index 53bc39ad96..5ad970bad1 100644
--- a/test/adapters/level_zero/event_cache_tests.cpp
+++ b/test/adapters/level_zero/event_cache_tests.cpp
@@ -19,6 +19,16 @@ extern std::map<std::string, int> *ZeCallCount;
 
 using FlagsTupleType = std::tuple<ur_queue_flags_t, ur_queue_flags_t>;
 
+// TODO: get rid of this, this is a workaround for fails on older driver
+// where for some reason continuing the test leads to a segfault
+#define UUR_ASSERT_SUCCESS_OR_EXIT_IF_UNSUPPORTED(ret)                         \
+    auto status = ret;                                                         \
+    if (status == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {                       \
+        exit(0);                                                               \
+    } else {                                                                   \
+        ASSERT_EQ(status, UR_RESULT_SUCCESS);                                  \
+    }
+
 struct urEventCacheTest : uur::urContextTestWithParam<FlagsTupleType> {
     void SetUp() override {
         UUR_RETURN_ON_FATAL_FAILURE(urContextTestWithParam::SetUp());
@@ -42,20 +52,20 @@ struct urEventCacheTest : uur::urContextTestWithParam<FlagsTupleType> {
             EXPECT_SUCCESS(urMemRelease(buffer));
         }
         if (queue) {
-            UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urQueueRelease(queue));
+            UUR_ASSERT_SUCCESS_OR_EXIT_IF_UNSUPPORTED(urQueueRelease(queue));
         }
         UUR_RETURN_ON_FATAL_FAILURE(urContextTestWithParam::TearDown());
     }
 
     auto enqueueWork(ur_event_handle_t *hEvent, int data) {
         input.assign(count, data);
-        UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urEnqueueMemBufferWrite(
+        UUR_ASSERT_SUCCESS_OR_EXIT_IF_UNSUPPORTED(urEnqueueMemBufferWrite(
             queue, buffer, false, 0, size, input.data(), 0, nullptr, hEvent));
     }
 
     void verifyData() {
         std::vector<uint32_t> output(count, 1);
-        UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urEnqueueMemBufferRead(
+        UUR_ASSERT_SUCCESS_OR_EXIT_IF_UNSUPPORTED(urEnqueueMemBufferRead(
             queue, buffer, true, 0, size, output.data(), 0, nullptr, nullptr));
 
         if (!(flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
@@ -79,7 +89,7 @@ TEST_P(urEventCacheTest, eventsReuseNoVisibleEvent) {
         for (int j = 0; j < numEnqueues; j++) {
             enqueueWork(nullptr, i * numEnqueues + j);
         }
-        UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urQueueFinish(queue));
+        UUR_ASSERT_SUCCESS_OR_EXIT_IF_UNSUPPORTED(urQueueFinish(queue));
         verifyData();
     }
 
@@ -101,7 +111,7 @@ TEST_P(urEventCacheTest, eventsReuseWithVisibleEvent) {
         for (int j = 0; j < numEnqueues; j++) {
             enqueueWork(events[j].ptr(), i * numEnqueues + j);
         }
-        UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urQueueFinish(queue));
+        UUR_ASSERT_SUCCESS_OR_EXIT_IF_UNSUPPORTED(urQueueFinish(queue));
         verifyData();
     }
 
@@ -126,7 +136,7 @@ TEST_P(urEventCacheTest, eventsReuseWithVisibleEventAndWait) {
                 events.clear();
             }
         }
-        UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urQueueFinish(queue));
+        UUR_ASSERT_SUCCESS_OR_EXIT_IF_UNSUPPORTED(urQueueFinish(queue));
     }
 
     ASSERT_GE((*ZeCallCount)["zeEventCreate"], waitEveryN);

From adfa98ee85b1e84f63c102d8ed65ba0c7673e6b5 Mon Sep 17 00:00:00 2001
From: Omar Ahmed
Date: Wed, 29 May 2024 15:02:39 +0100
Subject: [PATCH 2/4] Merge pull request #1667 from nrspruit/fix_multi_device_event_cache

[UR] Fix Multi Device Event Cache for shared Root Device
---
Review notes (ignored by git am):
 * multi_device_event_cache_tests.cpp: the sub-device vector is now
   constructed with numSubDevices elements instead of being reserve()d.
   reserve() leaves size() == 0, so writing through data() and indexing
   [0]/[1] was undefined behaviour. An ASSERT_GE guards the [1] access.
   The new file is still 107 lines.
 * event.cpp: the two root-device handles are null-initialised so they can
   never be read while indeterminate.
 * Stripped angle-bracket text (#include targets, template arguments) was
   restored from usage; confirm against upstream.

 source/adapters/level_zero/event.cpp          |  22 +++-
 test/adapters/level_zero/CMakeLists.txt       |  11 ++
 .../multi_device_event_cache_tests.cpp        | 107 ++++++++++++++++++
 3 files changed, 136 insertions(+), 4 deletions(-)
 create mode 100644 test/adapters/level_zero/multi_device_event_cache_tests.cpp

diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp
index d061c73b2a..4872424433 100644
--- a/source/adapters/level_zero/event.cpp
+++ b/source/adapters/level_zero/event.cpp
@@ -1359,7 +1359,20 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
 
         std::shared_lock<ur_shared_mutex> Lock(EventList[I]->Mutex);
 
-        if (Queue && Queue->Device != CurQueueDevice &&
+        ur_device_handle_t QueueRootDevice = nullptr;
+        ur_device_handle_t CurrentQueueRootDevice = nullptr;
+        if (Queue) {
+          QueueRootDevice = Queue->Device;
+          CurrentQueueRootDevice = CurQueueDevice;
+          if (Queue->Device->isSubDevice()) {
+            QueueRootDevice = Queue->Device->RootDevice;
+          }
+          if (CurQueueDevice->isSubDevice()) {
+            CurrentQueueRootDevice = CurQueueDevice->RootDevice;
+          }
+        }
+
+        if (Queue && QueueRootDevice != CurrentQueueRootDevice &&
             !EventList[I]->IsMultiDevice) {
           ze_event_handle_t MultiDeviceZeEvent = nullptr;
           ur_event_handle_t MultiDeviceEvent;
@@ -1373,9 +1386,10 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
           const auto &ZeCommandList = CommandList->first;
           EventList[I]->RefCount.increment();
 
-          zeCommandListAppendWaitOnEvents(ZeCommandList, 1u,
-                                          &EventList[I]->ZeEvent);
-          zeEventHostSignal(MultiDeviceZeEvent);
+          ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
+                     (ZeCommandList, 1u, &EventList[I]->ZeEvent));
+          if (!MultiDeviceEvent->CounterBasedEventsEnabled)
+            ZE2UR_CALL(zeEventHostSignal, (MultiDeviceZeEvent));
 
           UR_CALL(Queue->executeCommandList(CommandList, /* IsBlocking */ false,
                                             /* OkToBatchCommand */ true));
diff --git a/test/adapters/level_zero/CMakeLists.txt b/test/adapters/level_zero/CMakeLists.txt
index 2757501a96..4d25e54e9a 100644
--- a/test/adapters/level_zero/CMakeLists.txt
+++ b/test/adapters/level_zero/CMakeLists.txt
@@ -56,4 +56,15 @@ if(NOT WIN32)
     )
 
     target_link_libraries(test-adapter-level_zero_ze_calls PRIVATE zeCallMap)
+
+    add_adapter_test(level_zero_multi_queue
+        FIXTURE DEVICES
+        SOURCES
+            multi_device_event_cache_tests.cpp
+        ENVIRONMENT
+            "UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero>\""
+            "UR_L0_LEAKS_DEBUG=1"
+    )
+
+    target_link_libraries(test-adapter-level_zero_multi_queue PRIVATE zeCallMap)
 endif()
diff --git a/test/adapters/level_zero/multi_device_event_cache_tests.cpp b/test/adapters/level_zero/multi_device_event_cache_tests.cpp
new file mode 100644
index 0000000000..b45991aebf
--- /dev/null
+++ b/test/adapters/level_zero/multi_device_event_cache_tests.cpp
@@ -0,0 +1,107 @@
+// Copyright (C) 2024 Intel Corporation
+// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
+// See LICENSE.TXT
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "ur_print.hpp"
+#include "uur/fixtures.h"
+#include "uur/raii.h"
+
+#include <map>
+#include <string>
+
+extern std::map<std::string, int> *ZeCallCount;
+
+using urMultiQueueMultiDeviceEventCacheTest = uur::urAllDevicesTest;
+TEST_F(urMultiQueueMultiDeviceEventCacheTest,
+       GivenMultiSubDeviceWithQueuePerSubDeviceThenEventIsSharedBetweenQueues) {
+    uint32_t max_sub_devices = 0;
+    ASSERT_SUCCESS(
+        uur::GetDevicePartitionMaxSubDevices(devices[0], max_sub_devices));
+    if (max_sub_devices < 2) {
+        GTEST_SKIP();
+    }
+    ur_device_partition_property_t prop;
+    prop.type = UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
+    prop.value.affinity_domain =
+        UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE;
+
+    ur_device_partition_properties_t properties{
+        UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES,
+        nullptr,
+        &prop,
+        1,
+    };
+    uint32_t numSubDevices = 0;
+    ASSERT_SUCCESS(
+        urDevicePartition(devices[0], &properties, 0, nullptr, &numSubDevices));
+    ASSERT_GE(numSubDevices, 2u);
+    std::vector<ur_device_handle_t> sub_devices(numSubDevices);
+    ASSERT_SUCCESS(urDevicePartition(devices[0], &properties, numSubDevices,
+                                     sub_devices.data(), nullptr));
+    uur::raii::Context context1 = nullptr;
+    ASSERT_SUCCESS(
+        urContextCreate(1, &sub_devices[0], nullptr, context1.ptr()));
+    ASSERT_NE(nullptr, context1);
+    uur::raii::Context context2 = nullptr;
+    ASSERT_SUCCESS(
+        urContextCreate(1, &sub_devices[1], nullptr, context2.ptr()));
+    ASSERT_NE(nullptr, context2);
+    ur_queue_handle_t queue1 = nullptr;
+    ASSERT_SUCCESS(urQueueCreate(context1, sub_devices[0], 0, &queue1));
+    ur_queue_handle_t queue2 = nullptr;
+    ASSERT_SUCCESS(urQueueCreate(context2, sub_devices[1], 0, &queue2));
+    uur::raii::Event event = nullptr;
+    uur::raii::Event eventWait = nullptr;
+    uur::raii::Event eventWaitDummy = nullptr;
+    (*ZeCallCount)["zeCommandListAppendWaitOnEvents"] = 0;
+    EXPECT_SUCCESS(urEventCreateWithNativeHandle(nullptr, context2, nullptr,
+                                                 eventWait.ptr()));
+    EXPECT_SUCCESS(urEventCreateWithNativeHandle(nullptr, context1, nullptr,
+                                                 eventWaitDummy.ptr()));
+    EXPECT_SUCCESS(
+        urEnqueueEventsWait(queue1, 1, eventWaitDummy.ptr(), eventWait.ptr()));
+    EXPECT_SUCCESS(
+        urEnqueueEventsWait(queue2, 1, eventWait.ptr(), event.ptr()));
+    EXPECT_EQ((*ZeCallCount)["zeCommandListAppendWaitOnEvents"], 2);
+    ASSERT_SUCCESS(urEventRelease(eventWaitDummy.get()));
+    ASSERT_SUCCESS(urEventRelease(eventWait.get()));
+    ASSERT_SUCCESS(urEventRelease(event.get()));
+    ASSERT_SUCCESS(urQueueRelease(queue2));
+    ASSERT_SUCCESS(urQueueRelease(queue1));
+}
+
+TEST_F(urMultiQueueMultiDeviceEventCacheTest,
+       GivenMultiDeviceWithQueuePerDeviceThenMultiDeviceEventIsCreated) {
+    if (devices.size() < 2) {
+        GTEST_SKIP();
+    }
+    uur::raii::Context context1 = nullptr;
+    ASSERT_SUCCESS(urContextCreate(1, &devices[0], nullptr, context1.ptr()));
+    ASSERT_NE(nullptr, context1);
+    uur::raii::Context context2 = nullptr;
+    ASSERT_SUCCESS(urContextCreate(1, &devices[1], nullptr, context2.ptr()));
+    ASSERT_NE(nullptr, context2);
+    ur_queue_handle_t queue1 = nullptr;
+    ASSERT_SUCCESS(urQueueCreate(context1, devices[0], 0, &queue1));
+    ur_queue_handle_t queue2 = nullptr;
+    ASSERT_SUCCESS(urQueueCreate(context2, devices[1], 0, &queue2));
+    uur::raii::Event event = nullptr;
+    uur::raii::Event eventWait = nullptr;
+    uur::raii::Event eventWaitDummy = nullptr;
+    (*ZeCallCount)["zeCommandListAppendWaitOnEvents"] = 0;
+    EXPECT_SUCCESS(urEventCreateWithNativeHandle(nullptr, context2, nullptr,
+                                                 eventWait.ptr()));
+    EXPECT_SUCCESS(urEventCreateWithNativeHandle(nullptr, context1, nullptr,
+                                                 eventWaitDummy.ptr()));
+    EXPECT_SUCCESS(
+        urEnqueueEventsWait(queue1, 1, eventWaitDummy.ptr(), eventWait.ptr()));
+    EXPECT_SUCCESS(
+        urEnqueueEventsWait(queue2, 1, eventWait.ptr(), event.ptr()));
+    EXPECT_EQ((*ZeCallCount)["zeCommandListAppendWaitOnEvents"], 3);
+    ASSERT_SUCCESS(urEventRelease(eventWaitDummy.get()));
+    ASSERT_SUCCESS(urEventRelease(eventWait.get()));
+    ASSERT_SUCCESS(urEventRelease(event.get()));
+    ASSERT_SUCCESS(urQueueRelease(queue2));
+    ASSERT_SUCCESS(urQueueRelease(queue1));
+}

From 8f6bb3f04b40a533b4a75b5f1bdac0fa8ebc9f5f Mon Sep 17 00:00:00 2001
From: omarahmed1111
Date: Wed, 29 May 2024 16:20:17 +0100
Subject: [PATCH 3/4] Set version v0.9.5

---
 CMakeLists.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4c8fd63490..6942d7d9b5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -4,7 +4,7 @@
 # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 
 cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR)
-project(unified-runtime VERSION 0.9.4)
+project(unified-runtime VERSION 0.9.5)
 
 include(GNUInstallDirs)
 include(CheckCXXSourceCompiles)

From 1af4c158d093de9c9095b4524629f4a45bd94800 Mon Sep 17 00:00:00 2001
From: omarahmed1111
Date: Wed, 29 May 2024 17:36:24 +0100
Subject: [PATCH 4/4] Remove the check for CounterBasedEventsEnabled in l0 events

---
 source/adapters/level_zero/event.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp
index 4872424433..8ce798aa2d 100644
--- a/source/adapters/level_zero/event.cpp
+++ b/source/adapters/level_zero/event.cpp
@@ -1388,8 +1388,7 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
 
           ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
                      (ZeCommandList, 1u, &EventList[I]->ZeEvent));
-          if (!MultiDeviceEvent->CounterBasedEventsEnabled)
-            ZE2UR_CALL(zeEventHostSignal, (MultiDeviceZeEvent));
+          ZE2UR_CALL(zeEventHostSignal, (MultiDeviceZeEvent));
 
           UR_CALL(Queue->executeCommandList(CommandList, /* IsBlocking */ false,
                                             /* OkToBatchCommand */ true));