Skip to content

Commit

Permalink
Merge pull request #1685 from omarahmed1111/v0.9.5rc
Browse files Browse the repository at this point in the history
Candidate for the v0.9.5 release tag
  • Loading branch information
omarahmed1111 committed May 29, 2024
2 parents 83cd14d + 1af4c15 commit ca68414
Show file tree
Hide file tree
Showing 5 changed files with 211 additions and 11 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

cmake_minimum_required(VERSION 3.14.0 FATAL_ERROR)
project(unified-runtime VERSION 0.9.4)
project(unified-runtime VERSION 0.9.5)

include(GNUInstallDirs)
include(CheckCXXSourceCompiles)
Expand Down
21 changes: 17 additions & 4 deletions source/adapters/level_zero/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1359,7 +1359,20 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(

std::shared_lock<ur_shared_mutex> Lock(EventList[I]->Mutex);

if (Queue && Queue->Device != CurQueueDevice &&
ur_device_handle_t QueueRootDevice;
ur_device_handle_t CurrentQueueRootDevice;
if (Queue) {
QueueRootDevice = Queue->Device;
CurrentQueueRootDevice = CurQueueDevice;
if (Queue->Device->isSubDevice()) {
QueueRootDevice = Queue->Device->RootDevice;
}
if (CurQueueDevice->isSubDevice()) {
CurrentQueueRootDevice = CurQueueDevice->RootDevice;
}
}

if (Queue && QueueRootDevice != CurrentQueueRootDevice &&
!EventList[I]->IsMultiDevice) {
ze_event_handle_t MultiDeviceZeEvent = nullptr;
ur_event_handle_t MultiDeviceEvent;
Expand All @@ -1373,9 +1386,9 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
const auto &ZeCommandList = CommandList->first;
EventList[I]->RefCount.increment();

zeCommandListAppendWaitOnEvents(ZeCommandList, 1u,
&EventList[I]->ZeEvent);
zeEventHostSignal(MultiDeviceZeEvent);
ZE2UR_CALL(zeCommandListAppendWaitOnEvents,
(ZeCommandList, 1u, &EventList[I]->ZeEvent));
ZE2UR_CALL(zeEventHostSignal, (MultiDeviceZeEvent));

UR_CALL(Queue->executeCommandList(CommandList, /* IsBlocking */ false,
/* OkToBatchCommand */ true));
Expand Down
70 changes: 70 additions & 0 deletions test/adapters/level_zero/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
# Copyright (C) 2024 Intel Corporation
# Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
# See LICENSE.TXT
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# Kernel-based Level Zero adapter tests. They are registered only when
# UR_DPCXX is set; otherwise a warning is printed and they are skipped.
if(NOT UR_DPCXX)
# Tests that require kernels can't be used if we aren't generating
# device binaries
message(WARNING
"UR_DPCXX is not defined, skipping some adapter tests for level_zero")
else()
# Register the kernel-fixture test and force-load the Level Zero adapter
# library through UR_ADAPTERS_FORCE_LOAD.
add_adapter_test(level_zero
FIXTURE KERNELS
SOURCES
urProgramLink.cpp
urKernelCreateWithNativeHandle.cpp
ENVIRONMENT
"UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero>\""
)
# TODO: valgrind tests require very new environment.
# Enable once all L0 runners are updated.
# add_adapter_memcheck_test(level_zero
# ENVIRONMENT
# "UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero>\""
# )

# The test binary links against the Level Zero loader targets.
target_link_libraries(test-adapter-level_zero PRIVATE
LevelZeroLoader
LevelZeroLoader-Headers
)

# Give the test access to the runtime and Level Zero adapter sources.
target_include_directories(test-adapter-level_zero PRIVATE
${PROJECT_SOURCE_DIR}/source
${PROJECT_SOURCE_DIR}/source/adapters/level_zero
LevelZeroLoader-Headers
)

# Device binaries and the kernel-names header must be generated before the
# test target is built.
add_dependencies(test-adapter-level_zero
generate_device_binaries kernel_names_header)
endif()

# Call-counting tests. They are only configured on non-Windows platforms.
if(NOT WIN32)
# Make L0 use CallMap from a separate shared lib so that we can access the map
# from the tests. This only seems to work on Linux.
add_library(zeCallMap SHARED zeCallMap.cpp)
# The adapter is compiled with UR_L0_CALL_COUNT_IN_TESTS and linked against
# the shared call map.
target_compile_definitions(ur_adapter_level_zero PRIVATE UR_L0_CALL_COUNT_IN_TESTS)
target_link_libraries(ur_adapter_level_zero PRIVATE zeCallMap)

# Event cache test for the Level Zero adapter; runs with UR_L0_LEAKS_DEBUG=1.
add_adapter_test(level_zero_ze_calls
FIXTURE DEVICES
SOURCES
event_cache_tests.cpp
ENVIRONMENT
"UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero>\""
"UR_L0_LEAKS_DEBUG=1"
)

# The test binary links the same call map library as the adapter.
target_link_libraries(test-adapter-level_zero_ze_calls PRIVATE zeCallMap)

# Multi-queue / multi-device event cache test; same environment as above.
add_adapter_test(level_zero_multi_queue
FIXTURE DEVICES
SOURCES
multi_device_event_cache_tests.cpp
ENVIRONMENT
"UR_ADAPTERS_FORCE_LOAD=\"$<TARGET_FILE:ur_adapter_level_zero>\""
"UR_L0_LEAKS_DEBUG=1"
)

# The test binary links the same call map library as the adapter.
target_link_libraries(test-adapter-level_zero_multi_queue PRIVATE zeCallMap)
endif()
22 changes: 16 additions & 6 deletions test/adapters/level_zero/event_cache_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,16 @@ extern std::map<std::string, int> *ZeCallCount;
using FlagsTupleType = std::tuple<ur_queue_flags_t, ur_queue_flags_t,
ur_queue_flags_t, ur_queue_flags_t>;

// TODO: get rid of this, this is a workaround for fails on older driver
// where for some reason continuing the test leads to a segfault.
//
// Evaluates `ret` exactly once. If the result equals
// UR_RESULT_ERROR_UNSUPPORTED_FEATURE the whole process exits with status 0;
// any other result is asserted (ASSERT_EQ) to equal UR_RESULT_SUCCESS.
//
// The expansion is wrapped in do { ... } while (0) so that the temporary stays
// local to the macro: the macro can be used several times in one scope and as
// the body of an unbraced if/else. Always terminate the invocation with ';'.
#define UUR_ASSERT_SUCCESS_OR_EXIT_IF_UNSUPPORTED(ret)                         \
    do {                                                                       \
        auto uur_status_ = (ret);                                              \
        if (uur_status_ == UR_RESULT_ERROR_UNSUPPORTED_FEATURE) {              \
            exit(0);                                                           \
        } else {                                                               \
            ASSERT_EQ(uur_status_, UR_RESULT_SUCCESS);                         \
        }                                                                      \
    } while (0)

struct urEventCacheTest : uur::urContextTestWithParam<FlagsTupleType> {
void SetUp() override {
UUR_RETURN_ON_FATAL_FAILURE(urContextTestWithParam::SetUp());
Expand All @@ -42,20 +52,20 @@ struct urEventCacheTest : uur::urContextTestWithParam<FlagsTupleType> {
EXPECT_SUCCESS(urMemRelease(buffer));
}
if (queue) {
UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urQueueRelease(queue));
UUR_ASSERT_SUCCESS_OR_EXIT_IF_UNSUPPORTED(urQueueRelease(queue));
}
UUR_RETURN_ON_FATAL_FAILURE(urContextTestWithParam::TearDown());
}

auto enqueueWork(ur_event_handle_t *hEvent, int data) {
input.assign(count, data);
UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urEnqueueMemBufferWrite(
UUR_ASSERT_SUCCESS_OR_EXIT_IF_UNSUPPORTED(urEnqueueMemBufferWrite(
queue, buffer, false, 0, size, input.data(), 0, nullptr, hEvent));
}

void verifyData() {
std::vector<uint32_t> output(count, 1);
UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urEnqueueMemBufferRead(
UUR_ASSERT_SUCCESS_OR_EXIT_IF_UNSUPPORTED(urEnqueueMemBufferRead(
queue, buffer, true, 0, size, output.data(), 0, nullptr, nullptr));

if (!(flags & UR_QUEUE_FLAG_OUT_OF_ORDER_EXEC_MODE_ENABLE)) {
Expand All @@ -79,7 +89,7 @@ TEST_P(urEventCacheTest, eventsReuseNoVisibleEvent) {
for (int j = 0; j < numEnqueues; j++) {
enqueueWork(nullptr, i * numEnqueues + j);
}
UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urQueueFinish(queue));
UUR_ASSERT_SUCCESS_OR_EXIT_IF_UNSUPPORTED(urQueueFinish(queue));
verifyData();
}

Expand All @@ -101,7 +111,7 @@ TEST_P(urEventCacheTest, eventsReuseWithVisibleEvent) {
for (int j = 0; j < numEnqueues; j++) {
enqueueWork(events[j].ptr(), i * numEnqueues + j);
}
UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urQueueFinish(queue));
UUR_ASSERT_SUCCESS_OR_EXIT_IF_UNSUPPORTED(urQueueFinish(queue));
verifyData();
}

Expand All @@ -126,7 +136,7 @@ TEST_P(urEventCacheTest, eventsReuseWithVisibleEventAndWait) {
events.clear();
}
}
UUR_ASSERT_SUCCESS_OR_UNSUPPORTED(urQueueFinish(queue));
UUR_ASSERT_SUCCESS_OR_EXIT_IF_UNSUPPORTED(urQueueFinish(queue));
}

ASSERT_GE((*ZeCallCount)["zeEventCreate"], waitEveryN);
Expand Down
107 changes: 107 additions & 0 deletions test/adapters/level_zero/multi_device_event_cache_tests.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
// Copyright (C) 2024 Intel Corporation
// Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions.
// See LICENSE.TXT
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include "ur_print.hpp"
#include "uur/fixtures.h"
#include "uur/raii.h"

#include <map>
#include <string>

extern std::map<std::string, int> *ZeCallCount;

using urMultiQueueMultiDeviceEventCacheTest = uur::urAllDevicesTest;
// Partitions devices[0] into sub-devices, creates one context and one queue
// on each of the first two sub-devices, and chains two urEnqueueEventsWait
// calls across the queues: the event produced on queue1 is waited on by
// queue2. The test expects exactly two zeCommandListAppendWaitOnEvents calls
// for the two enqueues.
// Skipped when the device cannot provide at least two sub-devices.
TEST_F(urMultiQueueMultiDeviceEventCacheTest,
       GivenMultiSubDeviceWithQueuePerSubDeviceThenEventIsSharedBetweenQueues) {
    uint32_t max_sub_devices = 0;
    ASSERT_SUCCESS(
        uur::GetDevicePartitionMaxSubDevices(devices[0], max_sub_devices));
    if (max_sub_devices < 2) {
        GTEST_SKIP();
    }
    ur_device_partition_property_t prop;
    prop.type = UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN;
    prop.value.affinity_domain =
        UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE;

    ur_device_partition_properties_t properties{
        UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES,
        nullptr,
        &prop,
        1,
    };
    // First call only queries how many sub-devices this partitioning yields.
    uint32_t numSubDevices = 0;
    ASSERT_SUCCESS(
        urDevicePartition(devices[0], &properties, 0, nullptr, &numSubDevices));
    // Both sub_devices[0] and sub_devices[1] are used below.
    if (numSubDevices < 2) {
        GTEST_SKIP();
    }
    // The vector must actually contain numSubDevices elements: reserve() alone
    // leaves size() == 0, which makes the element accesses below undefined
    // behaviour.
    std::vector<ur_device_handle_t> sub_devices(numSubDevices);
    ASSERT_SUCCESS(urDevicePartition(devices[0], &properties, numSubDevices,
                                     sub_devices.data(), nullptr));
    uur::raii::Context context1 = nullptr;
    ASSERT_SUCCESS(
        urContextCreate(1, &sub_devices[0], nullptr, context1.ptr()));
    ASSERT_NE(nullptr, context1);
    uur::raii::Context context2 = nullptr;
    ASSERT_SUCCESS(
        urContextCreate(1, &sub_devices[1], nullptr, context2.ptr()));
    ASSERT_NE(nullptr, context2);
    ur_queue_handle_t queue1 = nullptr;
    ASSERT_SUCCESS(urQueueCreate(context1, sub_devices[0], 0, &queue1));
    ur_queue_handle_t queue2 = nullptr;
    ASSERT_SUCCESS(urQueueCreate(context2, sub_devices[1], 0, &queue2));
    uur::raii::Event event = nullptr;
    uur::raii::Event eventWait = nullptr;
    uur::raii::Event eventWaitDummy = nullptr;
    // Reset the counter so only the enqueues below are measured.
    (*ZeCallCount)["zeCommandListAppendWaitOnEvents"] = 0;
    EXPECT_SUCCESS(urEventCreateWithNativeHandle(nullptr, context2, nullptr,
                                                 eventWait.ptr()));
    EXPECT_SUCCESS(urEventCreateWithNativeHandle(nullptr, context1, nullptr,
                                                 eventWaitDummy.ptr()));
    // queue1 waits on the dummy event and produces eventWait ...
    EXPECT_SUCCESS(
        urEnqueueEventsWait(queue1, 1, eventWaitDummy.ptr(), eventWait.ptr()));
    // ... which queue2 (other sub-device, same parent device) then waits on.
    EXPECT_SUCCESS(
        urEnqueueEventsWait(queue2, 1, eventWait.ptr(), event.ptr()));
    EXPECT_EQ((*ZeCallCount)["zeCommandListAppendWaitOnEvents"], 2);
    ASSERT_SUCCESS(urEventRelease(eventWaitDummy.get()));
    ASSERT_SUCCESS(urEventRelease(eventWait.get()));
    ASSERT_SUCCESS(urEventRelease(event.get()));
    ASSERT_SUCCESS(urQueueRelease(queue2));
    ASSERT_SUCCESS(urQueueRelease(queue1));
}

// Creates one context and one queue on each of the first two devices and
// chains two urEnqueueEventsWait calls across them: the event produced on the
// first queue is waited on by the second queue. The test expects three
// zeCommandListAppendWaitOnEvents calls for the two enqueues.
// Skipped when fewer than two devices are available.
TEST_F(urMultiQueueMultiDeviceEventCacheTest,
       GivenMultiDeviceWithQueuePerDeviceThenMultiDeviceEventIsCreated) {
    if (devices.size() < 2) {
        GTEST_SKIP();
    }

    const std::string waitOnEventsCall = "zeCommandListAppendWaitOnEvents";
    const int expectedWaitOnEventsCalls = 3;

    // One context per device.
    uur::raii::Context firstContext = nullptr;
    ASSERT_SUCCESS(
        urContextCreate(1, &devices[0], nullptr, firstContext.ptr()));
    ASSERT_NE(nullptr, firstContext);
    uur::raii::Context secondContext = nullptr;
    ASSERT_SUCCESS(
        urContextCreate(1, &devices[1], nullptr, secondContext.ptr()));
    ASSERT_NE(nullptr, secondContext);

    // One queue per device, each in its own context.
    ur_queue_handle_t firstQueue = nullptr;
    ASSERT_SUCCESS(urQueueCreate(firstContext, devices[0], 0, &firstQueue));
    ur_queue_handle_t secondQueue = nullptr;
    ASSERT_SUCCESS(urQueueCreate(secondContext, devices[1], 0, &secondQueue));

    uur::raii::Event finalEvent = nullptr;
    uur::raii::Event crossQueueEvent = nullptr;
    uur::raii::Event seedEvent = nullptr;

    // Start counting from zero so only the enqueues below are measured.
    (*ZeCallCount)[waitOnEventsCall] = 0;

    EXPECT_SUCCESS(urEventCreateWithNativeHandle(nullptr, secondContext,
                                                 nullptr,
                                                 crossQueueEvent.ptr()));
    EXPECT_SUCCESS(urEventCreateWithNativeHandle(nullptr, firstContext, nullptr,
                                                 seedEvent.ptr()));

    // The first queue waits on the seed event and produces crossQueueEvent;
    // the second queue, on the other device, then waits on crossQueueEvent.
    EXPECT_SUCCESS(urEnqueueEventsWait(firstQueue, 1, seedEvent.ptr(),
                                       crossQueueEvent.ptr()));
    EXPECT_SUCCESS(urEnqueueEventsWait(secondQueue, 1, crossQueueEvent.ptr(),
                                       finalEvent.ptr()));

    EXPECT_EQ((*ZeCallCount)[waitOnEventsCall], expectedWaitOnEventsCalls);

    // Release events first, then queues in reverse creation order.
    ASSERT_SUCCESS(urEventRelease(seedEvent.get()));
    ASSERT_SUCCESS(urEventRelease(crossQueueEvent.get()));
    ASSERT_SUCCESS(urEventRelease(finalEvent.get()));
    ASSERT_SUCCESS(urQueueRelease(secondQueue));
    ASSERT_SUCCESS(urQueueRelease(firstQueue));
}

0 comments on commit ca68414

Please sign in to comment.