Skip to content

Commit

Permalink
Merge branch 'main' into coop_kernel_query
Browse files Browse the repository at this point in the history
  • Loading branch information
0x12CC authored Dec 3, 2024
2 parents ca0a0e3 + e7ee297 commit 0e46d99
Show file tree
Hide file tree
Showing 86 changed files with 2,193 additions and 529 deletions.
3 changes: 0 additions & 3 deletions .github/workflows/build-fuzz-reusable.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,8 +47,6 @@ jobs:
cmake --build build -j $(nproc)
- name: Configure CMake
# CFI sanitization (or flto?) seems to cause linking to fail
# https://github.com/oneapi-src/unified-runtime/issues/2323
run: >
cmake
-B${{github.workspace}}/build
Expand All @@ -60,7 +58,6 @@ jobs:
-DUR_USE_ASAN=ON
-DUR_USE_UBSAN=ON
-DUR_BUILD_ADAPTER_L0=ON
-DUR_USE_CFI=OFF
-DUR_LEVEL_ZERO_LOADER_LIBRARY=${{github.workspace}}/level-zero/build/lib/libze_loader.so
-DUR_LEVEL_ZERO_INCLUDE_DIR=${{github.workspace}}/level-zero/include/
-DUR_DPCXX=${{github.workspace}}/dpcpp_compiler/bin/clang++
Expand Down
15 changes: 8 additions & 7 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -221,13 +221,14 @@ jobs:
needs: [ubuntu-build, opencl]
uses: ./.github/workflows/e2e_opencl.yml

e2e-cuda:
name: E2E CUDA
permissions:
contents: read
pull-requests: write
needs: [ubuntu-build, cuda]
uses: ./.github/workflows/e2e_cuda.yml
# Causes hangs: https://github.com/oneapi-src/unified-runtime/issues/2398
#e2e-cuda:
# name: E2E CUDA
# permissions:
# contents: read
# pull-requests: write
# needs: [ubuntu-build, cuda]
# uses: ./.github/workflows/e2e_cuda.yml

windows-build:
name: Build - Windows
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/e2e_core.yml
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ jobs:
- name: Run e2e tests
id: tests
run: ninja -C build-e2e check-sycl-e2e
run: ninja -C build-e2e check-sycl-e2e || echo "e2e tests have failed. Ignoring failure."

# FIXME: Requires pull-request: write permissions but this is only granted
# on pull requests from forks if using pull_request_target workflow
Expand Down
27 changes: 26 additions & 1 deletion cmake/FetchLevelZero.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ set(UR_LEVEL_ZERO_LOADER_LIBRARY "" CACHE FILEPATH "Path of the Level Zero Loade
# User-overridable cache entries. Each defaults to the empty string.
# UR_LEVEL_ZERO_INCLUDE_DIR names a directory, so it is declared as PATH
# (directory chooser in CMake GUIs) rather than FILEPATH (file chooser).
set(UR_LEVEL_ZERO_INCLUDE_DIR "" CACHE PATH "Directory containing the Level Zero Headers")
set(UR_LEVEL_ZERO_LOADER_REPO "" CACHE STRING "Github repo to get the Level Zero loader sources from")
set(UR_LEVEL_ZERO_LOADER_TAG "" CACHE STRING " GIT tag of the Level Zero Loader taken from github repo")
set(UR_COMPUTE_RUNTIME_REPO "" CACHE STRING "Github repo to get the compute runtime sources from")
set(UR_COMPUTE_RUNTIME_TAG "" CACHE STRING " GIT tag of the compute runtime taken from github repo")

# Copy Level Zero loader/headers locally to the build to avoid leaking their path.
set(LEVEL_ZERO_COPY_DIR ${CMAKE_CURRENT_BINARY_DIR}/level_zero_loader)
Expand Down Expand Up @@ -87,8 +89,31 @@ target_link_libraries(LevelZeroLoader
INTERFACE "${LEVEL_ZERO_LIB_NAME}"
)

file(GLOB LEVEL_ZERO_LOADER_API_HEADERS "${LEVEL_ZERO_INCLUDE_DIR}/*.h")
file(COPY ${LEVEL_ZERO_LOADER_API_HEADERS} DESTINATION ${LEVEL_ZERO_INCLUDE_DIR}/level_zero)
add_library(LevelZeroLoader-Headers INTERFACE)
target_include_directories(LevelZeroLoader-Headers
INTERFACE "$<BUILD_INTERFACE:${LEVEL_ZERO_INCLUDE_DIR}>"
INTERFACE "$<BUILD_INTERFACE:${LEVEL_ZERO_INCLUDE_DIR};${LEVEL_ZERO_INCLUDE_DIR}/level_zero>"
"$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
)

include(FetchContent)

# Use the default compute-runtime repository and the pinned tag whenever the
# corresponding cache entry was left empty.
if (UR_COMPUTE_RUNTIME_REPO STREQUAL "")
    set(UR_COMPUTE_RUNTIME_REPO "https://github.com/intel/compute-runtime.git")
endif()
if (UR_COMPUTE_RUNTIME_TAG STREQUAL "")
    set(UR_COMPUTE_RUNTIME_TAG 24.39.31294.12)
endif()

# Sparse fetch only the dir with level zero headers to avoid pulling in the entire compute-runtime.
FetchContentSparse_Declare(
    compute-runtime-level-zero-headers
    ${UR_COMPUTE_RUNTIME_REPO}
    "${UR_COMPUTE_RUNTIME_TAG}"
    "level_zero/include"
)
FetchContent_GetProperties(compute-runtime-level-zero-headers)
if(NOT compute-runtime-level-zero-headers_POPULATED)
    FetchContent_Populate(compute-runtime-level-zero-headers)
endif()

# The include root is two directory levels above the populated source dir.
set(COMPUTE_RUNTIME_LEVEL_ZERO_INCLUDE "${compute-runtime-level-zero-headers_SOURCE_DIR}/../..")
message(STATUS "Level Zero Adapter: Using Level Zero headers from ${COMPUTE_RUNTIME_LEVEL_ZERO_INCLUDE}")

# Header-only interface target that exposes the fetched headers to consumers.
add_library(ComputeRuntimeLevelZero-Headers INTERFACE)
target_include_directories(ComputeRuntimeLevelZero-Headers
    INTERFACE
        "$<BUILD_INTERFACE:${COMPUTE_RUNTIME_LEVEL_ZERO_INCLUDE}>"
        "$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>"
)
20 changes: 16 additions & 4 deletions cmake/helpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ if(CMAKE_SYSTEM_NAME STREQUAL Linux)
check_cxx_compiler_flag("-fstack-clash-protection" CXX_HAS_FSTACK_CLASH_PROTECTION)
endif()

# CFI and ASan are not supported together: when both are requested, emit a
# warning and switch CFI off for the rest of the configure step.
if (UR_USE_ASAN AND UR_USE_CFI)
    message(
        WARNING
        "Both UR_USE_CFI and UR_USE_ASAN are ON. "
        "Due to build errors, this is unsupported; CFI checks will be disabled"
    )
    set(UR_USE_CFI OFF)
endif()

if (UR_USE_CFI)
set(SAVED_CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS})
set(CMAKE_REQUIRED_FLAGS "-flto -fvisibility=hidden")
Expand All @@ -73,6 +79,13 @@ else()
set(CXX_HAS_CFI_SANITIZE OFF)
endif()

# Compile/link flags that turn on Control Flow Integrity (CFI) checks.
# CFI_FLAGS is a CMake list (";"-separated) so that every flag reaches
# target_compile_options()/target_link_options() as its own argument; a single
# space-separated string would be forwarded as one malformed option.
# The list stays empty unless CXX_HAS_CFI_SANITIZE is true.
set(CFI_FLAGS "")
if (CXX_HAS_CFI_SANITIZE)
    # cfi-icall requires called functions in shared libraries to also be built with cfi-icall, which we can't
    # guarantee. -fsanitize=cfi depends on -flto
    # NOTE(review): CMAKE_SOURCE_DIR points at the top-level project; if this
    # file is ever consumed as a subproject the ignorelist path may need to be
    # anchored differently - confirm.
    set(CFI_FLAGS
        -flto
        -fsanitize=cfi
        -fno-sanitize=cfi-icall
        "-fsanitize-ignorelist=${CMAKE_SOURCE_DIR}/sanitizer-ignorelist.txt"
    )
endif()

function(add_ur_target_compile_options name)
if(NOT MSVC)
target_compile_definitions(${name} PRIVATE -D_FORTIFY_SOURCE=2)
Expand All @@ -89,9 +102,8 @@ function(add_ur_target_compile_options name)
-fPIC
-fstack-protector-strong
-fvisibility=hidden
# cfi-icall requires called functions in shared libraries to also be built with cfi-icall, which we can't
# guarantee. -fsanitize=cfi depends on -flto
$<$<BOOL:${CXX_HAS_CFI_SANITIZE}>:-flto -fsanitize=cfi -fno-sanitize=cfi-icall>

${CFI_FLAGS}
$<$<BOOL:${CXX_HAS_FCF_PROTECTION_FULL}>:-fcf-protection=full>
$<$<BOOL:${CXX_HAS_FSTACK_CLASH_PROTECTION}>:-fstack-clash-protection>

Expand Down Expand Up @@ -129,7 +141,7 @@ function(add_ur_target_link_options name)
if(NOT MSVC)
if (NOT APPLE)
target_link_options(${name} PRIVATE
$<$<BOOL:${CXX_HAS_CFI_SANITIZE}>:-flto -fsanitize=cfi -fno-sanitize=cfi-icall>
${CFI_FLAGS}
"LINKER:-z,relro,-z,now,-z,noexecstack"
)
if (UR_DEVELOPER_MODE)
Expand Down
30 changes: 28 additions & 2 deletions include/ur_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -967,6 +967,9 @@ typedef enum ur_adapter_info_t {
///< The reference count returned should be considered immediately stale.
///< It is unsuitable for general use in applications. This feature is
///< provided for identifying memory leaks.
UR_ADAPTER_INFO_VERSION = 2, ///< [uint32_t] Specifies the adapter version, initial value of 1 and
///< incremented upon major changes, e.g. when multiple versions of an
///< adapter may exist in parallel.
/// @cond
UR_ADAPTER_INFO_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand All @@ -988,7 +991,7 @@ typedef enum ur_adapter_info_t {
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hAdapter`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_ADAPTER_INFO_REFERENCE_COUNT < propName`
/// + `::UR_ADAPTER_INFO_VERSION < propName`
/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION
/// + If `propName` is not supported by the adapter.
/// - ::UR_RESULT_ERROR_INVALID_SIZE
Expand Down Expand Up @@ -1705,6 +1708,8 @@ typedef enum ur_device_info_t {
UR_DEVICE_INFO_ENQUEUE_NATIVE_COMMAND_SUPPORT_EXP = 0x2020, ///< [::ur_bool_t] returns true if the device supports enqueueing of native
///< work
UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP = 0x2021, ///< [::ur_bool_t] returns true if the device supports low-power events.
UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP = 0x2022, ///< [::ur_exp_device_2d_block_array_capability_flags_t] return a bit-field
///< of Intel GPU 2D block array capabilities
/// @cond
UR_DEVICE_INFO_FORCE_UINT32 = 0x7fffffff
/// @endcond
Expand All @@ -1730,7 +1735,7 @@ typedef enum ur_device_info_t {
/// - ::UR_RESULT_ERROR_INVALID_NULL_HANDLE
/// + `NULL == hDevice`
/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION
/// + `::UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP < propName`
/// + `::UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP < propName`
/// - ::UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION
/// + If `propName` is not supported by the adapter.
/// - ::UR_RESULT_ERROR_INVALID_SIZE
Expand Down Expand Up @@ -7428,6 +7433,27 @@ urEnqueueWriteHostPipe(
///< an element of the phEventWaitList array.
);

#if !defined(__GNUC__)
#pragma endregion
#endif
// Intel 'oneAPI' Unified Runtime Experimental device descriptor for querying Intel device 2D block array capabilities
#if !defined(__GNUC__)
#pragma region 2d_block_array_capabilities_(experimental)
#endif
///////////////////////////////////////////////////////////////////////////////
/// @brief Intel GPU 2D block array capabilities
// The *_flags_t typedef is the 32-bit bit-field type that carries a combination
// of the individual *_flag_t enumerators declared below.
typedef uint32_t ur_exp_device_2d_block_array_capability_flags_t;
typedef enum ur_exp_device_2d_block_array_capability_flag_t {
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD = UR_BIT(0), ///< Load instructions are supported
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE = UR_BIT(1), ///< Store instructions are supported
// FORCE_UINT32 is a sentinel hidden from the generated docs (@cond); presumably
// it forces the enum's underlying type to be at least 32 bits wide - confirm.
/// @cond
UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_FORCE_UINT32 = 0x7fffffff
/// @endcond

} ur_exp_device_2d_block_array_capability_flag_t;
/// @brief Bit Mask for validating ur_exp_device_2d_block_array_capability_flags_t
// 0xfffffffc has every bit set except bits 0 and 1. Assuming UR_BIT(n) is
// (1 << n), those two are exactly the LOAD and STORE bits, so the mask selects
// the bits that have no defined meaning. Presumably a value is valid when
// (flags & MASK) == 0 - confirm against the code that uses this macro.
#define UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAGS_MASK 0xfffffffc

#if !defined(__GNUC__)
#pragma endregion
#endif
Expand Down
8 changes: 8 additions & 0 deletions include/ur_print.h
Original file line number Diff line number Diff line change
Expand Up @@ -874,6 +874,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urPrintMapFlags(enum ur_map_flag_t value, ch
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintUsmMigrationFlags(enum ur_usm_migration_flag_t value, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_exp_device_2d_block_array_capability_flag_t enum
/// @returns
/// - ::UR_RESULT_SUCCESS
/// - ::UR_RESULT_ERROR_INVALID_SIZE
/// - `buff_size < out_size`
UR_APIEXPORT ur_result_t UR_APICALL urPrintExpDevice_2dBlockArrayCapabilityFlags(enum ur_exp_device_2d_block_array_capability_flag_t value, char *buffer, const size_t buff_size, size_t *out_size);

///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_exp_image_copy_flag_t enum
/// @returns
Expand Down
93 changes: 93 additions & 0 deletions include/ur_print.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,9 @@ inline ur_result_t printFlag<ur_map_flag_t>(std::ostream &os, uint32_t flag);
template <>
inline ur_result_t printFlag<ur_usm_migration_flag_t>(std::ostream &os, uint32_t flag);

template <>
inline ur_result_t printFlag<ur_exp_device_2d_block_array_capability_flag_t>(std::ostream &os, uint32_t flag);

template <>
inline ur_result_t printFlag<ur_exp_image_copy_flag_t>(std::ostream &os, uint32_t flag);

Expand Down Expand Up @@ -328,6 +331,7 @@ inline std::ostream &operator<<(std::ostream &os, [[maybe_unused]] const struct
inline std::ostream &operator<<(std::ostream &os, enum ur_execution_info_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_map_flag_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_usm_migration_flag_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_device_2d_block_array_capability_flag_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_image_copy_flag_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_sampler_cubemap_filter_mode_t value);
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_external_mem_type_t value);
Expand Down Expand Up @@ -1918,6 +1922,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_adapter_info_t value)
case UR_ADAPTER_INFO_REFERENCE_COUNT:
os << "UR_ADAPTER_INFO_REFERENCE_COUNT";
break;
case UR_ADAPTER_INFO_VERSION:
os << "UR_ADAPTER_INFO_VERSION";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -1958,6 +1965,18 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_adapter_inf

os << ")";
} break;
case UR_ADAPTER_INFO_VERSION: {
const uint32_t *tptr = (const uint32_t *)ptr;
if (sizeof(uint32_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(uint32_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

os << *tptr;

os << ")";
} break;
default:
os << "unknown enumerator";
return UR_RESULT_ERROR_INVALID_ENUMERATION;
Expand Down Expand Up @@ -2665,6 +2684,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
case UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP:
os << "UR_DEVICE_INFO_LOW_POWER_EVENTS_EXP";
break;
case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP:
os << "UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP";
break;
default:
os << "unknown enumerator";
break;
Expand Down Expand Up @@ -4472,6 +4494,19 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr, ur_device_info

os << ")";
} break;
case UR_DEVICE_INFO_2D_BLOCK_ARRAY_CAPABILITIES_EXP: {
const ur_exp_device_2d_block_array_capability_flags_t *tptr = (const ur_exp_device_2d_block_array_capability_flags_t *)ptr;
if (sizeof(ur_exp_device_2d_block_array_capability_flags_t) > size) {
os << "invalid size (is: " << size << ", expected: >=" << sizeof(ur_exp_device_2d_block_array_capability_flags_t) << ")";
return UR_RESULT_ERROR_INVALID_SIZE;
}
os << (const void *)(tptr) << " (";

ur::details::printFlag<ur_exp_device_2d_block_array_capability_flag_t>(os,
*tptr);

os << ")";
} break;
default:
os << "unknown enumerator";
return UR_RESULT_ERROR_INVALID_ENUMERATION;
Expand Down Expand Up @@ -9455,6 +9490,64 @@ inline ur_result_t printFlag<ur_usm_migration_flag_t>(std::ostream &os, uint32_t
}
} // namespace ur::details
///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_exp_device_2d_block_array_capability_flag_t type
/// @details Writes the enumerator's identifier to the stream; any value that is
///     not one of the named capability bits is written as "unknown enumerator".
/// @returns
///     std::ostream &
inline std::ostream &operator<<(std::ostream &os, enum ur_exp_device_2d_block_array_capability_flag_t value) {
    // Start from the fallback text and overwrite it for recognised enumerators.
    const char *label = "unknown enumerator";
    switch (value) {
    case UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD:
        label = "UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD";
        break;
    case UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE:
        label = "UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE";
        break;
    default:
        break;
    }
    os << label;
    return os;
}

namespace ur::details {
///////////////////////////////////////////////////////////////////////////////
/// @brief Print ur_exp_device_2d_block_array_capability_flag_t flag
/// @details Writes the name of every known capability bit set in `flag`,
///     separated by " | ". Any leftover bits are written as
///     "unknown bit flags <32-digit binary>", and a value of zero is written as "0".
/// @returns
///     - ::UR_RESULT_SUCCESS (always)
template <>
inline ur_result_t printFlag<ur_exp_device_2d_block_array_capability_flag_t>(std::ostream &os, uint32_t flag) {
    // Known bits, in the order in which they are printed.
    const ur_exp_device_2d_block_array_capability_flag_t knownBits[] = {
        UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD,
        UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE,
    };

    uint32_t remaining = flag;
    bool printedAny = false;

    for (const auto bit : knownBits) {
        const uint32_t mask = static_cast<uint32_t>(bit);
        if ((remaining & mask) != mask) {
            continue;
        }
        // Clear the bit so that only unrecognised bits are left at the end.
        remaining ^= mask;
        if (printedAny) {
            os << " | ";
        }
        printedAny = true;
        os << bit;
    }

    if (remaining != 0) {
        if (printedAny) {
            os << " | ";
        }
        os << "unknown bit flags " << std::bitset<32>(remaining);
    } else if (!printedAny) {
        os << "0";
    }
    return UR_RESULT_SUCCESS;
}
} // namespace ur::details
///////////////////////////////////////////////////////////////////////////////
/// @brief Print operator for the ur_exp_image_copy_flag_t type
/// @returns
/// std::ostream &
Expand Down
6 changes: 6 additions & 0 deletions sanitizer-ignorelist.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
[cfi-unrelated-cast]
# std::_Sp_counted_ptr_inplace::_Sp_counted_ptr_inplace() (libstdc++).
# This ctor is used by std::make_shared and needs to cast to uninitialized T*
# in order to call std::allocator_traits<T>::construct.
# See: https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/cfi/cfi_ignorelist.txt
fun:_ZNSt23_Sp_counted_ptr_inplace*
Loading

0 comments on commit 0e46d99

Please sign in to comment.