Skip to content

Commit

Permalink
[wip] HSA HAL
Browse files Browse the repository at this point in the history
use nod-ai/ROCR-Runtime/iree-aie
  • Loading branch information
makslevental committed Sep 3, 2024
1 parent 065218b commit 14e5720
Show file tree
Hide file tree
Showing 41 changed files with 7,232 additions and 2 deletions.
8 changes: 7 additions & 1 deletion .github/workflows/ci-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:
run: |
dnf install -y almalinux-release-devel epel-release
yum remove -y openssl-devel zlib-devel || true
yum install -y protobuf-devel protobuf-compiler tmate
yum install -y protobuf-devel protobuf-compiler libnuma-devel tmate
- name: Python deps
run: |
Expand All @@ -73,6 +73,12 @@ jobs:
key: ${{ env.CACHE_KEY }}
restore-keys: linux-build-test-cpp-

# Builds and installs ROCT/ROCR via build_tools/ci/build_roct_rocr.sh, then
# appends hsa-runtime64_ROOT to $GITHUB_ENV so that subsequent steps see it.
# NOTE(review): $PWD/rocr-install only matches the script's install prefix
# (<repo_root>/rocr-install) if this step runs from the repository root —
# confirm the job's working directory.
- name: Build ROCT/ROCR
run: |
export cache_dir="${{ env.CACHE_DIR }}"
bash build_tools/ci/build_roct_rocr.sh
echo "hsa-runtime64_ROOT=$PWD/rocr-install" >> $GITHUB_ENV
- name: Build packages
run: |
export cache_dir="${{ env.CACHE_DIR }}"
Expand Down
5 changes: 5 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,8 @@
path = third_party/iree
url = https://github.com/iree-org/iree.git
shallow = true
# nod-ai fork of ROCR-Runtime, checked out under third_party/ROCR-Runtime.
# `branch` names the branch followed by `git submodule update --remote`;
# `shallow` asks for a shallow clone of the submodule.
[submodule "third_party/ROCR-Runtime"]
path = third_party/ROCR-Runtime
url = https://github.com/nod-ai/ROCR-Runtime.git
shallow = true
branch = iree-aie
54 changes: 54 additions & 0 deletions build_tools/ci/build_roct_rocr.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/bash
#
# Configures, builds and installs third_party/ROCT-Thunk-Interface and then
# third_party/ROCR-Runtime with CMake + Ninja (Release), using ccache.
#
# Inputs (environment):
#   cache_dir  Optional. Directory that holds the ccache store. Defaults to
#              <repo_root>/.build-cache when unset or empty.
#
# Outputs (all under <repo_root>):
#   roct-build/  roct-install/   build tree and install prefix for ROCT
#   rocr-build/  rocr-install/   build tree and install prefix for ROCR
#
# Every path expansion is quoted so that a checkout or cache location that
# contains spaces or glob characters does not get word-split.

set -eux -o errtrace

this_dir="$(cd "$(dirname "$0")" && pwd)"
repo_root="$(cd "$this_dir/../.." && pwd)"

# `cd ... && pwd` both canonicalizes the path and (with `set -e`) aborts early
# when a source checkout is missing.
roct_dir="$(cd "$repo_root/third_party/ROCT-Thunk-Interface" && pwd)"
rocr_dir="$(cd "$repo_root/third_party/ROCR-Runtime" && pwd)"

build_roct_dir="$repo_root/roct-build"
roct_install_dir="$repo_root/roct-install"
mkdir -p "$build_roct_dir"
build_roct_dir="$(cd "$build_roct_dir" && pwd)"

build_rocr_dir="$repo_root/rocr-build"
rocr_install_dir="$repo_root/rocr-install"
mkdir -p "$build_rocr_dir"
build_rocr_dir="$(cd "$build_rocr_dir" && pwd)"

# `set -u` is active, so give cache_dir an explicit empty default before
# testing it.
cache_dir="${cache_dir:-}"

if [ -z "${cache_dir}" ]; then
  cache_dir="${repo_root}/.build-cache"
  mkdir -p "${cache_dir}"
  cache_dir="$(cd "${cache_dir}" && pwd)"
fi
echo "Caching to ${cache_dir}"
mkdir -p "${cache_dir}/ccache"

if [[ "$OSTYPE" == "msys"* ]]; then
  export CC=clang-cl.exe
  export CXX=clang-cl.exe
fi
export CCACHE_DIR="${cache_dir}/ccache"
export CCACHE_MAXSIZE="700M"
# Picked up by CMake from the environment as the default compiler launcher.
export CMAKE_C_COMPILER_LAUNCHER=ccache
export CMAKE_CXX_COMPILER_LAUNCHER=ccache

# --- ROCT ---------------------------------------------------------------------
cd "$roct_dir"
cmake -GNinja \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_INSTALL_PREFIX="$roct_install_dir" \
  -S "$roct_dir" -B "$build_roct_dir"
cmake --build "$build_roct_dir" --target install

# --- ROCR ---------------------------------------------------------------------
# CMAKE_PREFIX_PATH points at the ROCT install from the previous stage so the
# ROCR configure step can find it. The ROCR CMake project lives in src/.
cd "$rocr_dir"
cmake -GNinja \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_INSTALL_PREFIX="$rocr_install_dir" \
  -DCMAKE_PREFIX_PATH="$roct_install_dir" \
  -DIMAGE_SUPPORT=OFF \
  -S "$rocr_dir/src" -B "$build_rocr_dir"
cmake --build "$build_rocr_dir" --target install
2 changes: 1 addition & 1 deletion build_tools/ci/build_test_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ if [[ "$OSTYPE" != "darwin"* ]]; then
-DCMAKE_CXX_COMPILER="${CXX}" \
-DLLVM_TARGET_ARCH=X86 \
-DLLVM_TARGETS_TO_BUILD=X86 \
-DIREE_EXTERNAL_HAL_DRIVERS=xrt \
-DIREE_EXTERNAL_HAL_DRIVERS=hsa \
-S $iree_dir -B $build_dir
else
cmake $CMAKE_ARGS \
Expand Down
11 changes: 11 additions & 0 deletions iree_runtime_plugin.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,16 @@ if(IREE_AMD_AIE_ENABLE_XRT_DRIVER)
include(iree_aie_bootgen)
endif()

# The HSA driver is opt-in: it is switched on only when "hsa" is listed in
# IREE_EXTERNAL_HAL_DRIVERS. When it is on, the hsa-runtime64 package must be
# found in config mode (either the hyphenated or the underscored package name
# is accepted); configuration fails otherwise.
if("hsa" IN_LIST IREE_EXTERNAL_HAL_DRIVERS)
  message(STATUS "Enabling HSA build because it is an enabled HAL driver")
  set(IREE_AMD_AIE_ENABLE_HSA_DRIVER ON)
  find_package(
    hsa-runtime64
    CONFIG
    REQUIRED
    NAMES
      hsa-runtime64
      hsa_runtime64
  )
else()
  set(IREE_AMD_AIE_ENABLE_HSA_DRIVER OFF)
endif()

add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/runtime/src AMD-AIE)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/experimental AMD-AIE-experimental)
4 changes: 4 additions & 0 deletions runtime/src/iree-amd-aie/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ if(IREE_AMD_AIE_ENABLE_XRT_DRIVER)
add_subdirectory(driver/xrt)
endif()

# Only descend into the HSA HAL driver sources when
# IREE_AMD_AIE_ENABLE_HSA_DRIVER is set.
if(IREE_AMD_AIE_ENABLE_HSA_DRIVER)
add_subdirectory(driver/hsa)
endif()

# Flatbuffer schema generation does not require XRT. Moreover the generated
# flatbuffer header files are used by the compiler to create artefacts
# (.vmfb file), and so the schema sub-directory is required even when not
Expand Down
92 changes: 92 additions & 0 deletions runtime/src/iree-amd-aie/driver/hsa/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
# Copyright 2023 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# NOTE(review): presumably adds each subdirectory of this directory to the
# build; the `registration` target named below is not defined in this file, so
# it is expected to come from one of them — confirm.
iree_add_all_subdirs()

# Registers this driver with IREE as an external HAL driver named "hsa" (the
# same name that is passed via IREE_EXTERNAL_HAL_DRIVERS), giving the target to
# link and the C function used to register the driver module.
iree_register_external_hal_driver(
NAME
hsa
DRIVER_TARGET
iree-amd-aie::driver::hsa::registration
REGISTER_FN
iree_hal_hsa_driver_module_register
)

# Library bundling the HSA dynamic-symbol sources and the status utilities
# (dynamic_symbols.*, status_util.*, hsa_headers.h).
# It links the imported hsa-runtime64::hsa-runtime64 target, which is only
# available after a successful find_package(hsa-runtime64).
# NOTE(review): dynamic_symbol_tables.h is listed under TEXTUAL_HDRS —
# presumably because it is meant to be #included as a table rather than
# compiled on its own; confirm.
iree_cc_library(
NAME
dynamic_symbols
HDRS
"dynamic_symbols.h"
"status_util.h"
TEXTUAL_HDRS
"dynamic_symbol_tables.h"
SRCS
"dynamic_symbols.c"
"hsa_headers.h"
"status_util.c"
DEPS
hsa-runtime64::hsa-runtime64
iree::base
iree::base::core_headers
iree::base::internal::dynamic_library
PUBLIC
)

# The HSA HAL driver implementation library.
# api.h is the only header exported through HDRS; every other header is listed
# under SRCS alongside its .c file and is therefore internal to this target.
# Depends on the sibling ::dynamic_symbols library and on the imported
# hsa-runtime64::hsa-runtime64 target.
# NOTE(review): the dependency on iree::schemas::rocm_executable_def_c_fbs
# looks inherited from a ROCm-based driver — confirm it is the intended
# executable schema for this driver.
iree_cc_library(
NAME
hsa
HDRS
"api.h"
SRCS
"api.h"
"event_pool.c"
"event_pool.h"
"event_semaphore.c"
"event_semaphore.h"
"hsa_allocator.c"
"hsa_allocator.h"
"hsa_buffer.c"
"hsa_buffer.h"
"hsa_device.c"
"hsa_device.h"
"hsa_driver.c"
"native_executable.c"
"native_executable.h"
"nop_executable_cache.c"
"nop_executable_cache.h"
"pending_queue_actions.c"
"pending_queue_actions.h"
"pipeline_layout.c"
"pipeline_layout.h"
"queue_command_buffer.c"
"queue_command_buffer.h"
"timepoint_pool.c"
"timepoint_pool.h"
DEPS
hsa-runtime64::hsa-runtime64
::dynamic_symbols
iree::base
iree::base::core_headers
iree::base::internal
iree::base::internal::arena
iree::base::internal::atomic_slist
iree::base::internal::event_pool
iree::base::internal::synchronization
iree::base::internal::threading
iree::base::internal::wait_handle
iree::base::internal::flatcc::parsing
iree::hal
iree::hal::utils::collective_batch
iree::hal::utils::deferred_command_buffer
iree::hal::utils::file_transfer
iree::hal::utils::memory_file
iree::hal::utils::resource_set
iree::hal::utils::semaphore_base
iree::schemas::rocm_executable_def_c_fbs
PUBLIC
)

108 changes: 108 additions & 0 deletions runtime/src/iree-amd-aie/driver/hsa/api.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
// Copyright 2023 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// See iree/base/api.h for documentation on the API conventions used.

#ifndef IREE_EXPERIMENTAL_HSA_API_H_
#define IREE_EXPERIMENTAL_HSA_API_H_

#include "iree/base/api.h"
#include "iree/hal/api.h"

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

//===----------------------------------------------------------------------===//
// iree_hal_hsa_device_t
//===----------------------------------------------------------------------===//

// Sizing parameters for a single memory pool: how many bytes survive a trim
// and the soft ceiling above which excess memory is released.
typedef struct iree_hal_hsa_memory_pool_params_t {
// Minimum number of bytes to keep in the pool when trimming with
// iree_hal_device_trim.
uint64_t minimum_capacity;
// Soft maximum number of bytes to keep in the pool.
// When more than this is allocated, the extra will be freed at the next
// device synchronization in order to remain under the threshold.
uint64_t release_threshold;
} iree_hal_hsa_memory_pool_params_t;

// Groups the pool parameters by memory type: one set for device-local
// allocations and one set for everything else.
typedef struct iree_hal_hsa_memory_pooling_params_t {
// Used exclusively for DEVICE_LOCAL allocations.
iree_hal_hsa_memory_pool_params_t device_local;
// Used for any host-visible/host-local memory types.
iree_hal_hsa_memory_pool_params_t other;
} iree_hal_hsa_memory_pooling_params_t;

// Parameters configuring an iree_hal_hsa_device_t.
// Must be initialized with iree_hal_hsa_device_params_initialize prior to
// use.
typedef struct iree_hal_hsa_device_params_t {
// Number of queues exposed on the device.
// Each queue acts as a separate synchronization scope where all work executes
// concurrently unless prohibited by semaphores.
iree_host_size_t queue_count;

// Total size of each block in the device shared block pool.
// Larger sizes will lower overhead and ensure the heap isn't hit for
// transient allocations while also increasing memory consumption.
iree_host_size_t arena_block_size;

// The host and device event pool capacity.
// The HSA driver implements semaphores with host and device events. This
// parameter controls the size of those pools. Larger values make creating
// semaphore values quicker, at the cost of increased memory consumption.
iree_host_size_t event_pool_capacity;

// Enables tracing of command buffers when IREE tracing is enabled.
// May take advantage of additional extensions for more accurate timing or
// hardware-specific performance counters.
//
// NOTE: tracing has a non-trivial overhead and will skew the timing of
// submissions and introduce false barriers between dispatches. Use this to
// identify slow dispatches and refine from there; be wary of whole-program
// tracing with this enabled.
bool queue_tracing;

// Parameters for each memory pool used for queue-ordered allocations.
iree_hal_hsa_memory_pooling_params_t memory_pools;
} iree_hal_hsa_device_params_t;

// Initializes |out_params| to default values.
// iree_hal_hsa_device_params_t must be initialized with this function before
// it is used.
IREE_API_EXPORT void iree_hal_hsa_device_params_initialize(
iree_hal_hsa_device_params_t* out_params);

//===----------------------------------------------------------------------===//
// iree_hal_hsa_driver_t
//===----------------------------------------------------------------------===//

// HSA HAL driver creation options.
// Initialize with iree_hal_hsa_driver_options_initialize to obtain defaults.
typedef struct iree_hal_hsa_driver_options_t {
// The index of the default HSA device to use within the list of available
// devices.
// NOTE(review): the valid range and out-of-range handling are not visible in
// this header — confirm against the driver implementation.
int default_device_index;
} iree_hal_hsa_driver_options_t;

// Initializes the given |out_options| with default driver creation options.
// The concrete default values are defined by the implementation, not by this
// header.
IREE_API_EXPORT void iree_hal_hsa_driver_options_initialize(
iree_hal_hsa_driver_options_t* out_options);

// Creates an HSA HAL driver with the given |options|, from which HSA devices
// can be enumerated and created with specific parameters.
//
// NOTE(review): |default_params| is presumably the device parameter set
// applied to devices created without explicit parameters, and |identifier| the
// name the driver is known by — confirm against the implementation.
//
// |out_driver| must be released by the caller (see iree_hal_driver_release).
IREE_API_EXPORT iree_status_t iree_hal_hsa_driver_create(
iree_string_view_t identifier, const iree_hal_hsa_driver_options_t* options,
const iree_hal_hsa_device_params_t* default_params,
iree_allocator_t host_allocator, iree_hal_driver_t** out_driver);

#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

#endif // IREE_EXPERIMENTAL_HSA_API_H_
Loading

0 comments on commit 14e5720

Please sign in to comment.