Skip to content

Commit

Permalink
[wip] HSA HAL
Browse files Browse the repository at this point in the history
  • Loading branch information
makslevental committed Aug 29, 2024
1 parent bfbd397 commit 24bd9b4
Show file tree
Hide file tree
Showing 43 changed files with 7,299 additions and 1 deletion.
7 changes: 7 additions & 0 deletions .github/workflows/ci-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,13 @@ jobs:
key: linux-build-test-cpp-asserts-manylinux-v2-${{ github.sha }}
restore-keys: linux-build-test-cpp-

- name: Build ROCT/ROCR
  run: |
    export cache_dir="${{ env.CACHE_DIR }}"
    # Source (rather than execute) the script so that the variables it sets,
    # notably rocr_install_dir, remain visible in this shell.
    source build_tools/ci/build_roct_rocr.sh
    # "hsa-runtime64_ROOT" contains '-', so it cannot be a bash variable name:
    # "${hsa-runtime64_ROOT}" is the default-value expansion of $hsa and yields
    # the literal text "runtime64_ROOT" when $hsa is unset. Persist the ROCR
    # install prefix computed by the script for the following steps instead.
    echo "hsa-runtime64_ROOT=${rocr_install_dir}" >> "$GITHUB_ENV"
- name: Build packages
run: |
export cache_dir="${{ env.CACHE_DIR }}"
Expand Down
7 changes: 7 additions & 0 deletions .github/workflows/ci-macos.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,13 @@ jobs:
key: ${{ matrix.runs-on }}-build-test-cpp-asserts-v1-${{ github.sha }}-${{ github.event.repository.updated_at }}
restore-keys: ${{ matrix.runs-on }}-build-test-cpp-

- name: Build ROCT/ROCR
  run: |
    export cache_dir="${{ env.CACHE_DIR }}"
    # Source (rather than execute) the script so that the variables it sets,
    # notably rocr_install_dir, remain visible in this shell.
    source build_tools/ci/build_roct_rocr.sh
    # "hsa-runtime64_ROOT" contains '-', so it cannot be a bash variable name:
    # "${hsa-runtime64_ROOT}" is the default-value expansion of $hsa and yields
    # the literal text "runtime64_ROOT" when $hsa is unset. Persist the ROCR
    # install prefix computed by the script for the following steps instead.
    echo "hsa-runtime64_ROOT=${rocr_install_dir}" >> "$GITHUB_ENV"
- name: Build packages
run: |
export cache_dir="${{ env.CACHE_DIR }}"
Expand Down
7 changes: 7 additions & 0 deletions .github/workflows/ci-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,13 @@ jobs:
key: windows-build-test-cpp-asserts-v1-${{ github.sha }}-${{ github.event.repository.updated_at }}
restore-keys: windows-build-test-cpp-

- name: Build ROCT/ROCR
  run: |
    export cache_dir="${{ env.CACHE_DIR }}"
    # Source (rather than execute) the script so that the variables it sets,
    # notably rocr_install_dir, remain visible in this shell.
    source build_tools/ci/build_roct_rocr.sh
    # "hsa-runtime64_ROOT" contains '-', so it cannot be a bash variable name:
    # "${hsa-runtime64_ROOT}" is the default-value expansion of $hsa and yields
    # the literal text "runtime64_ROOT" when $hsa is unset. Persist the ROCR
    # install prefix computed by the script for the following steps instead.
    echo "hsa-runtime64_ROOT=${rocr_install_dir}" >> "$GITHUB_ENV"
- name: Build packages
run: |
export cache_dir="${{ env.CACHE_DIR }}"
Expand Down
9 changes: 9 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -18,3 +18,12 @@
[submodule "third_party/openssl"]
path = third_party/openssl
url = https://github.com/viaduck/openssl-cmake.git
# ROCR-Runtime sources, taken from the nod-ai repository. No "branch" is
# recorded for this submodule.
[submodule "third_party/ROCR-Runtime"]
path = third_party/ROCR-Runtime
url = https://github.com/nod-ai/ROCR-Runtime.git
# Recommend a shallow (depth 1) clone when this submodule is fetched.
shallow = true
# ROCT-Thunk-Interface sources, taken from the upstream ROCm repository.
[submodule "third_party/ROCT-Thunk-Interface"]
path = third_party/ROCT-Thunk-Interface
url = https://github.com/ROCm/ROCT-Thunk-Interface.git
# Recommend a shallow (depth 1) clone when this submodule is fetched.
shallow = true
# Remote branch followed by `git submodule update --remote`.
branch = rocm-6.2.x
59 changes: 59 additions & 0 deletions build_tools/ci/build_roct_rocr.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
#!/bin/bash
#
# Configures, builds and installs third_party/ROCT-Thunk-Interface and then
# third_party/ROCR-Runtime (which is pointed at the ROCT install through
# CMAKE_PREFIX_PATH), using CMake + Ninja in Release mode.
#
# CI sources this file instead of executing it so that the variables set here
# stay visible to the caller. When sourced, `set -eux -o errtrace` and the
# CC/CXX/CCACHE_*/CMAKE_* exports below therefore also apply to the caller.
#
# Input:
#   cache_dir  Optional. Directory that holds the ccache cache; defaults to
#              <repo_root>/.build-cache.
# Output:
#   <repo_root>/roct-build, <repo_root>/rocr-build      CMake build trees.
#   <repo_root>/roct-install, <repo_root>/rocr-install  Install prefixes.
#   hsa_runtime64_ROOT  Exported; set to the ROCR install prefix.

set -eux -o errtrace

# BASH_SOURCE[0] is the path of this file both when it is executed and when it
# is sourced; $0 would name the calling shell/script in the sourced case and
# make every path below resolve relative to the wrong directory.
this_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
repo_root="$(cd "$this_dir/../.." && pwd)"

# Fails here (set -e) if either submodule has not been checked out.
roct_dir="$(cd "$repo_root/third_party/ROCT-Thunk-Interface" && pwd)"
rocr_dir="$(cd "$repo_root/third_party/ROCR-Runtime" && pwd)"

build_roct_dir="$repo_root/roct-build"
roct_install_dir="$repo_root/roct-install"
mkdir -p "$build_roct_dir"
build_roct_dir="$(cd "$build_roct_dir" && pwd)"

build_rocr_dir="$repo_root/rocr-build"
rocr_install_dir="$repo_root/rocr-install"
mkdir -p "$build_rocr_dir"
build_rocr_dir="$(cd "$build_rocr_dir" && pwd)"

# Tolerate an unset cache_dir despite `set -u`.
cache_dir="${cache_dir:-}"

if [ -z "${cache_dir}" ]; then
  cache_dir="${repo_root}/.build-cache"
  mkdir -p "${cache_dir}"
  cache_dir="$(cd "${cache_dir}" && pwd)"
fi
echo "Caching to ${cache_dir}"
mkdir -p "${cache_dir}/ccache"

# Select the compiler per host OS; other hosts keep CMake's default choice.
if [[ "$OSTYPE" == "linux-gnu"* ]]; then
  export CMAKE_TOOLCHAIN_FILE="$this_dir/linux_default_toolchain.cmake"
  export CC=clang
  export CXX=clang++
elif [[ "$OSTYPE" == "msys"* ]]; then
  export CC=clang-cl.exe
  export CXX=clang-cl.exe
fi
export CCACHE_DIR="${cache_dir}/ccache"
export CCACHE_MAXSIZE="700M"
export CMAKE_C_COMPILER_LAUNCHER=ccache
export CMAKE_CXX_COMPILER_LAUNCHER=ccache

# No `cd` is needed before either build: -S/-B receive absolute paths, and
# changing directory here would leak into the caller when this file is sourced.
cmake -GNinja \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_INSTALL_PREFIX="$roct_install_dir" \
  -S "$roct_dir" -B "$build_roct_dir"
cmake --build "$build_roct_dir" --target install

# ROCR locates the freshly installed ROCT through CMAKE_PREFIX_PATH.
cmake -GNinja \
  -DCMAKE_BUILD_TYPE=Release \
  -DCMAKE_INSTALL_PREFIX="$rocr_install_dir" \
  -DCMAKE_PREFIX_PATH="$roct_install_dir" \
  -S "$rocr_dir" -B "$build_rocr_dir"
cmake --build "$build_rocr_dir" --target install

# "hsa-runtime64_ROOT" is not a valid shell identifier (it contains '-'), so
# `export hsa-runtime64_ROOT=...` is rejected by bash and aborts the script
# under `set -e`. Export the ROCR install prefix under a valid name instead;
# callers forward it to CMake themselves, e.g. with
# -Dhsa-runtime64_ROOT="$hsa_runtime64_ROOT" or by writing
# "hsa-runtime64_ROOT=<prefix>" to $GITHUB_ENV.
export hsa_runtime64_ROOT="$rocr_install_dir"
2 changes: 1 addition & 1 deletion build_tools/ci/build_test_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ if [[ "$OSTYPE" != "darwin"* ]]; then
-DCMAKE_CXX_COMPILER="${CXX}" \
-DLLVM_TARGET_ARCH=X86 \
-DLLVM_TARGETS_TO_BUILD=X86 \
-DIREE_EXTERNAL_HAL_DRIVERS=xrt \
-DIREE_EXTERNAL_HAL_DRIVERS=hsa \
-S $iree_dir -B $build_dir
else
cmake $CMAKE_ARGS \
Expand Down
10 changes: 10 additions & 0 deletions iree_runtime_plugin.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,15 @@ if(IREE_AMD_AIE_ENABLE_XRT_DRIVER)
include(iree_aie_bootgen)
endif()

# The HSA HAL driver is opt-in: IREE_AMD_AIE_ENABLE_HSA_DRIVER is ON only when
# "hsa" is listed in IREE_EXTERNAL_HAL_DRIVERS, in which case the hsa-runtime64
# CMake config package must be found or configuration fails.
if("hsa" IN_LIST IREE_EXTERNAL_HAL_DRIVERS)
  message(STATUS "Enabling HSA build because it is an enabled HAL driver")
  set(IREE_AMD_AIE_ENABLE_HSA_DRIVER ON)
  find_package(hsa-runtime64 CONFIG REQUIRED)
else()
  set(IREE_AMD_AIE_ENABLE_HSA_DRIVER OFF)
endif()

# Subdirectories are added after the driver flags above have been computed.
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/runtime/src AMD-AIE)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/experimental AMD-AIE-experimental)
4 changes: 4 additions & 0 deletions runtime/src/iree-amd-aie/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ if(IREE_AMD_AIE_ENABLE_XRT_DRIVER)
add_subdirectory(driver/xrt)
endif()

# Build the HSA HAL driver sources only when that driver has been enabled.
if(IREE_AMD_AIE_ENABLE_HSA_DRIVER)
add_subdirectory(driver/hsa)
endif()

# Flatbuffer schema generation does not require XRT. Moreover the generated
# flatbuffer header files are used by the compiler to create artefacts
# (.vmfb file), and so the schema sub-directory is required even when not
Expand Down
92 changes: 92 additions & 0 deletions runtime/src/iree-amd-aie/driver/hsa/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
# Copyright 2023 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# NOTE(review): iree_add_all_subdirs is a helper defined outside this file;
# presumably it adds every subdirectory of this directory to the build, which
# is where the "registration" target named below would come from - confirm.
iree_add_all_subdirs()

# Registers an external HAL driver called "hsa", naming the target that
# provides it and the function that registers the driver module.
iree_register_external_hal_driver(
NAME
hsa
DRIVER_TARGET
iree-amd-aie::driver::hsa::registration
REGISTER_FN
iree_hal_hsa_driver_module_register
)

# Library "dynamic_symbols": the HSA symbol tables plus the status helpers
# (status_util). It depends on the imported hsa-runtime64 target and on IREE's
# dynamic_library support.
iree_cc_library(
NAME
dynamic_symbols
HDRS
"dynamic_symbols.h"
"status_util.h"
# Listed as a textual header rather than a regular one.
TEXTUAL_HDRS
"dynamic_symbol_tables.h"
SRCS
"dynamic_symbols.c"
"hsa_headers.h"
"status_util.c"
DEPS
hsa-runtime64::hsa-runtime64
iree::base
iree::base::core_headers
iree::base::internal::dynamic_library
PUBLIC
)

# Library "hsa": the HSA HAL driver implementation. "api.h" is its only public
# header; every other header is listed under SRCS alongside the sources.
iree_cc_library(
NAME
hsa
HDRS
"api.h"
SRCS
"api.h"
"event_pool.c"
"event_pool.h"
"event_semaphore.c"
"event_semaphore.h"
"hsa_allocator.c"
"hsa_allocator.h"
"hsa_buffer.c"
"hsa_buffer.h"
"hsa_device.c"
"hsa_device.h"
"hsa_driver.c"
"native_executable.c"
"native_executable.h"
"nop_executable_cache.c"
"nop_executable_cache.h"
"pending_queue_actions.c"
"pending_queue_actions.h"
"pipeline_layout.c"
"pipeline_layout.h"
"queue_command_buffer.c"
"queue_command_buffer.h"
"timepoint_pool.c"
"timepoint_pool.h"
DEPS
hsa-runtime64::hsa-runtime64
# The sibling library defined earlier in this file.
::dynamic_symbols
iree::base
iree::base::core_headers
iree::base::internal
iree::base::internal::arena
iree::base::internal::atomic_slist
iree::base::internal::event_pool
iree::base::internal::synchronization
iree::base::internal::threading
iree::base::internal::wait_handle
iree::base::internal::flatcc::parsing
iree::hal
iree::hal::utils::collective_batch
iree::hal::utils::deferred_command_buffer
iree::hal::utils::file_transfer
iree::hal::utils::memory_file
iree::hal::utils::resource_set
iree::hal::utils::semaphore_base
# NOTE(review): this is the ROCm executable schema, not an HSA-specific one -
# confirm that reusing it is intended.
iree::schemas::rocm_executable_def_c_fbs
PUBLIC
)

108 changes: 108 additions & 0 deletions runtime/src/iree-amd-aie/driver/hsa/api.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
// Copyright 2023 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// See iree/base/api.h for documentation on the API conventions used.

#ifndef IREE_EXPERIMENTAL_HSA_API_H_
#define IREE_EXPERIMENTAL_HSA_API_H_

#include "iree/base/api.h"
#include "iree/hal/api.h"

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

//===----------------------------------------------------------------------===//
// iree_hal_hsa_device_t
//===----------------------------------------------------------------------===//

// Capacity limits for a single memory pool. Both values are byte counts.
typedef struct iree_hal_hsa_memory_pool_params_t {
// Minimum number of bytes to keep in the pool when trimming with
// iree_hal_device_trim.
uint64_t minimum_capacity;
// Soft maximum number of bytes to keep in the pool.
// When more than this is allocated the extra will be freed at the next
// device synchronization in order to remain under the threshold.
uint64_t release_threshold;
} iree_hal_hsa_memory_pool_params_t;

// Pool parameters grouped by memory type: one set for DEVICE_LOCAL
// allocations and one set shared by all host-visible/host-local types.
typedef struct iree_hal_hsa_memory_pooling_params_t {
// Used exclusively for DEVICE_LOCAL allocations.
iree_hal_hsa_memory_pool_params_t device_local;
// Used for any host-visible/host-local memory types.
iree_hal_hsa_memory_pool_params_t other;
} iree_hal_hsa_memory_pooling_params_t;

// Parameters configuring an iree_hal_hsa_device_t.
// Must be initialized with iree_hal_hsa_device_params_initialize prior to
// use.
typedef struct iree_hal_hsa_device_params_t {
// Number of queues exposed on the device.
// Each queue acts as a separate synchronization scope where all work executes
// concurrently unless prohibited by semaphores.
iree_host_size_t queue_count;

// Total size of each block in the device shared block pool.
// Larger sizes will lower overhead and ensure the heap isn't hit for
// transient allocations while also increasing memory consumption.
// NOTE(review): presumably a byte count - confirm.
iree_host_size_t arena_block_size;

// The host and device event pool capacity.
// The HSA driver implements semaphores with host and device events. This
// parameter controls the size of those pools. Larger values make creating
// semaphore values quicker, at the cost of increased memory consumption.
iree_host_size_t event_pool_capacity;

// Enables tracing of command buffers when IREE tracing is enabled.
// May take advantage of additional extensions for more accurate timing or
// hardware-specific performance counters.
//
// NOTE: tracing has a non-trivial overhead and will skew the timing of
// submissions and introduce false barriers between dispatches. Use this to
// identify slow dispatches and refine from there; be wary of whole-program
// tracing with this enabled.
bool queue_tracing;

// Parameters for each memory pool used for queue-ordered allocations.
iree_hal_hsa_memory_pooling_params_t memory_pools;
} iree_hal_hsa_device_params_t;

// Initializes |out_params| to default values.
// An iree_hal_hsa_device_params_t must be passed through this function before
// it is used; override individual fields afterwards.
IREE_API_EXPORT void iree_hal_hsa_device_params_initialize(
iree_hal_hsa_device_params_t* out_params);

//===----------------------------------------------------------------------===//
// iree_hal_hsa_driver_t
//===----------------------------------------------------------------------===//

// HSA HAL driver creation options.
typedef struct iree_hal_hsa_driver_options_t {
// The index of the default HSA device to use within the list of available
// devices.
// NOTE(review): presumably 0-based; the ordering of that list is not visible
// from this header - confirm.
int default_device_index;
} iree_hal_hsa_driver_options_t;

// Initializes the given |out_options| with default driver creation options.
// NOTE(review): the default values themselves are set by the implementation
// and are not visible from this header.
IREE_API_EXPORT void iree_hal_hsa_driver_options_initialize(
iree_hal_hsa_driver_options_t* out_options);

// Creates an HSA HAL driver with the given |options|, from which HSA devices
// can be enumerated and created with specific parameters.
//
// NOTE(review): |identifier| is presumably the name the driver is registered
// under, |default_params| the device parameters used when none are given
// explicitly, and |host_allocator| the allocator for host-side driver state -
// confirm against the implementation.
//
// |out_driver| must be released by the caller (see iree_hal_driver_release).
IREE_API_EXPORT iree_status_t iree_hal_hsa_driver_create(
iree_string_view_t identifier, const iree_hal_hsa_driver_options_t* options,
const iree_hal_hsa_device_params_t* default_params,
iree_allocator_t host_allocator, iree_hal_driver_t** out_driver);

#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

#endif // IREE_EXPERIMENTAL_HSA_API_H_
Loading

0 comments on commit 24bd9b4

Please sign in to comment.