Skip to content

Commit

Permalink
[wip] HSA HAL
Browse files Browse the repository at this point in the history
  • Loading branch information
makslevental committed Aug 29, 2024
1 parent bfbd397 commit fe2cc27
Show file tree
Hide file tree
Showing 38 changed files with 7,210 additions and 1 deletion.
2 changes: 1 addition & 1 deletion build_tools/ci/build_test_cpp.sh
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ if [[ "$OSTYPE" != "darwin"* ]]; then
-DCMAKE_CXX_COMPILER="${CXX}" \
-DLLVM_TARGET_ARCH=X86 \
-DLLVM_TARGETS_TO_BUILD=X86 \
-DIREE_EXTERNAL_HAL_DRIVERS=xrt \
-DIREE_EXTERNAL_HAL_DRIVERS=hsa \
-S $iree_dir -B $build_dir
else
cmake $CMAKE_ARGS \
Expand Down
10 changes: 10 additions & 0 deletions iree_runtime_plugin.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -26,5 +26,15 @@ if(IREE_AMD_AIE_ENABLE_XRT_DRIVER)
include(iree_aie_bootgen)
endif()

# The HSA driver is built only when "hsa" was requested through
# IREE_EXTERNAL_HAL_DRIVERS. In that case the hsa-runtime64 CMake package is a
# hard requirement and configuration fails if it cannot be found.
if("hsa" IN_LIST IREE_EXTERNAL_HAL_DRIVERS)
  message(STATUS "Enabling HSA build because it is an enabled HAL driver")
  set(IREE_AMD_AIE_ENABLE_HSA_DRIVER ON)
  find_package(hsa-runtime64 CONFIG REQUIRED)
else()
  set(IREE_AMD_AIE_ENABLE_HSA_DRIVER OFF)
endif()

add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/runtime/src AMD-AIE)
add_subdirectory(${CMAKE_CURRENT_LIST_DIR}/experimental AMD-AIE-experimental)
4 changes: 4 additions & 0 deletions runtime/src/iree-amd-aie/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,10 @@ if(IREE_AMD_AIE_ENABLE_XRT_DRIVER)
add_subdirectory(driver/xrt)
endif()

# Only descend into the HSA HAL driver sources when the HSA driver is enabled.
if(IREE_AMD_AIE_ENABLE_HSA_DRIVER)
add_subdirectory(driver/hsa)
endif()

# Flatbuffer schema generation does not require XRT. Moreover the generated
# flatbuffer header files are used by the compiler to create artefacts
# (.vmfb file), and so the schema sub-directory is required even when not
Expand Down
92 changes: 92 additions & 0 deletions runtime/src/iree-amd-aie/driver/hsa/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
# Copyright 2023 The IREE Authors
#
# Licensed under the Apache License v2.0 with LLVM Exceptions.
# See https://llvm.org/LICENSE.txt for license information.
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

# NOTE(review): presumably pulls in every child directory that has its own
# CMakeLists.txt (e.g. the registration target referenced below) - confirm
# against the IREE build macros.
iree_add_all_subdirs()

# Register "hsa" as an external HAL driver, naming the target that provides the
# registration code and the function used to register the driver module.
iree_register_external_hal_driver(
  NAME hsa
  DRIVER_TARGET iree-amd-aie::driver::hsa::registration
  REGISTER_FN iree_hal_hsa_driver_module_register
)

# HSA symbol table and status helpers shared by the rest of the driver.
iree_cc_library(
  NAME
    dynamic_symbols
  HDRS
    "dynamic_symbols.h"
    "status_util.h"
  # Listed as a textual header: it is a list of macro invocations rather than
  # a standalone header.
  TEXTUAL_HDRS
    "dynamic_symbol_tables.h"
  SRCS
    "dynamic_symbols.c"
    "hsa_headers.h"
    "status_util.c"
  DEPS
    hsa-runtime64::hsa-runtime64
    iree::base
    iree::base::core_headers
    iree::base::internal::dynamic_library
  PUBLIC
)

# The HSA HAL driver library. "api.h" is the only public header; every other
# header is listed under SRCS next to its implementation file.
iree_cc_library(
  NAME
    hsa
  HDRS
    "api.h"
  SRCS
    "api.h"
    "event_pool.c"
    "event_pool.h"
    "event_semaphore.c"
    "event_semaphore.h"
    "hsa_allocator.c"
    "hsa_allocator.h"
    "hsa_buffer.c"
    "hsa_buffer.h"
    "hsa_device.c"
    "hsa_device.h"
    "hsa_driver.c"
    "native_executable.c"
    "native_executable.h"
    "nop_executable_cache.c"
    "nop_executable_cache.h"
    "pending_queue_actions.c"
    "pending_queue_actions.h"
    "pipeline_layout.c"
    "pipeline_layout.h"
    "queue_command_buffer.c"
    "queue_command_buffer.h"
    "timepoint_pool.c"
    "timepoint_pool.h"
  DEPS
    hsa-runtime64::hsa-runtime64
    ::dynamic_symbols
    iree::base
    iree::base::core_headers
    iree::base::internal
    iree::base::internal::arena
    iree::base::internal::atomic_slist
    iree::base::internal::event_pool
    iree::base::internal::synchronization
    iree::base::internal::threading
    iree::base::internal::wait_handle
    iree::base::internal::flatcc::parsing
    iree::hal
    iree::hal::utils::collective_batch
    iree::hal::utils::deferred_command_buffer
    iree::hal::utils::file_transfer
    iree::hal::utils::memory_file
    iree::hal::utils::resource_set
    iree::hal::utils::semaphore_base
    # NOTE(review): this is the ROCm executable schema - confirm it is the
    # intended executable format for the HSA driver.
    iree::schemas::rocm_executable_def_c_fbs
  PUBLIC
)

108 changes: 108 additions & 0 deletions runtime/src/iree-amd-aie/driver/hsa/api.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
// Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
// Copyright 2023 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

// See iree/base/api.h for documentation on the API conventions used.

#ifndef IREE_EXPERIMENTAL_HSA_API_H_
#define IREE_EXPERIMENTAL_HSA_API_H_

#include "iree/base/api.h"
#include "iree/hal/api.h"

#ifdef __cplusplus
extern "C" {
#endif // __cplusplus

//===----------------------------------------------------------------------===//
// iree_hal_hsa_device_t
//===----------------------------------------------------------------------===//

// Sizing parameters for a single memory pool. Both values are byte counts.
typedef struct iree_hal_hsa_memory_pool_params_t {
// Minimum number of bytes to keep in the pool when trimming with
// iree_hal_device_trim.
uint64_t minimum_capacity;
// Soft maximum number of bytes to keep in the pool.
// When more than this is allocated the extra will be freed at the next
// device synchronization in order to remain under the threshold.
uint64_t release_threshold;
} iree_hal_hsa_memory_pool_params_t;

// Groups the per-pool parameters: one set for device-local allocations and one
// set for everything else.
typedef struct iree_hal_hsa_memory_pooling_params_t {
// Used exclusively for DEVICE_LOCAL allocations.
iree_hal_hsa_memory_pool_params_t device_local;
// Used for any host-visible/host-local memory types.
iree_hal_hsa_memory_pool_params_t other;
} iree_hal_hsa_memory_pooling_params_t;

// Parameters configuring an iree_hal_hsa_device_t.
// Must be initialized with iree_hal_hsa_device_params_initialize prior to
// use.
typedef struct iree_hal_hsa_device_params_t {
// Number of queues exposed on the device.
// Each queue acts as a separate synchronization scope where all work executes
// concurrently unless prohibited by semaphores.
iree_host_size_t queue_count;

// Total size, in bytes, of each block in the device shared block pool.
// Larger sizes will lower overhead and ensure the heap isn't hit for
// transient allocations while also increasing memory consumption.
iree_host_size_t arena_block_size;

// The host and device event pool capacity.
// The HSA driver implements semaphores with host and device events. This
// parameter controls the size of those pools. Larger values make creating
// semaphore values quicker, at the cost of increased memory consumption.
iree_host_size_t event_pool_capacity;

// Enables tracing of command buffers when IREE tracing is enabled.
// May take advantage of additional extensions for more accurate timing or
// hardware-specific performance counters.
//
// NOTE: tracing has a non-trivial overhead and will skew the timing of
// submissions and introduce false barriers between dispatches. Use this to
// identify slow dispatches and refine from there; be wary of whole-program
// tracing with this enabled.
bool queue_tracing;

// Parameters for each memory pool used for queue-ordered allocations.
iree_hal_hsa_memory_pooling_params_t memory_pools;
} iree_hal_hsa_device_params_t;

// Initializes |out_params| to default values.
// Call this before overriding individual fields: an
// iree_hal_hsa_device_params_t must be initialized with this function prior
// to use.
IREE_API_EXPORT void iree_hal_hsa_device_params_initialize(
iree_hal_hsa_device_params_t* out_params);

//===----------------------------------------------------------------------===//
// iree_hal_hsa_driver_t
//===----------------------------------------------------------------------===//

// HSA HAL driver creation options.
// Initialize with iree_hal_hsa_driver_options_initialize to obtain the
// defaults before changing any field.
typedef struct iree_hal_hsa_driver_options_t {
// The index of the default HSA device to use within the list of available
// devices.
int default_device_index;
} iree_hal_hsa_driver_options_t;

// Initializes the given |out_options| with default driver creation options.
// The concrete default values are chosen by the implementation, which is not
// part of this header.
IREE_API_EXPORT void iree_hal_hsa_driver_options_initialize(
iree_hal_hsa_driver_options_t* out_options);

// Creates an HSA HAL driver with the given |options|, from which HSA devices
// can be enumerated and created with specific parameters.
//
// |identifier| names the driver.
// |default_params| are presumably the device parameters applied when a device
// is created without explicit ones - TODO confirm against the implementation.
// |host_allocator| is presumably used for the driver's host-side allocations -
// TODO confirm.
//
// |out_driver| must be released by the caller (see iree_hal_driver_release).
IREE_API_EXPORT iree_status_t iree_hal_hsa_driver_create(
iree_string_view_t identifier, const iree_hal_hsa_driver_options_t* options,
const iree_hal_hsa_device_params_t* default_params,
iree_allocator_t host_allocator, iree_hal_driver_t** out_driver);

#ifdef __cplusplus
} // extern "C"
#endif // __cplusplus

#endif // IREE_EXPERIMENTAL_HSA_API_H_
93 changes: 93 additions & 0 deletions runtime/src/iree-amd-aie/driver/hsa/dynamic_symbol_tables.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Copyright (c) 2024 Advanced Micro Devices, Inc. All Rights Reserved.
// Copyright 2023 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

//===----------------------------------------------------------------------===//
// HSA symbols
//===----------------------------------------------------------------------===//

#include <stdint.h>

// Each entry below names one HSA runtime entry point followed by its parameter
// types. This header defines nothing on its own: the including file is
// expected to define IREE_HAL_HSA_REQUIRED_PFN_DECL(symbol, ...) before
// including it.
//
// Entries must NOT end with a semicolon; any punctuation the expansion needs
// belongs to the macro definition, otherwise a stray empty declaration or
// statement is emitted wherever the table is expanded.

IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_init)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_shut_down)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_agent_get_info, hsa_agent_t,
                               hsa_agent_info_t, void *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_iterate_agents,
                               hsa_status_t (*)(hsa_agent_t, void *), void *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_queue_create, hsa_agent_t, uint32_t,
                               hsa_queue_type32_t,
                               void (*)(hsa_status_t, hsa_queue_t *, void *),
                               void *, uint32_t, uint32_t, hsa_queue_t **)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_signal_wait_scacquire, hsa_signal_t,
                               hsa_signal_condition_t, hsa_signal_value_t,
                               uint64_t, hsa_wait_state_t)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_queue_load_write_index_relaxed,
                               const hsa_queue_t *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_signal_create, hsa_signal_value_t, uint32_t,
                               const hsa_agent_t *, hsa_signal_t *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_queue_store_write_index_release,
                               const hsa_queue_t *, uint64_t)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_queue_add_write_index_relaxed,
                               const hsa_queue_t *, uint64_t)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_signal_store_screlease, hsa_signal_t,
                               hsa_signal_value_t)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_signal_store_relaxed, hsa_signal_t,
                               hsa_signal_value_t)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_signal_add_screlease, hsa_signal_t,
                               hsa_signal_value_t)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_signal_wait_acquire, hsa_signal_t,
                               hsa_signal_condition_t, hsa_signal_value_t,
                               uint64_t, hsa_wait_state_t)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_signal_destroy, hsa_signal_t)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_executable_get_symbol_by_name,
                               hsa_executable_t, const char *,
                               const hsa_agent_t *, hsa_executable_symbol_t *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_executable_symbol_get_info,
                               hsa_executable_symbol_t,
                               hsa_executable_symbol_info_t, void *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_ext_image_create, hsa_agent_t,
                               const hsa_ext_image_descriptor_t *, const void *,
                               hsa_access_permission_t, hsa_ext_image_t *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_executable_create_alt, hsa_profile_t,
                               hsa_default_float_rounding_mode_t, const char *,
                               hsa_executable_t *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_executable_load_agent_code_object,
                               hsa_executable_t, hsa_agent_t,
                               hsa_code_object_reader_t, const char *,
                               hsa_loaded_code_object_t *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_executable_freeze, hsa_executable_t,
                               const char *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_executable_destroy, hsa_executable_t)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_code_object_reader_create_from_memory,
                               const void *, size_t, hsa_code_object_reader_t *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_agent_iterate_regions, hsa_agent_t,
                               hsa_status_t (*)(hsa_region_t, void *), void *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_amd_agent_iterate_memory_pools, hsa_agent_t,
                               hsa_status_t (*)(hsa_amd_memory_pool_t, void *),
                               void *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_region_get_info, hsa_region_t,
                               hsa_region_info_t, void *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_amd_memory_pool_get_info,
                               hsa_amd_memory_pool_t,
                               hsa_amd_memory_pool_info_t, void *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_memory_allocate, hsa_region_t, size_t,
                               void **)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_memory_free, void *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_amd_memory_pool_allocate,
                               hsa_amd_memory_pool_t, size_t, uint32_t, void **)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_amd_memory_pool_free, void *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_amd_memory_async_copy, void *, hsa_agent_t,
                               const void *, hsa_agent_t, size_t, uint32_t,
                               const hsa_signal_t *, hsa_signal_t)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_amd_signal_async_handler, hsa_signal_t,
                               hsa_signal_condition_t, hsa_signal_value_t,
                               hsa_amd_signal_handler, void *)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_memory_copy, void *, const void *, size_t)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_amd_memory_lock_to_pool, void *, size_t,
                               hsa_agent_t *, int, hsa_amd_memory_pool_t,
                               uint32_t, void **)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_amd_memory_fill, void *, uint32_t, size_t)
IREE_HAL_HSA_REQUIRED_PFN_DECL(hsa_status_string, hsa_status_t, const char **)
Loading

0 comments on commit fe2cc27

Please sign in to comment.