Skip to content

Commit

Permalink
runtime: Load airbin to device
Browse files Browse the repository at this point in the history
An AIRBIN is an ELF file that contains AIE configuration and executable code.
The runtime loads it onto the device by copying it into device memory and
then notifying the device, which continues the loading process.

Add code to read the file, place it in device memory and notify the device.

This depends on libelf (from elfutils), which is built by:
github-clone-build-elfutils.sh

Signed-off-by: Joel Nider <[email protected]>
  • Loading branch information
jnider committed Jul 21, 2023
1 parent 9ddcf79 commit d8880d3
Show file tree
Hide file tree
Showing 12 changed files with 275 additions and 5 deletions.
7 changes: 7 additions & 0 deletions .github/workflows/buildAndTest.yml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ jobs:
- name: Rebuild and Install libxaie
run: utils/github-clone-build-libxaie.sh

- name: Install necessary build tools
run: sudo apt install autoconf flex bison gawk autopoint -y

- name: Rebuild and Install elfutils
run: utils/github-clone-build-elfutils.sh

- name: Rebuild and Install mlir-aie
run: utils/github-build-mlir-aie.sh

Expand All @@ -89,6 +95,7 @@ jobs:
-DMLIR_DIR=../llvm/install/lib/cmake/mlir/ \
-DLLVM_DIR=../llvm/install/lib/cmake/llvm/ \
-DAIE_DIR=`pwd`/../mlir-aie/install/lib/cmake/aie/ \
-DELFUTILS_DIR=$PWD/../elfutils \
-DLibXAIE_ROOT=`pwd`/../aienginev2/install \
-DAIR_RUNTIME_TARGETS:STRING="x86_64" \
-Dx86_64_TOOLCHAIN_FILE=`pwd`/../cmake/modules/toolchain_x86_64.cmake \
Expand Down
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ foreach(target ${AIR_RUNTIME_TARGETS})
-DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
-DLibXAIE_ROOT=${LibXAIE_ROOT}
-DAIE_DIR=${AIE_DIR}
-DELFUTILS_DIR=${ELFUTILS_DIR}
BUILD_ALWAYS true
STEP_TARGETS clean build install test
USES_TERMINAL_CONFIGURE true
Expand Down
11 changes: 11 additions & 0 deletions runtime_lib/airhost/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
include_directories(
${CMAKE_CURRENT_SOURCE_DIR}/include
${AIE_INCLUDE_DIRS}/../runtime_lib/x86_64/test_lib/include
${ELFUTILS_DIR}/libelf
)

add_definitions(-DLIBXAIENGINEV2)
Expand Down Expand Up @@ -39,11 +40,21 @@ add_library(airhost_shared SHARED
)
set_property(TARGET airhost_shared PROPERTY POSITION_INDEPENDENT_CODE ON)

# ELFUTILS_DIR must point at an elfutils tree in which libelf has already been
# built (see utils/github-clone-build-elfutils.sh). Without this check an unset
# or empty value silently produces the paths "/libelf" and
# "/libelf/libelf_pic.a", which only fail later with an unrelated-looking error.
if(NOT ELFUTILS_DIR)
  message(FATAL_ERROR
    "ELFUTILS_DIR is not set; pass -DELFUTILS_DIR=<path to built elfutils tree>")
endif()

# Imported target wrapping the libelf_pic.a archive from the elfutils build
# tree. Its headers (libelf.h, gelf.h) live in the same libelf directory and are
# propagated to anything that links against this target.
add_library(libelf_pic STATIC IMPORTED)
set_target_properties(libelf_pic PROPERTIES
  INTERFACE_INCLUDE_DIRECTORIES "${ELFUTILS_DIR}/libelf"
  IMPORTED_LOCATION "${ELFUTILS_DIR}/libelf/libelf_pic.a"
)

target_link_libraries(airhost
${AIR_LIBXAIE_LIBS}
dl
)

target_link_libraries(airhost_shared
libelf_pic
)

set_target_properties(airhost PROPERTIES
LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/runtime_lib)
install(TARGETS airhost DESTINATION ${CMAKE_INSTALL_PREFIX}/runtime_lib/airhost)
Expand Down
2 changes: 2 additions & 0 deletions runtime_lib/airhost/include/air.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,6 @@ inline hsa_status_t air_get_agents(std::vector<air_agent_t> &agents) {

uint64_t air_wait_all(std::vector<uint64_t> &signals);

int air_load_airbin(queue_t *q, const char *filename, uint8_t column,
uint32_t device_id = 0);
#endif
2 changes: 2 additions & 0 deletions runtime_lib/airhost/include/air_host.h
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ hsa_status_t air_get_agent_info(queue_t *queue, air_agent_info_t attribute,
hsa_status_t air_packet_rw32_init(dispatch_packet_t *pkt, bool is_write,
uint64_t address, uint32_t value);

hsa_status_t air_packet_load_airbin(dispatch_packet_t *pkt, uint64_t table);

#ifdef AIR_PCIE
hsa_status_t air_get_physical_devices();
#endif
Expand Down
1 change: 1 addition & 0 deletions runtime_lib/airhost/include/air_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
#define AIR_PKT_TYPE_POST_RDMA_RECV 0x041L

#define AIR_PKT_TYPE_RW32 0x50L
#define AIR_PKT_TYPE_AIRBIN 0x51L

#define AIR_PKT_TYPE_SHIM_DMA_MEMCPY 0x0100L
#define AIR_PKT_TYPE_HERD_SHIM_DMA_MEMCPY 0x0101L
Expand Down
15 changes: 15 additions & 0 deletions runtime_lib/airhost/include/airbin.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#ifndef AIRBIN_H
#define AIRBIN_H

// Fixed-width integer types used below; included here so this header can be
// used on its own regardless of what the including file pulled in before it.
#include <stdint.h>

/*
  Each entry describes a loadable section in device memory. The device uses
  this information to load the data into AIE memory. This definition is shared
  with the device firmware.

  Entries are stored back to back as a table. The host terminates the table
  with an entry whose fields are all 0.
*/
struct airbin_table_entry {
  uint32_t offset; // offset into allocated device memory
  uint32_t size;   // size of the loadable section
  uint64_t addr;   // base address to load the data
};

#endif
5 changes: 2 additions & 3 deletions runtime_lib/airhost/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,9 +119,8 @@ int air_init_dev_mem_allocator(uint64_t dev_mem_size, uint32_t device_id) {
printf("[ERROR] Could not open DDR BAR\n");
return 1;
}
dev_mem_allocator->dev_mem =
(uint32_t *)mmap(NULL, dev_mem_size /*0x8000*/, PROT_READ | PROT_WRITE,
MAP_SHARED, fd, 0x1C0000);
dev_mem_allocator->dev_mem = (uint32_t *)mmap(
NULL, dev_mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0x200000);
if (dev_mem_allocator->dev_mem == MAP_FAILED) {
printf("[ERROR] Could not map DDR BAR\n");
return 1;
Expand Down
196 changes: 196 additions & 0 deletions runtime_lib/airhost/queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include <fcntl.h>
#include <string>
#include <sys/mman.h>
#include <unistd.h>

#include <cstdio>
#include <iostream>
Expand All @@ -19,6 +20,17 @@
#include "air_host.h"
#include "air_host_impl.h"
#include "air_queue.h"
#include "airbin.h"
#include <gelf.h>

// Debug tracing switch for this file. DEBUG_QUEUE is defined unconditionally
// here, so DBG_PRINT currently always forwards to printf; remove the define
// below to compile every DBG_PRINT call out.
#define DEBUG_QUEUE

#ifdef DEBUG_QUEUE
#include <stdio.h>
// Tracing enabled: DBG_PRINT is a plain alias for printf.
#define DBG_PRINT printf
#else
// Tracing disabled: DBG_PRINT(...) expands to nothing.
#define DBG_PRINT(...)
#endif // DEBUG_QUEUE

#define ALIGN(_x, _size) (((_x) + ((_size)-1)) & ~((_size)-1))

Expand Down Expand Up @@ -490,3 +502,187 @@ hsa_status_t air_packet_barrier_or(barrier_or_packet_t *pkt,

return HSA_STATUS_SUCCESS;
}

/*
Fill in 'pkt' as an agent-dispatch packet of type AIR_PKT_TYPE_AIRBIN that
asks the device to load an airbin.

'table' is an offset from the beginning of device memory; it is stored in the
first argument slot of the packet.

Always returns HSA_STATUS_SUCCESS. 'pkt' is dereferenced without a NULL check.
*/
hsa_status_t air_packet_load_airbin(dispatch_packet_t *pkt, uint64_t table) {
// Trace the table offset on every call (not gated by DBG_PRINT).
printf("%s: table @ %lx\r\n", __func__, table);
pkt->type = AIR_PKT_TYPE_AIRBIN;
pkt->header = (HSA_PACKET_TYPE_AGENT_DISPATCH << HSA_PACKET_HEADER_TYPE);
pkt->arg[0] = table;

return HSA_STATUS_SUCCESS;
}

/*
  Load an airbin from a file into a device.

  The ELF file 'filename' is read with libelf. Every section that is
  SHT_PROGBITS and has SHF_ALLOC set is copied into a window of device memory
  obtained by mmap()ing the AIR driver, and a table of airbin_table_entry
  records describing those copies is written at the start of that window. The
  table is terminated by an all-zero entry. The device is then notified with
  an AIR_PKT_TYPE_AIRBIN packet sent on 'q' via air_queue_dispatch_and_wait().

  q         - queue used to send the load packet
  filename  - path of the airbin; must be a 64-bit little-endian ELF file
  column    - only printed in the debug trace; not otherwise used here
  device_id - not used by this function

  Returns HSA_STATUS_SUCCESS on success, or one of
  HSA_STATUS_ERROR_INVALID_QUEUE_CREATION (driver could not be opened),
  HSA_STATUS_ERROR_OUT_OF_RESOURCES (mapping failed or the airbin does not fit
  in the mapped window) or HSA_STATUS_ERROR_INVALID_FILE (the file could not
  be opened or parsed).
*/
int air_load_airbin(queue_t *q, const char *filename, uint8_t column,
                    uint32_t device_id) {
  // NOTE: every local with an initializer is declared up here because the
  // error paths below use 'goto', which must not jump over initializations.

  // Initialised so that the success path returns a defined value.
  int ret = HSA_STATUS_SUCCESS;
  int drv_fd = 0, elf_fd = 0;
  uint32_t dram_size = 2 * 1024 * 1024;   // 2MB
  uint32_t dram_offset = 8 * 1024 * 1024; // 8MB, just to avoid conflicts
  uint8_t *dram_ptr = NULL;
  uint8_t *data_ptr = NULL;
  struct timespec ts_start;
  struct timespec ts_end;
  Elf *inelf = NULL;
  GElf_Ehdr *ehdr = NULL;
  GElf_Ehdr ehdr_mem;
  uint64_t wr_idx = 0;
  uint64_t packet_id = 0;
  dispatch_packet_t *pkt = NULL;
  size_t shnum = 0;
  uint32_t table_idx = 0;
  airbin_table_entry *airbin_table = NULL;
  uint64_t airbin_table_offset = 0;
  uint32_t table_size = 0;
  uint32_t data_offset = 0;

  auto time_spec_diff = [](struct timespec &start, struct timespec &end) {
    return (end.tv_sec - start.tv_sec) + 1e-9 * (end.tv_nsec - start.tv_nsec);
  };

  DBG_PRINT("%s fname=%s col=%u\r\n", __func__, filename, column);

  // ask the driver to give us some device memory
  drv_fd = open(air_get_driver_name(), O_RDWR | O_SYNC);
  if (drv_fd == -1) {
    printf("Error opening %s\n", air_get_driver_name());
    ret = HSA_STATUS_ERROR_INVALID_QUEUE_CREATION;
    goto err_drv_open;
  }

  // get some DRAM from the device
  dram_ptr = (uint8_t *)mmap(NULL, dram_size, PROT_READ | PROT_WRITE,
                             MAP_SHARED, drv_fd, dram_offset);

  if (dram_ptr == MAP_FAILED) {
    printf("Error allocating %u DRAM\n", dram_size);
    ret = HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    goto err_dev_mem_alloc;
  }

  DBG_PRINT("Allocated %u device memory DVO=0x%x HVA=0x%lx\r\n", dram_size,
            dram_offset, (uint64_t)dram_ptr);

  // find the loadable sections and copy them to the device
  elf_fd = open(filename, O_RDONLY);
  if (elf_fd < 0) {
    printf("Can't open %s\n", filename);
    ret = HSA_STATUS_ERROR_INVALID_FILE;
    goto err_elf_open;
  }

  // libelf requires elf_version() to succeed before any other call.
  if (elf_version(EV_CURRENT) == EV_NONE) {
    printf("cannot initialize libelf: %s\n", elf_errmsg(-1));
    ret = HSA_STATUS_ERROR_INVALID_FILE;
    goto err_elf_read;
  }

  inelf = elf_begin(elf_fd, ELF_C_READ, NULL);
  if (inelf == NULL) {
    printf("cannot read ELF file %s: %s\n", filename, elf_errmsg(-1));
    ret = HSA_STATUS_ERROR_INVALID_FILE;
    goto err_elf_read;
  }

  // check the characteristics
  ehdr = gelf_getehdr(inelf, &ehdr_mem);
  if (ehdr == NULL) {
    printf("cannot get ELF header: %s\n", elf_errmsg(-1));
    ret = HSA_STATUS_ERROR_INVALID_FILE;
    goto err_elf_read;
  }

  // Read data as 64-bit little endian
  if ((ehdr->e_ident[EI_CLASS] != ELFCLASS64) ||
      (ehdr->e_ident[EI_DATA] != ELFDATA2LSB)) {
    printf("unexpected ELF format\n");
    ret = HSA_STATUS_ERROR_INVALID_FILE;
    goto err_elf_read;
  }

  if (elf_getshdrnum(inelf, &shnum) != 0) {
    printf("cannot get section count: %s\n", elf_errmsg(-1));
    ret = HSA_STATUS_ERROR_INVALID_FILE;
    goto err_elf_read;
  }

  /*
    Even though not all sections are loadable, we use the section count as an
    upper bound for how much memory the table will take. We can then safely
    place data after that point and avoid any conflicts. A small amount of
    memory will be wasted but it is usually only a few entries so not a big
    deal. This allows us to do only a single pass on the ELF sections so it
    seems like a good trade-off.

    One extra entry is reserved for the all-zero terminator, so that it can
    never land on top of the section data even if every section is loadable.
  */
  printf("There are %lu sections\n", shnum);
  if (shnum >= dram_size / sizeof(airbin_table_entry)) {
    printf("Table for %zu sections does not fit in %u bytes of DRAM\n", shnum,
           dram_size);
    ret = HSA_STATUS_ERROR_OUT_OF_RESOURCES;
    goto err_elf_read;
  }
  table_size = (shnum + 1) * sizeof(airbin_table_entry);
  airbin_table = (airbin_table_entry *)dram_ptr;
  data_ptr = dram_ptr + table_size;
  data_offset = dram_offset + table_size;

  // Iterate through all sections to create a table in device-readable format.
  for (unsigned int ndx = 0; ndx < shnum; ndx++) {
    GElf_Shdr shdr;
    Elf_Scn *sec = elf_getscn(inelf, ndx);
    if (sec == NULL) {
      printf("cannot get section %d: %s", ndx, elf_errmsg(-1));
      ret = HSA_STATUS_ERROR_INVALID_FILE;
      goto err_elf_read;
    }

    if (gelf_getshdr(sec, &shdr) == NULL) {
      printf("cannot get header of section %u: %s\n", ndx, elf_errmsg(-1));
      ret = HSA_STATUS_ERROR_INVALID_FILE;
      goto err_elf_read;
    }

    // only loadable sections (allocated PROGBITS) go to the device
    if (shdr.sh_type != SHT_PROGBITS || !(shdr.sh_flags & SHF_ALLOC))
      continue;

    // fetch the section contents
    Elf_Data *desc;
    desc = elf_getdata(sec, NULL);
    if (!desc) {
      printf("Error reading data for section %u\n", ndx);
      ret = HSA_STATUS_ERROR_INVALID_FILE;
      goto err_elf_read;
    }

    // Make sure both the bytes we copy (d_size) and the bytes we account for
    // (sh_size) stay inside the mapped window before touching device memory.
    const size_t used = (size_t)(data_ptr - dram_ptr);
    const size_t room = dram_size - used;
    if (shdr.sh_size > room || desc->d_size > room) {
      printf("Section %u (0x%lx bytes) does not fit in %u bytes of DRAM\n",
             ndx, shdr.sh_size, dram_size);
      ret = HSA_STATUS_ERROR_OUT_OF_RESOURCES;
      goto err_elf_read;
    }

    // copy the data into device memory
    if (desc->d_buf != NULL && desc->d_size > 0)
      memcpy(data_ptr, desc->d_buf, desc->d_size);

    airbin_table[table_idx].offset = data_offset;
    airbin_table[table_idx].size = shdr.sh_size;
    airbin_table[table_idx].addr = shdr.sh_addr;
    printf("table[%u] offset=0x%x size=0x%lx addr=0x%lx\n", table_idx,
           data_offset, shdr.sh_size, shdr.sh_addr);

    table_idx++;
    data_offset += shdr.sh_size;
    data_ptr += shdr.sh_size;
  }

  // the last entry must be all 0's
  airbin_table[table_idx].offset = 0;
  airbin_table[table_idx].size = 0;
  airbin_table[table_idx].addr = 0;

  // Send configuration packet
  printf("Notifying device\n");
  wr_idx = queue_add_write_index(q, 1);
  packet_id = wr_idx % q->size;
  pkt =
      reinterpret_cast<dispatch_packet_t *>(q->base_address_vaddr) + packet_id;

  // The table sits at the start of the mapping, so its device offset is the
  // mapping offset plus its (zero) distance from the start of the mapping.
  airbin_table_offset =
      dram_offset + (uint64_t)((uint8_t *)airbin_table - dram_ptr);
  air_packet_load_airbin(pkt, airbin_table_offset);

  clock_gettime(CLOCK_BOOTTIME, &ts_start);
  air_queue_dispatch_and_wait(q, wr_idx, pkt);
  clock_gettime(CLOCK_BOOTTIME, &ts_end);

  printf("airbin loading time: %0.8f sec\n", time_spec_diff(ts_start, ts_end));

  // The success path falls through the cleanup labels below; each label undoes
  // one acquisition, in reverse order.
err_elf_read:
  elf_end(inelf); // accepts NULL, so this is safe when elf_begin() failed
  close(elf_fd);

err_elf_open:
  munmap(dram_ptr, dram_size);

err_dev_mem_alloc:
  close(drv_fd);

err_drv_open:
  return ret;
}
File renamed without changes.
6 changes: 4 additions & 2 deletions utils/build-mlir-air-pcie.sh
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,10 @@ CMAKEMODULES_DIR=`realpath $2`
MLIR_AIE_DIR=`realpath $3`

LibXAIE_DIR=`realpath $4`
ELFUTILS_DIR=`realpath $5`

BUILD_DIR=${5:-"build-pcie"}
INSTALL_DIR=${6:-"install-pcie"}
BUILD_DIR=${6:-"build-pcie"}
INSTALL_DIR=${7:-"install-pcie"}

mkdir -p $BUILD_DIR
mkdir -p $INSTALL_DIR
Expand All @@ -57,6 +58,7 @@ cmake .. \
-DLLVM_DIR=${LLVM_DIR}/build/lib/cmake/llvm \
-DMLIR_DIR=${LLVM_DIR}/build/lib/cmake/mlir \
-DAIE_DIR=${MLIR_AIE_DIR}/build/lib/cmake/aie \
-DELFUTILS_DIR=${ELFUTILS_DIR} \
-Dpybind11_DIR=${PYTHON_ROOT}/pybind11/share/cmake/pybind11 \
-DVitisSysroot="" \
-DLibXAIE_ROOT=${LibXAIE_DIR} \
Expand Down
34 changes: 34 additions & 0 deletions utils/github-clone-build-elfutils.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
#!/usr/bin/env bash

##===- utils/github-clone-build-elfutils.sh ------------------*- Script -*-===##
#
# Copyright (C) 2022, Advanced Micro Devices, Inc.
# SPDX-License-Identifier: MIT

##===----------------------------------------------------------------------===##
#
# This script checks out and builds libelf.
# It only builds the necessary libraries to minimize build time.
#
# The sources are cloned into ./elfutils (relative to the current working
# directory) unless that directory already exists, and are built in place.
#
# This script is intended to be called from the github workflows.
#
# Depends on: autoconf, flex, bison, gawk, autopoint
##===----------------------------------------------------------------------===##

# Stop at the first failing step. Without this, a failed clone or 'cd' would
# let autoreconf/configure/make run in the caller's directory.
set -euo pipefail

INSTALL_DIR=elfutils
# Despite the name, this is the branch of the fork that gets checked out.
HASH="airbin"

if [[ ! -d "$INSTALL_DIR" ]]; then
  git clone --branch "$HASH" --depth 1 https://github.com/jnider/elfutils.git "$INSTALL_DIR"
fi

cd "$INSTALL_DIR"
autoreconf -v -f -i
./configure --program-prefix="air-" --disable-debuginfod --disable-libdebuginfod --enable-maintainer-mode


# build libeu.a, required for libelf.so
make -C lib

# build libelf.a, libelf_pic.a and libelf.so
make -C libelf

0 comments on commit d8880d3

Please sign in to comment.