Skip to content

Commit

Permalink
cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
makslevental committed Aug 28, 2024
1 parent 66c1123 commit 22fa938
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 79 deletions.
79 changes: 26 additions & 53 deletions experimental/hsa/aie_hsa_bare_add_one.cc
Original file line number Diff line number Diff line change
@@ -1,39 +1,12 @@

/*
RUN: (add_one_test %S) | FileCheck %s
CHECK: /dev/accel/accel0 open
CHECK: Driver version 1.1
CHECK: Heap buffer @: 0x7f313c000000
CHECK: Loading pdi
CHECK: Pdi file size: 3552
CHECK: Loading dpu inst
CHECK: Loading dpu inst
CHECK: DPU 0 instructions @: 0x7f313c008000
CHECK: DPU 1 instructions @: 0x7f313c010000
CHECK: PDI file @: 0x7f313c000000
CHECK: PDI handle @: 2
CHECK: Input @: 0x7f313c018000
CHECK: Output @: 0x7f313c020000
CHECK: Input @: 0x7f313c028000
CHECK: Output @: 0x7f313c030000
CHECK: Size of param_config_cu: 0x8
CHECK: Synch bo ioctl failed for handle 11
CHECK: Synch bo ioctl failed for handle 9
CHECK: Synch bo ioctl failed for handle 10
CHECK: Checking run 0:
CHECK: Checking run 1:
CHECK: PASS!
CHECK: Closing
CHECK: Done
*/
// Copyright 2024 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#include <cstdint>
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>

#include "amdxdna_accel.h"
#include "hsa_ipu.h"
Expand Down Expand Up @@ -261,7 +234,7 @@ int main(int argc, char **argv) {
// 2. Allocate the queue buffer as a user-mode queue

// Allocating a structure to store QOS information
struct amdxdna_qos_info *qos =
amdxdna_qos_info *qos =
(struct amdxdna_qos_info *)malloc(sizeof(struct amdxdna_qos_info));
qos->gops = 0;
qos->fps = 0;
Expand All @@ -271,7 +244,7 @@ int main(int argc, char **argv) {
qos->priority = 0;

// This is the structure that we pass
struct amdxdna_drm_create_hwctx create_hw_ctx = {
amdxdna_drm_create_hwctx create_hw_ctx = {
.ext = 0,
.ext_flags = 0,
.qos_p = (uint64_t)qos,
Expand All @@ -289,20 +262,20 @@ int main(int argc, char **argv) {
}

// Creating a structure to configure the CU
struct amdxdna_cu_config cu_config = {
amdxdna_cu_config cu_config = {
.cu_bo = pdi_handle,
.cu_func = 0,
};

// Creating a structure to configure the hardware context
struct amdxdna_hwctx_param_config_cu param_config_cu;
amdxdna_hwctx_param_config_cu param_config_cu;
param_config_cu.num_cus = 1;
param_config_cu.cu_configs[0] = cu_config;

printf("Size of param_config_cu: 0x%lx\n", sizeof(param_config_cu));

// Configuring the hardware context with the PDI
struct amdxdna_drm_config_hwctx config_hw_ctx = {
amdxdna_drm_config_hwctx config_hw_ctx = {
.handle = create_hw_ctx.handle,
.param_type = DRM_AMDXDNA_HWCTX_CONFIG_CU,
.param_val =
Expand All @@ -317,7 +290,7 @@ int main(int argc, char **argv) {

/////////////////////////////////////////////////////////////////////////////////
// Step 2: Configuring the CMD BOs with the different instruction sequences
struct amdxdna_drm_create_bo create_cmd_bo_0 = {
amdxdna_drm_create_bo create_cmd_bo_0 = {
.type = AMDXDNA_BO_CMD,
.size = PACKET_SIZE,
};
Expand All @@ -327,16 +300,16 @@ int main(int argc, char **argv) {
return -1;
}

struct amdxdna_drm_get_bo_info cmd_bo_0_get_bo_info = {
.handle = create_cmd_bo_0.handle};
amdxdna_drm_get_bo_info cmd_bo_0_get_bo_info = {.handle =
create_cmd_bo_0.handle};
ret = ioctl(drv_fd, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &cmd_bo_0_get_bo_info);
if (ret != 0) {
perror("Failed to get cmd BO 0 info");
return -2;
}

// Writing the first packet to the queue
struct amdxdna_cmd *cmd_0 = (struct amdxdna_cmd *)mmap(
amdxdna_cmd *cmd_0 = (struct amdxdna_cmd *)mmap(
0, PACKET_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, drv_fd,
cmd_bo_0_get_bo_info.map_offset);
cmd_0->state = 1; // ERT_CMD_STATE_NEW;
Expand All @@ -355,7 +328,7 @@ int main(int argc, char **argv) {
cmd_0->data[9] = (output_0 >> 32) & 0xFFFFFFFF; // Output high

// Writing to the second packet of the queue
struct amdxdna_drm_create_bo create_cmd_bo_1 = {
amdxdna_drm_create_bo create_cmd_bo_1 = {
.type = AMDXDNA_BO_CMD,
.size = PACKET_SIZE,
};
Expand All @@ -365,15 +338,15 @@ int main(int argc, char **argv) {
return -1;
}

struct amdxdna_drm_get_bo_info cmd_bo_1_get_bo_info = {
.handle = create_cmd_bo_1.handle};
amdxdna_drm_get_bo_info cmd_bo_1_get_bo_info = {.handle =
create_cmd_bo_1.handle};
ret = ioctl(drv_fd, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &cmd_bo_1_get_bo_info);
if (ret != 0) {
perror("Failed to get cmd BO 0 info");
return -2;
}

struct amdxdna_cmd *cmd_1 = (struct amdxdna_cmd *)mmap(
amdxdna_cmd *cmd_1 = (struct amdxdna_cmd *)mmap(
0, PACKET_SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, drv_fd,
cmd_bo_1_get_bo_info.map_offset);
cmd_1->state = 1; // ERT_CMD_STATE_NEW;
Expand All @@ -396,13 +369,13 @@ int main(int argc, char **argv) {
// the command chain that points to the instruction sequences just created

// Allocate a command chain
void *bo_cmd_chain_buf = NULL;
void *bo_cmd_chain_buf = nullptr;
cmd_bo_ret = posix_memalign(&bo_cmd_chain_buf, 4096, 4096);
if (cmd_bo_ret != 0 || bo_cmd_chain_buf == NULL) {
if (cmd_bo_ret != 0 || bo_cmd_chain_buf == nullptr) {
printf("[ERROR] Failed to allocate cmd_bo buffer of size %d\n", 4096);
}

struct amdxdna_drm_create_bo create_cmd_chain_bo = {
amdxdna_drm_create_bo create_cmd_chain_bo = {
.type = AMDXDNA_BO_CMD,
.size = 4096,
};
Expand All @@ -412,20 +385,20 @@ int main(int argc, char **argv) {
return -1;
}

struct amdxdna_drm_get_bo_info cmd_chain_bo_get_bo_info = {
amdxdna_drm_get_bo_info cmd_chain_bo_get_bo_info = {
.handle = create_cmd_chain_bo.handle};
ret = ioctl(drv_fd, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &cmd_chain_bo_get_bo_info);
if (ret != 0) {
perror("Failed to get cmd BO 0 info");
return -2;
}

struct amdxdna_cmd *cmd_chain =
amdxdna_cmd *cmd_chain =
(struct amdxdna_cmd *)mmap(0, 4096, PROT_READ | PROT_WRITE, MAP_SHARED,
drv_fd, cmd_chain_bo_get_bo_info.map_offset);

// Writing information to the command buffer
struct amdxdna_cmd_chain *cmd_chain_payload =
amdxdna_cmd_chain *cmd_chain_payload =
(struct amdxdna_cmd_chain *)(cmd_chain->data);
cmd_chain->state = 1; // ERT_CMD_STATE_NEW;
cmd_chain->extra_cu_masks = 0;
Expand All @@ -445,7 +418,7 @@ int main(int argc, char **argv) {
// Perform a submit cmd
uint32_t bo_args[6] = {dpu_0_handle, dpu_1_handle, input_0_handle,
output_0_handle, input_1_handle, output_1_handle};
struct amdxdna_drm_exec_cmd exec_cmd_0 = {
amdxdna_drm_exec_cmd exec_cmd_0 = {
.ext = 0,
.ext_flags = 0,
.hwctx = create_hw_ctx.handle,
Expand All @@ -464,7 +437,7 @@ int main(int argc, char **argv) {
/////////////////////////////////////////////////////////////////////////////////
// Step 4: Wait for the output
// Use the wait IOCTL to wait for our submission to complete
struct amdxdna_drm_wait_cmd wait_cmd = {
amdxdna_drm_wait_cmd wait_cmd = {
.hwctx = create_hw_ctx.handle,
.timeout = 50, // 50ms timeout
.seq = exec_cmd_0.seq,
Expand Down
53 changes: 27 additions & 26 deletions experimental/hsa/hsa_ipu.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
// Copyright 2024 The IREE Authors
//
// Licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
#pragma once

#include <fcntl.h>
#include <sys/ioctl.h>
Expand All @@ -15,8 +21,6 @@
#include "amdxdna_accel.h"

// want to mmap the file
#include <sys/io.h>
#include <sys/mman.h>

#define MAX_NUM_INSTRUCTIONS 1024 // Maximum number of dpu or pdi instructions.

Expand All @@ -43,9 +47,9 @@ void ring_doorbell(uint64_t doorbell) {

int get_driver_version(int fd, __u32 *major, __u32 *minor) {
int ret;
struct amdxdna_drm_query_aie_version version;
amdxdna_drm_query_aie_version version;

struct amdxdna_drm_get_info info_params = {
amdxdna_drm_get_info info_params = {
.param = DRM_AMDXDNA_QUERY_AIE_VERSION,
.buffer_size = sizeof(version),
.buffer = (__u64)&version,
Expand All @@ -63,7 +67,7 @@ int get_driver_version(int fd, __u32 *major, __u32 *minor) {
/*
Allocates a heap on the device by creating a BO of type AMDXDNA_BO_DEV_HEAP.
*/
static int alloc_heap(int fd, __u32 size, __u32 *handle) {
int alloc_heap(int fd, __u32 size, __u32 *handle) {
int ret;
void *heap_buf = NULL;
const size_t alignment = 64 * 1024 * 1024;
Expand All @@ -80,7 +84,7 @@ static int alloc_heap(int fd, __u32 size, __u32 *handle) {
return -1;
}

struct amdxdna_drm_create_bo create_bo_params = {
amdxdna_drm_create_bo create_bo_params = {
.type = AMDXDNA_BO_DEV_HEAP,
.size = size,
};
Expand All @@ -90,8 +94,7 @@ static int alloc_heap(int fd, __u32 size, __u32 *handle) {
*handle = create_bo_params.handle;
}

struct amdxdna_drm_get_bo_info get_bo_info = {.handle =
create_bo_params.handle};
amdxdna_drm_get_bo_info get_bo_info = {.handle = create_bo_params.handle};
ret = ioctl(fd, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &get_bo_info);
if (ret != 0) {
perror("Failed to get BO info");
Expand All @@ -111,9 +114,9 @@ static int alloc_heap(int fd, __u32 size, __u32 *handle) {
/*
Creates a dev bo which is carved out of the heap bo.
*/
static int create_dev_bo(int fd, uint64_t *vaddr, uint64_t *sram_vaddr,
__u32 *handle, __u64 size_in_bytes) {
struct amdxdna_drm_create_bo create_bo = {
int create_dev_bo(int fd, uint64_t *vaddr, uint64_t *sram_vaddr, __u32 *handle,
__u64 size_in_bytes) {
amdxdna_drm_create_bo create_bo = {
.type = AMDXDNA_BO_DEV,
.size = size_in_bytes,
};
Expand All @@ -123,7 +126,7 @@ static int create_dev_bo(int fd, uint64_t *vaddr, uint64_t *sram_vaddr,
return -1;
}

struct amdxdna_drm_get_bo_info get_bo_info = {.handle = create_bo.handle};
amdxdna_drm_get_bo_info get_bo_info = {.handle = create_bo.handle};
ret = ioctl(fd, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &get_bo_info);
if (ret != 0) {
perror("Failed to get BO info");
Expand All @@ -139,8 +142,8 @@ static int create_dev_bo(int fd, uint64_t *vaddr, uint64_t *sram_vaddr,
/*
Creates a shmem BO (type AMDXDNA_BO_SHMEM) whose vaddr is a host buffer
allocated with posix_memalign.
*/
static int create_shmem_bo(int fd, uint64_t *vaddr, uint64_t *sram_vaddr,
__u32 *handle, __u64 size_in_bytes) {
int create_shmem_bo(int fd, uint64_t *vaddr, uint64_t *sram_vaddr,
__u32 *handle, __u64 size_in_bytes) {
const size_t alignment = 64 * 1024 * 1024;
void *shmem_create = NULL;
int ret = posix_memalign(&shmem_create, alignment, size_in_bytes);
Expand All @@ -153,16 +156,16 @@ static int create_shmem_bo(int fd, uint64_t *vaddr, uint64_t *sram_vaddr,

printf("Shmem BO @: %p\n", shmem_create);

struct amdxdna_drm_create_bo create_bo = {.type = AMDXDNA_BO_SHMEM,
.vaddr = (__u64)shmem_create,
.size = size_in_bytes};
amdxdna_drm_create_bo create_bo = {.type = AMDXDNA_BO_SHMEM,
.vaddr = (__u64)shmem_create,
.size = size_in_bytes};
ret = ioctl(fd, DRM_IOCTL_AMDXDNA_CREATE_BO, &create_bo);
if (ret != 0) {
perror("Failed to create BO");
return -1;
}

struct amdxdna_drm_get_bo_info get_bo_info = {.handle = create_bo.handle};
amdxdna_drm_get_bo_info get_bo_info = {.handle = create_bo.handle};
ret = ioctl(fd, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &get_bo_info);
if (ret != 0) {
perror("Failed to get BO info");
Expand All @@ -178,10 +181,8 @@ static int create_shmem_bo(int fd, uint64_t *vaddr, uint64_t *sram_vaddr,
/*
Wrapper around the sync BO ioctl (DRM_IOCTL_AMDXDNA_SYNC_BO).
*/
static int sync_bo(int fd, __u32 handle) {
struct amdxdna_drm_sync_bo sync_params = {
.handle = handle,
};
int sync_bo(int fd, __u32 handle) {
amdxdna_drm_sync_bo sync_params = {.handle = handle};
int ret = ioctl(fd, DRM_IOCTL_AMDXDNA_SYNC_BO, &sync_params);
if (ret != 0) {
printf("Synch bo ioctl failed for handle %d\n", handle);
Expand All @@ -193,8 +194,8 @@ static int sync_bo(int fd, __u32 handle) {
Create a BO_DEV and populate it with a PDI
*/

static int load_pdi(int fd, uint64_t *vaddr, uint64_t *sram_addr, __u32 *handle,
const char *path) {
int load_pdi(int fd, uint64_t *vaddr, uint64_t *sram_addr, __u32 *handle,
const char *path) {
FILE *file = fopen(path, "r");
if (file == NULL) {
perror("Failed to open instructions file.");
Expand Down Expand Up @@ -233,8 +234,8 @@ static int load_pdi(int fd, uint64_t *vaddr, uint64_t *sram_addr, __u32 *handle,
Create a BO DEV and populate it with instructions whose virtual address is
passed to the driver via an HSA packet.
*/
static int load_instructions(int fd, uint64_t *vaddr, uint64_t *sram_addr,
__u32 *handle, const char *path, __u32 *num_inst) {
int load_instructions(int fd, uint64_t *vaddr, uint64_t *sram_addr,
__u32 *handle, const char *path, __u32 *num_inst) {
// read dpu instructions into an array
FILE *file = fopen(path, "r");
if (file == NULL) {
Expand Down

0 comments on commit 22fa938

Please sign in to comment.