-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #65 from db-tu-dresden/mwe_rtl_fpga
Minimal Working Example RTL on FPGA
- Loading branch information
Showing
10 changed files
with
257 additions
and
23 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
# CMakeFile for building the example(s). | ||
# The following parameters can be passed to cmake: | ||
# BOARD: name of fpga board [intel_s10sx_pac:pac_s10_usm] | ||
|
||
cmake_minimum_required(VERSION 3.13)
project(oneAPIfpgaExamples)

# Root of the TSL checkout: two directories above the top-level source directory.
set(TSLROOT ${CMAKE_SOURCE_DIR}/../..)

# Ask the generated build system for verbose output.
set(CMAKE_VERBOSE_MAKEFILE ON)

# C++ standard and optimisation flag used by the target helper functions below.
set(project_cxx_standard 20)
set(release_cxx_flag "-O2")

# Diagnostic flags, kept as lists so they expand to one argument per flag.
set(warnings -Wall -Wextra -Wpedantic)
set(release_warnings "-Winline")

# FPGA board: start from the default and let -DBOARD=<name> override it.
set(fpga_board "intel_s10sx_pac:pac_s10_usm")
if(DEFINED BOARD)
  set(fpga_board ${BOARD})
endif()

# Flags handed to create_tsl() and to the linker of the example executables.
set(fpga_link_options "-qactypes;-fsycl;-fintelfpga")
|
||
# Generate the TSL for SSE, AVX, AVX512 and the oneAPI FPGA backend. The CPU
# extensions listed assume an Intel Xeon Gold Cascade Lake (e.g., Xeon Gold 6238R).
# To generate the FPGA backend only, reduce the list to "oneAPIfpgaDev".
include(${TSLROOT}/tsl.cmake)
set(tsl_target_flags
  sse sse2 ssse3 sse4_1 sse4_2
  avx avx2
  avx512f avx512dq avx512cd avx512bw avx512vl avx512_vnni
  bmi1 bmi2
  oneAPIfpgaDev
)
# The list is passed quoted so it reaches create_tsl() as a single
# semicolon-separated argument.
create_tsl(
  TSLGENERATOR_DIRECTORY "${TSLROOT}"
  DESTINATION "${CMAKE_BINARY_DIR}/tsl"
  TARGETS_FLAGS "${tsl_target_flags}"
  USE_CONCEPTS
  LINK_OPTIONS ${fpga_link_options}
)
message(STATUS "TSL Include Directory: ${TSL_INCLUDE_DIRECTORY}")
|
||
|
||
######################################################## | ||
## Build emulator | ||
######################################################## | ||
# create_fpga_emulator_target(<targetName> <mainFile>)
#
# Adds the executable "<targetName>.fpga.emu", built from <mainFile>, for the
# FPGA emulator flow.
#   targetName - base name of the example; ".fpga.emu" is appended.
#   mainFile   - source file passed to add_executable().
#
# Reads these variables from the calling scope: TSL_INCLUDE_DIRECTORY,
# project_cxx_standard, release_cxx_flag, warnings, release_warnings and
# fpga_link_options.
function(create_fpga_emulator_target targetName mainFile)
  set(exec_target_name ${targetName}.fpga.emu)
  add_executable(${exec_target_name} ${mainFile})
  target_include_directories(${exec_target_name} PRIVATE ${TSL_INCLUDE_DIRECTORY})
  # Use the keyword signature: an executable has no consumers, so PRIVATE is
  # the right scope, and it avoids the legacy keyword-less semantics.
  target_link_libraries(${exec_target_name} PRIVATE tsl libtslOneAPIFPGA)
  set_target_properties(${exec_target_name} PROPERTIES CXX_STANDARD ${project_cxx_standard})
  target_compile_options(${exec_target_name} PRIVATE
    -fsycl
    ${release_cxx_flag}
    ${warnings}
    ${release_warnings}
    -fintelfpga
    -qactypes
  )
  target_link_options(${exec_target_name} PRIVATE ${fpga_link_options})
endfunction()
|
||
######################################################## | ||
## Build hardware | ||
######################################################## | ||
# create_fpga_target(<targetName> <mainFile>)
#
# Adds the executable "<targetName>.fpga", built from <mainFile>, for the FPGA
# hardware flow. The sources are compiled with ONEAPI_FPGA_HARDWARE defined.
#   targetName - base name of the example; ".fpga" is appended. It also names
#                the report folder "<targetName>.prj".
#   mainFile   - source file passed to add_executable().
#
# Reads these variables from the calling scope: TSL_INCLUDE_DIRECTORY,
# project_cxx_standard, release_cxx_flag, warnings, release_warnings,
# fpga_link_options and fpga_board.
function(create_fpga_target targetName mainFile)
  set(exec_target_name ${targetName}.fpga)
  add_executable(${exec_target_name} ${mainFile})
  target_include_directories(${exec_target_name} PRIVATE ${TSL_INCLUDE_DIRECTORY})
  # Use the keyword signature: an executable has no consumers, so PRIVATE is
  # the right scope, and it avoids the legacy keyword-less semantics.
  target_link_libraries(${exec_target_name} PRIVATE tsl libtslOneAPIFPGA)
  set_target_properties(${exec_target_name} PROPERTIES CXX_STANDARD ${project_cxx_standard})
  target_compile_definitions(${exec_target_name} PRIVATE ONEAPI_FPGA_HARDWARE)
  target_compile_options(${exec_target_name} PRIVATE
    -fsycl
    ${release_cxx_flag}
    ${warnings}
    ${release_warnings}
    -fintelfpga
    -Xsoutput-report-folder=${targetName}.prj
    -qactypes
  )
  # Reuse the shared base link flags instead of repeating them by hand, then
  # add the hardware-specific options: target real hardware on the selected
  # board, and point -reuse-exe at this target's own output path.
  target_link_options(${exec_target_name} PRIVATE
    ${fpga_link_options}
    -Xshardware
    -Xsboard=${fpga_board}
    -reuse-exe=${CMAKE_CURRENT_BINARY_DIR}/${exec_target_name}
  )
endfunction()
|
||
# Select which flavour of the clz_rtl example is built.
# TARGET (pass with -DTARGET=<value>):
#   EMULATOR       - build clz_rtl.fpga.emu (default when TARGET is not given)
#   FPGA_HARDWARE  - build clz_rtl.fpga
#
# DEFINED must be given the variable NAME. Testing ${TARGET} would instead ask
# whether a variable named after TARGET's value exists, which is never the
# case, so an explicit -DTARGET=FPGA_HARDWARE would be overwritten.
if(NOT DEFINED TARGET)
  message(STATUS "No target specified. Assuming emulator")
  set(TARGET EMULATOR)
endif()

# Operands are quoted so an empty or unusual value cannot break the comparison.
if("${TARGET}" STREQUAL "EMULATOR")
  create_fpga_emulator_target(clz_rtl clz_rtl_example.cpp)
elseif("${TARGET}" STREQUAL "FPGA_HARDWARE")
  create_fpga_target(clz_rtl clz_rtl_example.cpp)
else()
  # Fail loudly instead of configuring a project without any target.
  message(FATAL_ERROR "Unknown TARGET '${TARGET}'. Expected EMULATOR or FPGA_HARDWARE.")
endif()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/bin/bash
# Configure and build the example with the Intel oneAPI compilers (icx/icpx).
#
# Usage: <this script> [emu|hw]
#   emu  - build for the FPGA emulator (default)
#   hw   - build for FPGA hardware
# A missing or unrecognised argument falls back to the emulator.

# Stop at the first failing command so a failed configure step is not
# followed by a build attempt.
set -e

if [ $# -eq 0 ]; then
    echo "No target specified. Using emulator"
    TARGET=EMULATOR
else
    # "$1" is quoted: unquoted, an empty or multi-word argument made the
    # original tests fail with a syntax error and select FPGA_HARDWARE.
    case "$1" in
        emu)
            TARGET=EMULATOR
            ;;
        hw)
            TARGET=FPGA_HARDWARE
            ;;
        *)
            echo "Invalid target (emu|hw) specified. Using emulator"
            TARGET=EMULATOR
            ;;
    esac
fi

CC=icx CXX=icpx cmake -B build -S . -DCMAKE_BUILD_TYPE=Release -DTARGET="$TARGET"
cmake --build build
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
#include <iostream> | ||
#include "tslintrin.hpp" | ||
|
||
template<class SimdT, typename PtrTOut, typename PtrTIn, typename SizeT> | ||
struct count_leading_zero_kernel { | ||
static void apply(PtrTOut out, PtrTIn in, SizeT element_count) { | ||
for (size_t i = 0; i < element_count; i+=SimdT::vector_element_count()) { | ||
auto in_reg = tsl::loadu<SimdT>(&in[i]); | ||
auto result_reg = tsl::lzc<SimdT>(in_reg); | ||
tsl::storeu<SimdT>(&out[i], result_reg); | ||
} | ||
} | ||
}; | ||
|
||
int main(void) { | ||
// so far, only 32-bit unsigned integers are supported as RTL code | ||
|
||
using namespace tsl; | ||
executor<runtime::cpu> cpu_executor; | ||
using cpu_simd = simd<uint32_t, avx512>; | ||
|
||
executor<runtime::oneAPI_default_fpga> fpga_executor{ | ||
sycl::property_list{sycl::property::queue::enable_profiling()} | ||
}; | ||
using fpga_simd = simd<uint32_t, oneAPIfpgaRTL, 512>; | ||
|
||
// allocate memory on host | ||
auto host_mem_data = cpu_executor.allocate<uint32_t>(128); | ||
auto host_mem_result = cpu_executor.allocate<uint32_t>(128); | ||
// allocate memory accessible from host and FPGA device | ||
// WATCH OUT: oneAPI::MEMORY_ON_HOST and oneAPI::MEMORY_ON_DEVICE will soon be moved up in the namespace hierarchy | ||
auto usm_host_mem_data = fpga_executor.allocate<uint32_t>(128, oneAPI::MEMORY_ON_HOST{}); | ||
auto usm_host_mem_result = fpga_executor.allocate<uint32_t>(128, oneAPI::MEMORY_ON_HOST{}); | ||
// allocate memory on FPGA device | ||
auto usm_dev_mem_data = fpga_executor.allocate<uint32_t>(128, oneAPI::MEMORY_ON_DEVICE{}); | ||
auto usm_dev_mem_result = fpga_executor.allocate<uint32_t>(128, oneAPI::MEMORY_ON_DEVICE{}); | ||
|
||
// initialize input data | ||
for (size_t i = 0; i < 128; i++) { | ||
host_mem_data[i] = i; | ||
usm_host_mem_data[i] = i; | ||
} | ||
// copy input data to FPGA device | ||
fpga_executor.copy(usm_dev_mem_data, usm_host_mem_data, 128); | ||
|
||
// initialize output | ||
for (size_t i = 0; i < 128; i++) { | ||
host_mem_result[i] = 0; | ||
usm_host_mem_result[i] = 0; | ||
} | ||
// copy output to FPGA device | ||
fpga_executor.copy(usm_dev_mem_result, usm_host_mem_result, 128); | ||
|
||
|
||
// run kernel on CPU using avx512 | ||
cpu_executor.submit<cpu_simd, count_leading_zero_kernel>(host_mem_result, host_mem_data, (size_t)128); | ||
|
||
// run kernel on FPGA using oneAPIfpgaRTL (RTL code is built seperately). Use USM-Host memory | ||
fpga_executor.submit<fpga_simd, count_leading_zero_kernel>(usm_host_mem_result, usm_host_mem_data, (size_t)128); | ||
|
||
// check results | ||
for (size_t i = 0; i < 128; i++) { | ||
if (host_mem_result[i] != usm_host_mem_result[i]) { | ||
std::cerr << "ERROR: host_mem_result[" << i << "] = " << host_mem_result[i] << " != usm_host_mem_result[" << i << "] = " << usm_host_mem_result[i] << std::endl; | ||
std::terminate(); | ||
} | ||
} | ||
|
||
// run kernel on FPGA using oneAPIfpgaRTL (RTL code is built seperately). Use USM-Device memory | ||
fpga_executor.submit<fpga_simd, count_leading_zero_kernel>(usm_dev_mem_result, usm_dev_mem_data, (size_t)128); | ||
|
||
// copy output to host | ||
fpga_executor.copy(usm_host_mem_result, usm_dev_mem_result, 128); | ||
|
||
// check results | ||
for (size_t i = 0; i < 128; i++) { | ||
if (host_mem_result[i] != usm_host_mem_result[i]) { | ||
std::cerr << "ERROR: host_mem_result[" << i << "] = " << host_mem_result[i] << " != usm_host_mem_result[" << i << "] = " << usm_host_mem_result[i] << std::endl; | ||
std::terminate(); | ||
} | ||
} | ||
|
||
// free memory | ||
fpga_executor.deallocate(usm_dev_mem_result); | ||
fpga_executor.deallocate(usm_dev_mem_data); | ||
fpga_executor.deallocate(usm_host_mem_result); | ||
fpga_executor.deallocate(usm_host_mem_data); | ||
cpu_executor.deallocate(host_mem_result); | ||
cpu_executor.deallocate(host_mem_data); | ||
|
||
// done | ||
std::cout << "Everything worked fine!" << std::endl; | ||
return 0; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters