Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce Risc-V Vector Intrinsic Support #642

Draft
wants to merge 4 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ check_include_file("stdint.h" HAVE_STDINT_H)
check_include_file("stdbool.h" HAVE_STDBOOL_H)
check_include_file("arm_neon.h" FLAC__HAS_NEONINTRIN)

# Toolchains won't allow riscv_vector.h to be included unless the
# vector extensions are enabled.
set(SAVED_CMAKE_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
set(CMAKE_REQUIRED_FLAGS "-march=rv64gcv")
check_include_file("riscv_vector.h" HAVE_RISCV_VECTOR_H)
set(CMAKE_REQUIRED_FLAGS "${SAVED_CMAKE_REQUIRED_FLAGS}")

if(NOT HAVE_STDINT_H OR NOT HAVE_STDBOOL_H)
message(SEND_ERROR "Header stdint.h and/or stdbool.h not found")
endif()
Expand Down
2 changes: 2 additions & 0 deletions Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ EXTRA_DIST = \
flac-config.cmake.in \
cmake/CheckA64NEON.c.in \
cmake/CheckA64NEON.cmake \
cmake/CheckRV64Vector.c.in \
cmake/CheckRV64Vector.cmake \
cmake/CheckCPUArch.c.in \
cmake/CheckCPUArch.cmake \
cmake/FindOgg.cmake \
Expand Down
4 changes: 4 additions & 0 deletions cmake/CheckCPUArch.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,7 @@ endmacro(CHECK_CPU_ARCH_X86)
macro(CHECK_CPU_ARCH_ARM64 VARIABLE)
_CHECK_CPU_ARCH(arm64 "defined(__aarch64__) || defined(__arm64__)" ${VARIABLE})
endmacro(CHECK_CPU_ARCH_ARM64)

macro(CHECK_CPU_ARCH_RISCV64 VARIABLE)
_CHECK_CPU_ARCH(riscv64 "defined(__riscv)" ${VARIABLE})
endmacro(CHECK_CPU_ARCH_RISCV64)
7 changes: 7 additions & 0 deletions cmake/CheckRV64Vector.c.in
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#include <riscv_vector.h>
int main (void)
{
size_t vl = __riscv_vsetvl_e64m2(8);
vfloat64m2_t a = __riscv_vfmv_v_f_f64m2(0.5, vl);
return 0;
}
15 changes: 15 additions & 0 deletions cmake/CheckRV64Vector.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
macro(CHECK_RV64VECTOR VARIABLE)
if(NOT DEFINED HAVE_${VARIABLE})
message(STATUS "Check whether RV64 Vector can be used")
configure_file(${PROJECT_SOURCE_DIR}/cmake/CheckRV64Vector.c.in ${PROJECT_BINARY_DIR}/CMakeFiles/CMakeTmp/CheckRV64Vector.c @ONLY)
try_compile(HAVE_${VARIABLE} "${PROJECT_BINARY_DIR}"
"${PROJECT_BINARY_DIR}/CMakeFiles/CMakeTmp/CheckRV64Vector.c" COMPILE_DEFINITIONS "-march=rv64gcv")
if(HAVE_${VARIABLE})
message(STATUS "Check whether RV64 Vector can be used - yes")
set(${VARIABLE} 1 CACHE INTERNAL "Result of CHECK_RV64VECTOR" FORCE)
else ()
message(STATUS "Check whether RV64 Vector can be used - no")
endif()
endif ()
endmacro(CHECK_RV64VECTOR)

12 changes: 12 additions & 0 deletions config.cmake.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
/* Target processor ARM64 */
#cmakedefine FLAC__CPU_ARM64

/* Target processor RiscV64 */
#cmakedefine FLAC__CPU_RISCV64

/* Set FLAC__BYTES_PER_WORD to 8 (4 is the default) */
#cmakedefine01 ENABLE_64_BIT_WORDS

Expand All @@ -31,6 +34,9 @@
/* Set to 1 if <arm_neon.h> contains A64 intrinsics */
#cmakedefine01 FLAC__HAS_A64NEONINTRIN

/* Set to 1 if <riscv_vector.h> is available. */
#cmakedefine01 FLAC__HAS_RISCVINTRIN

/* define if building for Darwin / MacOS X */
#cmakedefine FLAC__SYS_DARWIN

Expand Down Expand Up @@ -103,6 +109,9 @@
/* Define to 1 if you have the <sys/param.h> header file. */
#cmakedefine HAVE_SYS_PARAM_H

/* Define to 1 if you have the <sys/param.h> header file. */
#cmakedefine HAVE_SYS_AUXV_H

/* Define to 1 if you have the <sys/stat.h> header file. */
#cmakedefine HAVE_SYS_STAT_H

Expand All @@ -112,6 +121,9 @@
/* Define to 1 if you have the <termios.h> header file. */
#cmakedefine HAVE_TERMIOS_H

/* Define to 1 if you have the <riscv_vector.h> header file. */
#cmakedefine HAVE_RISCV_VECTOR_H

/* Define to 1 if typeof works with your compiler. */
#cmakedefine HAVE_TYPEOF

Expand Down
34 changes: 34 additions & 0 deletions configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,15 @@ AC_DEFINE(FLAC__NO_ASM)
AH_TEMPLATE(FLAC__NO_ASM, [define to disable use of assembly code])
fi

AC_ARG_ENABLE(riscv-vector-optimizations, AS_HELP_STRING([--enable-riscv-vector-optimizations],[Enable RiscV Vector Optimization Routines]), riscv_vector_opt=yes, riscv_vector_opt=no)
AM_CONDITIONAL(FLAC__RISCV_VECTOR, test "x$riscv_vector_opt" = xyes)
if test "x$riscv_vector_opt" = xyes ; then
CFLAGS="-march=rv64gcv $CFLAGS"
AC_CHECK_HEADERS([riscv_vector.h])
AC_DEFINE(FLAC__RISCV_VECTOR)
AH_TEMPLATE(FLAC__RISCV_VECTOR, [define to enable use riscv vector extensions])
fi

dnl check for getauxval in standard library
AC_CHECK_FUNCS(getauxval)

Expand Down Expand Up @@ -131,10 +140,16 @@ case "$host_cpu" in
AC_DEFINE(FLAC__CPU_ARM64)
AH_TEMPLATE(FLAC__CPU_ARM64, [define if building for ARM])
;;
riscv64)
cpu_riscv64=true
AC_DEFINE(FLAC__CPU_RISCV64)
AH_TEMPLATE(FLAC__CPU_RISCV64, [define if building for Riscv64])
;;
esac
AM_CONDITIONAL(FLAC__CPU_X86_64, test "x$cpu_x86_64" = xtrue)
AM_CONDITIONAL(FLaC__CPU_IA32, test "x$cpu_ia32" = xtrue)
AM_CONDITIONAL(FLAC__CPU_ARM64, test "x$cpu_arm64" = xtrue)
AM_CONDITIONAL(FLAC__CPU_RISCV64, test "x$cpu_riscv64" = xtrue)

if test "x$ac_cv_header_x86intrin_h" = xyes -a "x$asm_opt" = xyes; then
AC_DEFINE([FLAC__HAS_X86INTRIN], 1, [Set to 1 if <x86intrin.h> is available.])
Expand Down Expand Up @@ -162,6 +177,25 @@ else
AC_DEFINE([FLAC__HAS_NEONINTRIN], 0)
fi

if test "x$ac_cv_header_riscv_vector_h" = xyes -a "x$asm_opt" = xyes; then
AC_DEFINE([FLAC__HAS_RISCVINTRIN], 1, [Set to 1 if <riscv_vector.h> is available.])
AC_MSG_CHECKING([whether riscv_vector.h has Vector functions])
AC_COMPILE_IFELSE(
[AC_LANG_PROGRAM([[#include <riscv_vector.h>]],
[[size_t vl = __riscv_vsetvl_e64m2(8); vfloat64m2_t a = __riscv_vfmv_v_f_f64m2(0.5, vl);]])],
[AC_MSG_RESULT([yes])
has_riscvvector=yes],
[AC_MSG_RESULT([no])])
if test "x$has_riscvvector" = xyes; then
AC_DEFINE([FLAC__HAS_RISCVINTRIN], 1, [Set to 1 if <riscv_vector.h> has vector instructions.])
asm_optimisation=yes
else
AC_DEFINE([FLAC__HAS_RISCVINTRIN], 0)
fi
else
AC_DEFINE([FLAC__HAS_RISCVINTRIN], 0)
fi

case "$host" in
i386-*-openbsd3.[[0-3]]) OBJ_FORMAT=aoutb ;;
*-*-cygwin|*mingw*) OBJ_FORMAT=win32 ;;
Expand Down
16 changes: 16 additions & 0 deletions src/libFLAC/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,14 +1,17 @@
option(WITH_ASM "Use any assembly optimization routines" ON)
option(RISCV_VECTOR "Use RiscV Vector Optimization" OFF)

check_include_file("cpuid.h" HAVE_CPUID_H)
check_include_file("sys/param.h" HAVE_SYS_PARAM_H)
check_include_file("sys/auxv.h" HAVE_SYS_AUXV_H)

set(CMAKE_REQUIRED_LIBRARIES m)
check_function_exists(lround HAVE_LROUND)

include(CheckCSourceCompiles)
include(CheckCPUArch)
include(CheckA64NEON)
include(CheckRV64Vector)

check_cpu_arch_x64(FLAC__CPU_X86_64)
if(NOT FLAC__CPU_X86_64)
Expand All @@ -26,12 +29,24 @@ else()
if(FLAC__CPU_ARM64)
check_a64neon(FLAC__HAS_A64NEONINTRIN)
endif()

check_cpu_arch_riscv64(FLAC__CPU_RISCV64)
if(FLAC__CPU_RISCV64)
check_rv64vector(FLAC__HAS_RISCVINTRIN)
if (RISCV_VECTOR AND FLAC__HAS_RISCVINTRIN)
set_property(SOURCE lpc_intrin_riscv.c cpu.c APPEND_STRING PROPERTY COMPILE_FLAGS " -march=rv64gcv ")
endif()
endif()
endif()

if(NOT WITH_ASM)
add_definitions(-DFLAC__NO_ASM)
endif()

if(RISCV_VECTOR)
add_definitions(-DFLAC__RISCV_VECTOR)
endif()

include_directories("include")

add_library(FLAC
Expand All @@ -53,6 +68,7 @@ add_library(FLAC
lpc_intrin_sse41.c
lpc_intrin_avx2.c
lpc_intrin_fma.c
lpc_intrin_riscv.c
md5.c
memory.c
metadata_iterators.c
Expand Down
1 change: 1 addition & 0 deletions src/libFLAC/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ libFLAC_sources = \
lpc_intrin_avx2.c \
lpc_intrin_fma.c \
lpc_intrin_neon.c \
lpc_intrin_riscv.c \
md5.c \
memory.c \
metadata_iterators.c \
Expand Down
29 changes: 29 additions & 0 deletions src/libFLAC/cpu.c
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@
#include <sys/auxv.h>
#endif

#if defined(HAVE_RISCV_VECTOR_H) && defined(FLAC__RISCV_VECTOR) && defined(FLAC__HAS_RISCVINTRIN)
#include <riscv_vector.h>
#endif

#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN && !defined FLAC__NO_ASM

/* these are flags in EDX of CPUID AX=00000001 */
Expand Down Expand Up @@ -231,6 +235,26 @@ x86_cpu_info (FLAC__CPUInfo *info)
#endif
}

static void
rv64_cpu_info(FLAC__CPUInfo *info)
{
#if defined(FLAC__CPU_RISCV64) && defined(FLAC__HAS_RISCVINTRIN) && !defined(FLAC__NO_ASM) && defined(HAVE_SYS_AUXV_H) && defined(FLAC__RISCV_VECTOR) && defined(HAVE_RISCV_VECTOR_H)
#define ISA_V_HWCAP (1 << ('v' - 'a'))
// Check that the kernel and the hardware support RiscV Vector.
unsigned long hw_cap = getauxval(AT_HWCAP);
info->rv64.has_vector = (hw_cap & ISA_V_HWCAP) == ISA_V_HWCAP;
if(info->rv64.has_vector) {
info->rv64.vlenb = __riscv_vsetvlmax_e8m1();
}
else {
info->rv64.vlenb = 0;
}
#else
info->rv64.has_vector = false;
info->rv64.vlenb = 0;
#endif
}

void FLAC__cpu_info (FLAC__CPUInfo *info)
{
memset(info, 0, sizeof(*info));
Expand All @@ -239,6 +263,8 @@ void FLAC__cpu_info (FLAC__CPUInfo *info)
info->type = FLAC__CPUINFO_TYPE_IA32;
#elif defined FLAC__CPU_X86_64
info->type = FLAC__CPUINFO_TYPE_X86_64;
#elif defined FLAC__CPU_RISCV64
info->type = FLAC__CPUINFO_TYPE_RISCV_64;
#else
info->type = FLAC__CPUINFO_TYPE_UNKNOWN;
#endif
Expand All @@ -248,6 +274,9 @@ void FLAC__cpu_info (FLAC__CPUInfo *info)
case FLAC__CPUINFO_TYPE_X86_64:
x86_cpu_info (info);
break;
case FLAC__CPUINFO_TYPE_RISCV_64:
rv64_cpu_info(info);
break;
default:
info->use_asm = false;
break;
Expand Down
7 changes: 7 additions & 0 deletions src/libFLAC/include/private/cpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@
typedef enum {
FLAC__CPUINFO_TYPE_IA32,
FLAC__CPUINFO_TYPE_X86_64,
FLAC__CPUINFO_TYPE_RISCV_64,
FLAC__CPUINFO_TYPE_UNKNOWN
} FLAC__CPUInfo_Type;

Expand All @@ -183,10 +184,16 @@ typedef struct {
FLAC__bool bmi2;
} FLAC__CPUInfo_x86;

typedef struct {
FLAC__bool has_vector;
FLAC__uint32 vlenb; // Vector register length in bytes if CPU supports it.
} FLAC__CPUInfo_RV64;

typedef struct {
FLAC__bool use_asm;
FLAC__CPUInfo_Type type;
FLAC__CPUInfo_x86 x86;
FLAC__CPUInfo_RV64 rv64;
} FLAC__CPUInfo;

void FLAC__cpu_info(FLAC__CPUInfo *info);
Expand Down
7 changes: 7 additions & 0 deletions src/libFLAC/include/private/lpc.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,9 @@ void FLAC__lpc_compute_autocorrelation_intrin_neon_lag_8(const FLAC__real data[]
void FLAC__lpc_compute_autocorrelation_intrin_neon_lag_10(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
void FLAC__lpc_compute_autocorrelation_intrin_neon_lag_14(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
#endif
#if defined FLAC__CPU_RISCV64 && FLAC__HAS_RISCVINTRIN
void FLAC__lpc_compute_autocorrelation_intrin_riscv(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
#endif
#endif /* FLAC__NO_ASM */

/*
Expand Down Expand Up @@ -162,6 +165,10 @@ void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_neon(const FLAC__in
void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_neon(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
# endif

#ifdef FLAC__CPU_RISCV64
void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_riscv(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
#endif

# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN
# ifdef FLAC__SSE2_SUPPORTED
void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
Expand Down
Loading