From 88403a9b498cd246e64e473b81e2ffcc5da1471d Mon Sep 17 00:00:00 2001 From: Marat Dukhan Date: Wed, 19 Jun 2024 23:11:08 -0700 Subject: [PATCH] Support native conversions without __fp16/_Float16 types --- .github/workflows/cmake.yml | 49 +++++++++++++++++++++ include/fp16/fp16.h | 86 +++++++++++++++++++++++++------------ include/fp16/macros.h | 14 ++++++ 3 files changed, 121 insertions(+), 28 deletions(-) diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 8878700..7bf724d 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -65,6 +65,27 @@ jobs: run: cmake --build build --parallel - name: Test run: ctest --test-dir build --parallel --output-on-failure + cmake-linux-x86-f16c: + runs-on: ubuntu-20.04 + timeout-minutes: 15 + steps: + - uses: actions/checkout@v4 + - name: Update apt + run: sudo apt update + - name: Install multilib gcc + run: sudo apt install gcc-multilib g++-multilib + - name: Install ninja + run: sudo apt install ninja-build + - name: Configure + run: cmake -Bbuild -S. -G Ninja -DCMAKE_BUILD_TYPE=Release -DFP16_BUILD_COMPARATIVE_BENCHMARKS=ON + env: + CFLAGS: "-m32 -mf16c" + CXXFLAGS: "-m32 -mf16c" + LDFLAGS: "-m32" + - name: Build + run: cmake --build build --parallel + - name: Test + run: ctest --test-dir build --parallel --output-on-failure cmake-macos-x86_64: runs-on: macos-12 timeout-minutes: 15 @@ -115,6 +136,20 @@ jobs: run: cmake --build build --config Release --parallel - name: Test run: ctest --test-dir build --build-config Release --parallel --output-on-failure + cmake-windows-x86-avx2: + runs-on: windows-2019 + timeout-minutes: 15 + steps: + - uses: actions/checkout@v4 + - name: Configure + run: cmake -Bbuild -S. 
-G "Visual Studio 16 2019" -A Win32 -DFP16_BUILD_COMPARATIVE_BENCHMARKS=ON + env: + CFLAGS: "/arch:AVX2" + CXXFLAGS: "/arch:AVX2" + - name: Build + run: cmake --build build --config Release --parallel + - name: Test + run: ctest --test-dir build --build-config Release --parallel --output-on-failure cmake-windows-x64: runs-on: windows-2019 timeout-minutes: 15 @@ -126,6 +161,20 @@ jobs: run: cmake --build build --config Release --parallel - name: Test run: ctest --test-dir build --build-config Release --parallel --output-on-failure + cmake-windows-x64-avx2: + runs-on: windows-2019 + timeout-minutes: 15 + steps: + - uses: actions/checkout@v4 + - name: Configure + run: cmake -Bbuild -S. -G "Visual Studio 16 2019" -A x64 -DFP16_BUILD_COMPARATIVE_BENCHMARKS=ON + env: + CFLAGS: "/arch:AVX2" + CXXFLAGS: "/arch:AVX2" + - name: Build + run: cmake --build build --config Release --parallel + - name: Test + run: ctest --test-dir build --build-config Release --parallel --output-on-failure cmake-windows-arm64: runs-on: windows-2019 timeout-minutes: 15 diff --git a/include/fp16/fp16.h b/include/fp16/fp16.h index 0bcf61b..e87cf52 100644 --- a/include/fp16/fp16.h +++ b/include/fp16/fp16.h @@ -10,13 +10,19 @@ #include #endif -#ifdef _MSC_VER - #include -#endif - #include #include +#if defined(_MSC_VER) + #include +#endif +#if defined(__F16C__) && FP16_USE_NATIVE_CONVERSION && !FP16_USE_FLOAT16_TYPE && !FP16_USE_FP16_TYPE + #include +#endif +#if (defined(__aarch64__) || defined(_M_ARM64)) && FP16_USE_NATIVE_CONVERSION && !FP16_USE_FLOAT16_TYPE && !FP16_USE_FP16_TYPE + #include +#endif + /* * Convert a 16-bit floating-point number in IEEE half-precision format, in bit representation, to @@ -107,18 +113,30 @@ static inline uint32_t fp16_ieee_to_fp32_bits(uint16_t h) { * floating-point operations and bitcasts between integer and floating-point variables. 
*/ static inline float fp16_ieee_to_fp32_value(uint16_t h) { -#if FP16_USE_FLOAT16_TYPE - union { - uint16_t as_bits; - _Float16 as_value; - } fp16 = { h }; - return (float) fp16.as_value; -#elif FP16_USE_FP16_TYPE - union { - uint16_t as_bits; - __fp16 as_value; - } fp16 = { h }; - return (float) fp16.as_value; +#if FP16_USE_NATIVE_CONVERSION + #if FP16_USE_FLOAT16_TYPE + union { + uint16_t as_bits; + _Float16 as_value; + } fp16 = { h }; + return (float) fp16.as_value; + #elif FP16_USE_FP16_TYPE + union { + uint16_t as_bits; + __fp16 as_value; + } fp16 = { h }; + return (float) fp16.as_value; + #else + #if (defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__) + return _cvtsh_ss((unsigned short) h); + #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__) + return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128((int) (unsigned int) h))); + #elif defined(_M_ARM64) || defined(__aarch64__) + return vget_lane_f32(vcvt_f32_f16(vreinterpret_f16_u16(vdup_n_u16(h))), 0); + #else + #error "Archtecture- or compiler-specific implementation required" + #endif + #endif #else /* * Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word: @@ -236,18 +254,30 @@ static inline float fp16_ieee_to_fp32_value(uint16_t h) { * floating-point operations and bitcasts between integer and floating-point variables. 
*/ static inline uint16_t fp16_ieee_from_fp32_value(float f) { -#if FP16_USE_FLOAT16_TYPE - union { - _Float16 as_value; - uint16_t as_bits; - } fp16 = { (_Float16) f }; - return fp16.as_bits; -#elif FP16_USE_FP16_TYPE - union { - __fp16 as_value; - uint16_t as_bits; - } fp16 = { (__fp16) f }; - return fp16.as_bits; +#if FP16_USE_NATIVE_CONVERSION + #if FP16_USE_FLOAT16_TYPE + union { + _Float16 as_value; + uint16_t as_bits; + } fp16 = { (_Float16) f }; + return fp16.as_bits; + #elif FP16_USE_FP16_TYPE + union { + __fp16 as_value; + uint16_t as_bits; + } fp16 = { (__fp16) f }; + return fp16.as_bits; + #else + #if (defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__) + return _cvtss_sh(f, _MM_FROUND_CUR_DIRECTION); + #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__) + return (uint16_t) _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(f), _MM_FROUND_CUR_DIRECTION)); + #elif defined(_M_ARM64) || defined(__aarch64__) + return vget_lane_u16(vcvt_f16_f32(vdupq_n_f32(f)), 0); + #else + #error "Archtecture- or compiler-specific implementation required" + #endif + #endif #else #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__) const float scale_to_inf = 0x1.0p+112f; diff --git a/include/fp16/macros.h b/include/fp16/macros.h index 2503f63..4018b0c 100644 --- a/include/fp16/macros.h +++ b/include/fp16/macros.h @@ -2,6 +2,20 @@ #ifndef FP16_MACROS_H #define FP16_MACROS_H +#ifndef FP16_USE_NATIVE_CONVERSION + #if (defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__) + #define FP16_USE_NATIVE_CONVERSION 1 + #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__) + #define FP16_USE_NATIVE_CONVERSION 1 + #elif defined(_MSC_VER) && defined(_M_ARM64) + #define FP16_USE_NATIVE_CONVERSION 1 + #elif defined(__GNUC__) && defined(__aarch64__) + #define FP16_USE_NATIVE_CONVERSION 1 + #endif + #if !defined(FP16_USE_NATIVE_CONVERSION) + 
#define FP16_USE_NATIVE_CONVERSION 0 + #endif // !defined(FP16_USE_NATIVE_CONVERSION) +#endif // !defined(FP16_USE_NATIVE_CONVERSION) #ifndef FP16_USE_FLOAT16_TYPE #if !defined(__clang__) && !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ >= 12)