Skip to content

Commit

Permalink
Support native conversions without __fp16/_Float16 types
Browse files Browse the repository at this point in the history
  • Loading branch information
Maratyszcza committed Jun 20, 2024
1 parent 0854aa1 commit 8b39ef3
Show file tree
Hide file tree
Showing 3 changed files with 121 additions and 28 deletions.
49 changes: 49 additions & 0 deletions .github/workflows/cmake.yml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,27 @@ jobs:
run: cmake --build build --parallel
- name: Test
run: ctest --test-dir build --parallel --output-on-failure
# CI job: 32-bit x86 Linux build with F16C instructions enabled (-m32 -mf16c),
# configured with Ninja in Release mode, followed by the ctest test suite.
cmake-linux-x86-f16c:
runs-on: ubuntu-20.04
timeout-minutes: 15
steps:
- uses: actions/checkout@v4
- name: Update apt
run: sudo apt update
# Multilib packages supply the 32-bit compiler/runtime support used by -m32 below.
- name: Install multilib gcc
run: sudo apt install gcc-multilib g++-multilib
- name: Install ninja
run: sudo apt install ninja-build
- name: Configure
run: cmake -Bbuild -S. -G Ninja -DCMAKE_BUILD_TYPE=Release -DFP16_BUILD_COMPARATIVE_BENCHMARKS=ON
# Compiler and linker flags are passed to CMake through the environment.
env:
CFLAGS: "-m32 -mf16c"
CXXFLAGS: "-m32 -mf16c"
LDFLAGS: "-m32"
- name: Build
run: cmake --build build --parallel
- name: Test
run: ctest --test-dir build --parallel --output-on-failure
cmake-macos-x86_64:
runs-on: macos-12
timeout-minutes: 15
Expand Down Expand Up @@ -115,6 +136,20 @@ jobs:
run: cmake --build build --config Release --parallel
- name: Test
run: ctest --test-dir build --build-config Release --parallel --output-on-failure
# CI job: 32-bit (Win32) Visual Studio 2019 build compiled with /arch:AVX2,
# followed by the ctest test suite in the Release configuration.
cmake-windows-x86-avx2:
runs-on: windows-2019
timeout-minutes: 15
steps:
- uses: actions/checkout@v4
- name: Configure
run: cmake -Bbuild -S. -G "Visual Studio 16 2019" -A Win32 -DFP16_BUILD_COMPARATIVE_BENCHMARKS=ON
# Compiler flags are passed to CMake through the environment.
env:
CFLAGS: "/arch:AVX2"
CXXFLAGS: "/arch:AVX2"
- name: Build
run: cmake --build build --config Release --parallel
- name: Test
run: ctest --test-dir build --build-config Release --parallel --output-on-failure
cmake-windows-x64:
runs-on: windows-2019
timeout-minutes: 15
Expand All @@ -126,6 +161,20 @@ jobs:
run: cmake --build build --config Release --parallel
- name: Test
run: ctest --test-dir build --build-config Release --parallel --output-on-failure
# CI job: 64-bit (x64) Visual Studio 2019 build compiled with /arch:AVX2,
# followed by the ctest test suite in the Release configuration.
cmake-windows-x64-avx2:
runs-on: windows-2019
timeout-minutes: 15
steps:
- uses: actions/checkout@v4
- name: Configure
run: cmake -Bbuild -S. -G "Visual Studio 16 2019" -A x64 -DFP16_BUILD_COMPARATIVE_BENCHMARKS=ON
# Compiler flags are passed to CMake through the environment.
env:
CFLAGS: "/arch:AVX2"
CXXFLAGS: "/arch:AVX2"
- name: Build
run: cmake --build build --config Release --parallel
- name: Test
run: ctest --test-dir build --build-config Release --parallel --output-on-failure
cmake-windows-arm64:
runs-on: windows-2019
timeout-minutes: 15
Expand Down
86 changes: 58 additions & 28 deletions include/fp16/fp16.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,19 @@
#include <math.h>
#endif

#ifdef _MSC_VER
#include <intrin.h>
#endif

#include <fp16/bitcasts.h>
#include <fp16/macros.h>

#if defined(_MSC_VER)
#include <intrin.h>
#endif
#if defined(__F16C__) && FP16_USE_NATIVE_CONVERSION && !FP16_USE_FLOAT16_TYPE && !FP16_USE_FP16_TYPE
#include <immintrin.h>
#endif
#if (defined(__aarch64__) || defined(_M_ARM64)) && FP16_USE_NATIVE_CONVERSION && !FP16_USE_FLOAT16_TYPE && !FP16_USE_FP16_TYPE
#include <arm_neon.h>
#endif


/*
* Convert a 16-bit floating-point number in IEEE half-precision format, in bit representation, to
Expand Down Expand Up @@ -107,18 +113,30 @@ static inline uint32_t fp16_ieee_to_fp32_bits(uint16_t h) {
* floating-point operations and bitcasts between integer and floating-point variables.
*/
static inline float fp16_ieee_to_fp32_value(uint16_t h) {
#if FP16_USE_FLOAT16_TYPE
union {
uint16_t as_bits;
_Float16 as_value;
} fp16 = { h };
return (float) fp16.as_value;
#elif FP16_USE_FP16_TYPE
union {
uint16_t as_bits;
__fp16 as_value;
} fp16 = { h };
return (float) fp16.as_value;
#if FP16_USE_NATIVE_CONVERSION
#if FP16_USE_FLOAT16_TYPE
union {
uint16_t as_bits;
_Float16 as_value;
} fp16 = { h };
return (float) fp16.as_value;
#elif FP16_USE_FP16_TYPE
union {
uint16_t as_bits;
__fp16 as_value;
} fp16 = { h };
return (float) fp16.as_value;
#else
#if (defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__)
return _cvtsh_ss((unsigned short) h);
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)
return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128((int) (unsigned int) h)));
#elif defined(_M_ARM64) || defined(__aarch64__)
return vget_lane_f32(vcvt_f32_f16(vreinterpret_f16_u16(vdup_n_u16(h))), 0);
#else
#error "Archtecture- or compiler-specific implementation required"
#endif
#endif
#else
/*
* Extend the half-precision floating-point number to 32 bits and shift to the upper part of the 32-bit word:
Expand Down Expand Up @@ -236,18 +254,30 @@ static inline float fp16_ieee_to_fp32_value(uint16_t h) {
* floating-point operations and bitcasts between integer and floating-point variables.
*/
static inline uint16_t fp16_ieee_from_fp32_value(float f) {
#if FP16_USE_FLOAT16_TYPE
union {
_Float16 as_value;
uint16_t as_bits;
} fp16 = { (_Float16) f };
return fp16.as_bits;
#elif FP16_USE_FP16_TYPE
union {
__fp16 as_value;
uint16_t as_bits;
} fp16 = { (__fp16) f };
return fp16.as_bits;
#if FP16_USE_NATIVE_CONVERSION
#if FP16_USE_FLOAT16_TYPE
union {
_Float16 as_value;
uint16_t as_bits;
} fp16 = { (_Float16) f };
return fp16.as_bits;
#elif FP16_USE_FP16_TYPE
union {
__fp16 as_value;
uint16_t as_bits;
} fp16 = { (__fp16) f };
return fp16.as_bits;
#else
#if (defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__)
return _cvtss_sh(f, _MM_FROUND_CUR_DIRECTION);
#elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)
return (uint16_t) _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(f), _MM_FROUND_CUR_DIRECTION))
#elif defined(_M_ARM64) || defined(__aarch64__)
return vget_lane_u16(vcvt_f16_f32(vdupq_n_f32(f)), 0);
#else
#error "Archtecture- or compiler-specific implementation required"
#endif
#endif
#else
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) || defined(__GNUC__) && !defined(__STRICT_ANSI__)
const float scale_to_inf = 0x1.0p+112f;
Expand Down
14 changes: 14 additions & 0 deletions include/fp16/macros.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,20 @@
#ifndef FP16_MACROS_H
#define FP16_MACROS_H

/*
 * FP16_USE_NATIVE_CONVERSION: 1 or 0.
 *
 * A definition supplied by the user (e.g. on the compiler command line) is left untouched.
 * Otherwise the macro defaults to 1 when any of the following holds, and to 0 in all other cases:
 *   - the compiler defines __GNUC__ or __INTEL_COMPILER, and __F16C__ is defined;
 *   - MSVC targeting x86 or x86-64 with __AVX2__ defined;
 *   - MSVC targeting ARM64;
 *   - a compiler defining __GNUC__ targeting AArch64.
 */
#ifndef FP16_USE_NATIVE_CONVERSION
	#if ((defined(__INTEL_COMPILER) || defined(__GNUC__)) && defined(__F16C__)) \
		|| (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && defined(__AVX2__)) \
		|| (defined(_MSC_VER) && defined(_M_ARM64)) \
		|| (defined(__GNUC__) && defined(__aarch64__))
		#define FP16_USE_NATIVE_CONVERSION 1
	#else
		#define FP16_USE_NATIVE_CONVERSION 0
	#endif
#endif  // !defined(FP16_USE_NATIVE_CONVERSION)

#ifndef FP16_USE_FLOAT16_TYPE
#if !defined(__clang__) && !defined(__INTEL_COMPILER) && defined(__GNUC__) && (__GNUC__ >= 12)
Expand Down

0 comments on commit 8b39ef3

Please sign in to comment.