From 48fa2fd9e4653657fd0f126b2c20963665ea933b Mon Sep 17 00:00:00 2001
From: Marat Dukhan <maratek@gmail.com>
Date: Mon, 27 May 2024 20:34:40 -0700
Subject: [PATCH] Test x87 FPU build in CI

---
 .github/workflows/cmake.yml | 12 ++++++----
 include/fp16/fp16.h         |  7 +++++-
 test/bitcasts.cc            | 48 +++++++++++++++++++++++++++++++++----
 3 files changed, 57 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml
index faeaf6c..c1a49b5 100644
--- a/.github/workflows/cmake.yml
+++ b/.github/workflows/cmake.yml
@@ -25,7 +25,7 @@ jobs:
       - name: Build
         run: cmake --build build --parallel
       - name: Test
-        run: ctest --test-dir build --parallel
+        run: ctest --test-dir build --parallel --output-on-failure
   cmake-macos-x86_64:
     runs-on: macos-12
     timeout-minutes: 15
@@ -36,7 +36,7 @@ jobs:
       - name: Build
         run: cmake --build build --config Release --parallel -- -quiet
       - name: Test
-        run: ctest --test-dir build --build-config Release --parallel
+        run: ctest --test-dir build --build-config Release --parallel --output-on-failure
   cmake-macos-arm64:
     runs-on: macos-14
     timeout-minutes: 15
@@ -47,7 +47,7 @@ jobs:
       - name: Build
         run: cmake --build build --config Release --parallel -- -quiet
       - name: Test
-        run: ctest --test-dir build --build-config Release --parallel
+        run: ctest --test-dir build --build-config Release --parallel --output-on-failure
   cmake-windows-x86:
     runs-on: windows-2019
     timeout-minutes: 15
@@ -55,10 +55,12 @@ jobs:
       - uses: actions/checkout@v4
       - name: Configure
         run: cmake -Bbuild -S. -G "Visual Studio 16 2019" -A Win32 -DFP16_BUILD_COMPARATIVE_BENCHMARKS=ON
+        env:
+          CXXFLAGS: "/arch:IA32"
       - name: Build
         run: cmake --build build --config Release --parallel
       - name: Test
-        run: ctest --test-dir build --build-config Release --parallel
+        run: ctest --test-dir build --build-config Release --parallel --output-on-failure
   cmake-windows-x64:
     runs-on: windows-2019
     timeout-minutes: 15
@@ -69,7 +71,7 @@ jobs:
       - name: Build
         run: cmake --build build --config Release --parallel
       - name: Test
-        run: ctest --test-dir build --build-config Release --parallel
+        run: ctest --test-dir build --build-config Release --parallel --output-on-failure
   cmake-windows-arm64:
     runs-on: windows-2019
     timeout-minutes: 15
diff --git a/include/fp16/fp16.h b/include/fp16/fp16.h
index 2b61fff..1a35ed5 100644
--- a/include/fp16/fp16.h
+++ b/include/fp16/fp16.h
@@ -228,7 +228,12 @@ static inline uint16_t fp16_ieee_from_fp32_value(float f) {
 	const float scale_to_inf = fp32_from_bits(UINT32_C(0x77800000));
 	const float scale_to_zero = fp32_from_bits(UINT32_C(0x08800000));
 #endif
-	float base = (fabsf(f) * scale_to_inf) * scale_to_zero;
+#if defined(_MSC_VER) && defined(_M_IX86_FP) && (_M_IX86_FP == 0)
+	const volatile float saturated_f = fabsf(f) * scale_to_inf;
+#else
+	const float saturated_f = fabsf(f) * scale_to_inf;
+#endif
+	float base = saturated_f * scale_to_zero;
 
 	const uint32_t w = fp32_to_bits(f);
 	const uint32_t shl1_w = w + w;
diff --git a/test/bitcasts.cc b/test/bitcasts.cc
index f7be29d..b106935 100644
--- a/test/bitcasts.cc
+++ b/test/bitcasts.cc
@@ -6,7 +6,7 @@
 
 
 TEST(FP32_TO_BITS, positive) {
-	for (uint32_t bits = UINT32_C(0x00000000); bits <= UINT32_C(0x7FFFFFFF); bits++) {
+	for (uint32_t bits = UINT32_C(0x00000000); bits <= UINT32_C(0x7F800000); bits++) {
 		float value;
 		memcpy(&value, &bits, sizeof(value));
 
@@ -18,7 +18,7 @@ TEST(FP32_TO_BITS, positive) {
 }
 
 TEST(FP32_TO_BITS, negative) {
-	for (uint32_t bits = UINT32_C(0xFFFFFFFF); bits >= UINT32_C(0x80000000); bits--) {
+	for (uint32_t bits = UINT32_C(0xFF800000); bits >= UINT32_C(0x80000000); bits--) {
 		float value;
 		memcpy(&value, &bits, sizeof(value));
 
@@ -29,8 +29,30 @@ TEST(FP32_TO_BITS, negative) {
 	}
 }
 
+TEST(FP32_TO_BITS, nan) {  // Every NaN bit pattern must still read back as a NaN through fp32_to_bits.
+	for (uint32_t bits = UINT32_C(0x7F800001); bits <= UINT32_C(0x7FFFFFFF); bits++) {  // NaN encodings with the sign bit clear
+		float value;
+		memcpy(&value, &bits, sizeof(value));
+		// Sign bit masked off: a NaN has magnitude bits above 0x7F800000 (the infinity pattern); the payload is not compared.
+		ASSERT_GT(fp32_to_bits(value) & UINT32_C(0x7FFFFFFF), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"BITS = 0x" << std::setw(8) << bits << ", " <<
+			"BITCAST(VALUE) = 0x" << std::setw(8) << fp32_to_bits(value);
+	}
+	// NOTE(review): exact bits are presumably not asserted because x87 loads can quiet signaling NaNs -- confirm.
+	for (uint32_t bits = UINT32_C(0xFFFFFFFF); bits >= UINT32_C(0xFF800001); bits--) {  // NaN encodings with the sign bit set, descending
+		float value;
+		memcpy(&value, &bits, sizeof(value));
+		// Same magnitude check as in the first loop; the mask makes it independent of the sign bit.
+		ASSERT_GT(fp32_to_bits(value) & UINT32_C(0x7FFFFFFF), UINT32_C(0x7F800000)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"BITS = 0x" << std::setw(8) << bits << ", " <<
+			"BITCAST(VALUE) = 0x" << std::setw(8) << fp32_to_bits(value);
+	}
+}
+
 TEST(FP32_FROM_BITS, positive) {
-	for (uint32_t bits = UINT32_C(0x00000000); bits <= UINT32_C(0x7FFFFFFF); bits++) {
+	for (uint32_t bits = UINT32_C(0x00000000); bits <= UINT32_C(0x7F800000); bits++) {
 		const float value = fp32_from_bits(bits);
 		uint32_t bitcast;
 		memcpy(&bitcast, &value, sizeof(bitcast));
@@ -43,7 +65,7 @@ TEST(FP32_FROM_BITS, positive) {
 }
 
 TEST(FP32_FROM_BITS, negative) {
-	for (uint32_t bits = UINT32_C(0xFFFFFFFF); bits >= UINT32_C(0x80000000); bits--) {
+	for (uint32_t bits = UINT32_C(0xFF800000); bits >= UINT32_C(0x80000000); bits--) {
 		const float value = fp32_from_bits(bits);
 		uint32_t bitcast;
 		memcpy(&bitcast, &value, sizeof(bitcast));
@@ -54,3 +76,21 @@ TEST(FP32_FROM_BITS, negative) {
 			"VALUE = 0x" << std::setw(8) << bitcast;
 	}
 }
+
+TEST(FP32_FROM_BITS, nan) {  // Every NaN bit pattern must convert to a float for which std::isnan is true.
+	for (uint32_t bits = UINT32_C(0x7F800001); bits <= UINT32_C(0x7FFFFFFF); bits++) {  // NaN encodings with the sign bit clear
+		const float value = fp32_from_bits(bits);
+		// Only NaN-ness is asserted; the resulting float's bits are not compared against the input.
+		ASSERT_TRUE(std::isnan(value)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"BITS = 0x" << std::setw(8) << bits;
+	}
+	// NOTE(review): exact bits are presumably not asserted because x87 loads can quiet signaling NaNs -- confirm.
+	for (uint32_t bits = UINT32_C(0xFFFFFFFF); bits >= UINT32_C(0xFF800001); bits--) {  // NaN encodings with the sign bit set, descending
+		const float value = fp32_from_bits(bits);
+		// Same NaN-ness check as in the first loop, for the negative-sign encodings.
+		ASSERT_TRUE(std::isnan(value)) <<
+			std::hex << std::uppercase << std::setfill('0') <<
+			"BITS = 0x" << std::setw(8) << bits;
+	}
+}