Add vfcvt for single <-> double

jiegec · Dec 12, 2023 · 614b0b1 · 614b0b1
1 parent cbdcf78
commit 614b0b1
Show file tree

Hide file tree

Showing 15 changed files with 151 additions and 23 deletions.
diff --git a/README.md b/README.md
@@ -202,7 +202,7 @@ Vector Multiplication High
 
 ### vfmina.s/d
 
-### vfcvf.h.s/s.d
+### vfcvt.h.s
 
 ### vffint.s.l
 
@@ -282,10 +282,6 @@ Vector Multiplication High
 
 ### vfcvth.s.h
 
-### vfcvtl.d.s
-
-### vfcvth.d.s
-
 ### vffint.s.w/wu/d.l/d.lu
 
 ### vffintl.d.w

diff --git a/code/Makefile b/code/Makefile
@@ -13,3 +13,5 @@ clean:
 
 %: %.cpp %.h
 	$(CXX) $< -mlsx -mlasx -o $@
+
+.SUFFIXES:
diff --git a/code/common.h b/code/common.h
@@ -55,16 +55,23 @@ union v128 {
   u16 half[8];
   u32 word[4];
   u64 dword[2];
-  __int128 qword[1];
+  u128 qword[1];
+
+  float fp32[4];
+  double fp64[2];
 
   v128(__m128i other) { m128i = other; }
+  v128(__m128d other) { m128d = other; }
+  v128(__m128 other) { m128 = other; }
   v128() {
     for (int i = 0; i < 8; i++) {
       half[i] = rand();
     }
   }
 
   operator __m128i() { return m128i; }
+  operator __m128() { return m128; }
+  operator __m128d() { return m128d; }
   // duplicate with __m128i
   // operator v2i64() { return __v2i64; }
   operator v2u64() { return __v2u64; }
@@ -77,10 +84,20 @@ union v128 {
   bool operator==(const v128 &other) const {
     return memcmp(byte, other.byte, 16) == 0;
   }
+  bool operator!=(const v128 &other) const {
+    return memcmp(byte, other.byte, 16) != 0;
+  }
 };
 
 void test();
 
+void print(const char *s, v128 num) { // dump num, labelled s, in three views of the same 16 bytes
+  printf("v128 as __m128i %s: %016lx %016lx\n", s, num.dword[0], num.dword[1]); // two 64-bit words in hex; NOTE(review): %lx assumes u64 is unsigned long - confirm against the typedef
+  printf("v128 as __m128 %s: %f %f %f %f\n", s, num.fp32[0], num.fp32[1],
+         num.fp32[2], num.fp32[3]); // union contents read as four floats
+  printf("v128 as __m128d %s: %lf %lf\n", s, num.fp64[0], num.fp64[1]); // union contents read as two doubles
+}
+
 void print(const char *s, __m128i num) {
   printf("__m128i %s: %016llx %016llx\n", s, num[0], num[1]);
 }
@@ -99,37 +116,45 @@ void print(const char *s, __m128d num) {
   do {                                                                         \
     for (int i = 0; i < 64; i++) {                                             \
       v128 a;                                                                  \
-      PRINT(a);                                                                \
-      PRINT(__lsx_##func(a __VA_OPT__(, ) __VA_ARGS__));                       \
-      PRINT(func(a __VA_OPT__(, ) __VA_ARGS__));                               \
-      assert(func(a __VA_OPT__(, ) __VA_ARGS__) ==                             \
-             __lsx_##func(a __VA_OPT__(, ) __VA_ARGS__));                      \
+      if (func(a __VA_OPT__(, ) __VA_ARGS__) !=                                \
+          __lsx_##func(a __VA_OPT__(, ) __VA_ARGS__)) {                        \
+        PRINT(a);                                                              \
+        PRINT(__lsx_##func(a __VA_OPT__(, ) __VA_ARGS__));                     \
+        PRINT(func(a __VA_OPT__(, ) __VA_ARGS__));                             \
+        assert(func(a __VA_OPT__(, ) __VA_ARGS__) ==                           \
+               __lsx_##func(a __VA_OPT__(, ) __VA_ARGS__));                    \
+      }                                                                        \
     }                                                                          \
   } while (0);
 
 #define FUZZ2(func, ...)                                                       \
   do {                                                                         \
     for (int i = 0; i < 64; i++) {                                             \
       v128 a, b;                                                               \
-      PRINT(a);                                                                \
-      PRINT(b);                                                                \
-      PRINT(__lsx_##func(a, b __VA_OPT__(, ) __VA_ARGS__));                    \
-      PRINT(func(a, b __VA_OPT__(, ) __VA_ARGS__));                            \
-      assert(func(a, b __VA_OPT__(, ) __VA_ARGS__) ==                          \
-             __lsx_##func(a, b __VA_OPT__(, ) __VA_ARGS__));                   \
+      if (func(a, b __VA_OPT__(, ) __VA_ARGS__) != /* dump on mismatch */      \
+          __lsx_##func(a, b __VA_OPT__(, ) __VA_ARGS__)) {                     \
+        PRINT(a);                                                              \
+        PRINT(b);                                                              \
+        PRINT(__lsx_##func(a, b __VA_OPT__(, ) __VA_ARGS__));                  \
+        PRINT(func(a, b __VA_OPT__(, ) __VA_ARGS__));                          \
+        assert(func(a, b __VA_OPT__(, ) __VA_ARGS__) ==                        \
+               __lsx_##func(a, b __VA_OPT__(, ) __VA_ARGS__));                 \
+      }                                                                        \
     }                                                                          \
   } while (0);
 
 #define FUZZ3(func)                                                            \
   do {                                                                         \
     for (int i = 0; i < 64; i++) {                                             \
       v128 a, b, c;                                                            \
-      PRINT(a);                                                                \
-      PRINT(b);                                                                \
-      PRINT(c);                                                                \
-      PRINT(__lsx_##func(a, b, c));                                            \
-      PRINT(func(a, b, c));                                                    \
-      assert(func(a, b, c) == __lsx_##func(a, b, c));                          \
+      if (func(a, b, c) != __lsx_##func(a, b, c)) { /* dump on mismatch */     \
+        PRINT(a);                                                              \
+        PRINT(b);                                                              \
+        PRINT(c);                                                              \
+        PRINT(__lsx_##func(a, b, c));                                          \
+        PRINT(func(a, b, c));                                                  \
+        assert(func(a, b, c) == __lsx_##func(a, b, c));                        \
+      }                                                                        \
     }                                                                          \
   } while (0);
 

diff --git a/code/vfcmp.h b/code/vfcmp.h
diff --git a/code/vfcvt_s_d.cpp b/code/vfcvt_s_d.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vfcvt_s_d(v128 a, v128 b) { // software model that FUZZ2 compares with __lsx_vfcvt_s_d
+  v128 dst;
+#include "vfcvt_s_d.h" // loop that fills dst.fp32[] from b.fp64[] and a.fp64[]
+  return dst;
+}
+
+void test() { FUZZ2(vfcvt_s_d); } // 64 randomly filled (a, b) pairs
diff --git a/code/vfcvt_s_d.h b/code/vfcvt_s_d.h
@@ -0,0 +1,7 @@
+for (int i = 0; i < 4; i++) {
+  if (i < 2) {
+    dst.fp32[i] = b.fp64[i]; // result lanes 0-1: doubles of b converted to float
+  } else {
+    dst.fp32[i] = a.fp64[i - 2]; // result lanes 2-3: doubles of a converted to float
+  }
+}
diff --git a/code/vfcvth_d_s.cpp b/code/vfcvth_d_s.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vfcvth_d_s(v128 a) { // software model that FUZZ1 compares with __lsx_vfcvth_d_s
+  v128 dst;
+#include "vfcvth_d_s.h" // loop that fills dst.fp64[] from a.fp32[2..3]
+  return dst;
+}
+
+void test() { FUZZ1(vfcvth_d_s); } // 64 randomly filled inputs
diff --git a/code/vfcvth_d_s.h b/code/vfcvth_d_s.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 2; i++) {
+  dst.fp64[i] = a.fp32[2 + i]; // float lanes 2-3 of a converted to double
+}
diff --git a/code/vfcvtl_d_s.cpp b/code/vfcvtl_d_s.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vfcvtl_d_s(v128 a) { // software model that FUZZ1 compares with __lsx_vfcvtl_d_s
+  v128 dst;
+#include "vfcvtl_d_s.h" // loop that fills dst.fp64[] from a.fp32[0..1] (low half, not the vfcvth fragment)
+  return dst;
+}
+
+void test() { FUZZ1(vfcvtl_d_s); } // 64 randomly filled inputs against the low-half intrinsic
diff --git a/code/vfcvtl_d_s.h b/code/vfcvtl_d_s.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 2; i++) {
+  dst.fp64[i] = a.fp32[i]; // float lanes 0-1 of a converted to double
+}
diff --git a/code/vfmadd.h b/code/vfmadd.h
diff --git a/code/vfmsub.h b/code/vfmsub.h
diff --git a/code/vfnmadd.h b/code/vfnmadd.h
diff --git a/code/vfnmsub.h b/code/vfnmsub.h
diff --git a/docs/lsx/float_conversion.md b/docs/lsx/float_conversion.md
@@ -0,0 +1,65 @@
+# Floating Point Conversion
+
+## __m128d __lsx_vfcvth_d_s (__m128 a)
+
+### Synopsis
+
+```c++
+__m128d __lsx_vfcvth_d_s (__m128 a)
+#include <lsxintrin.h>
+Instruction: vfcvth.d.s vr, vr
+CPU Flags: LSX
+```
+
+### Description
+
+Convert single precision floating point elements in higher half of `a` to double precision.
+
+### Operation
+
+```c++
+{% include('vfcvth_d_s.h') %}
+```
+
+## __m128d __lsx_vfcvtl_d_s (__m128 a)
+
+### Synopsis
+
+```c++
+__m128d __lsx_vfcvtl_d_s (__m128 a)
+#include <lsxintrin.h>
+Instruction: vfcvtl.d.s vr, vr
+CPU Flags: LSX
+```
+
+### Description
+
+Convert single precision floating point elements in lower half of `a` to double precision.
+
+### Operation
+
+```c++
+{% include('vfcvtl_d_s.h') %}
+```
+
+
+## __m128 __lsx_vfcvt_s_d (__m128d a, __m128d b)
+
+### Synopsis
+
+```c++
+__m128 __lsx_vfcvt_s_d (__m128d a, __m128d b)
+#include <lsxintrin.h>
+Instruction: vfcvt.s.d vr, vr, vr
+CPU Flags: LSX
+```
+
+### Description
+
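+Convert double precision floating point elements in `a` and `b` to single precision. The elements of `b` are written to the lower half of the result and the elements of `a` to the upper half.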
+
+### Operation
+
+```c++
+{% include('vfcvt_s_d.h') %}
+```
Original file line number	Diff line number	Diff line change
Expand Up		@@ -13,3 +13,5 @@ clean:

		%: %.cpp %.h
		$(CXX) $< -mlsx -mlasx -o $@

		.SUFFIXES: