diff --git a/code/gen_impl.py b/code/gen_impl.py index 9d05528f..7f66212f 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -53,6 +53,11 @@ "d": "fp64", } +widths_fp = { + "s": 32, + "d": 64, +} + for width in ["b", "bu", "h", "hu", "w", "wu", "d", "du"]: w = widths[width] m = members[width] @@ -763,6 +768,7 @@ for width in ["s", "d"]: m = members_fp[width] + w = widths_fp[width] for name, op in [("div", "/"), ("mul", "*"), ("sub", "-"), ("add", "+")]: with open(f"vf{name}_{width}.h", "w") as f: print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) diff --git a/code/vfadd_d.cpp b/code/vfadd_d.cpp new file mode 100644 index 00000000..63fd65a6 --- /dev/null +++ b/code/vfadd_d.cpp @@ -0,0 +1,19 @@ +#include "common.h" + +v128 vfadd_d(v128 a, v128 b) { + v128 dst; +#include "vfadd_d.h" + return dst; +} + +void test() { + { + __m128d a = {1.0, 2.0}; + __m128d b = {5.0, 6.0}; + PRINT(a); + PRINT(b); + PRINT(__lsx_vfadd_d(a, b)); + PRINT(vfadd_d(a, b)); + assert(vfadd_d(a, b) == __lsx_vfadd_d(a, b)); + } +} diff --git a/code/vfadd_s.cpp b/code/vfadd_s.cpp new file mode 100644 index 00000000..cb328089 --- /dev/null +++ b/code/vfadd_s.cpp @@ -0,0 +1,19 @@ +#include "common.h" + +v128 vfadd_s(v128 a, v128 b) { + v128 dst; +#include "vfadd_s.h" + return dst; +} + +void test() { + { + __m128 a = {1.0, 2.0, 3.0, 4.0}; + __m128 b = {5.0, 6.0, 7.0, 8.0}; + PRINT(a); + PRINT(b); + PRINT(__lsx_vfadd_s(a, b)); + PRINT(vfadd_s(a, b)); + assert(vfadd_s(a, b) == __lsx_vfadd_s(a, b)); + } +} diff --git a/code/vfadd_s.h b/code/vfadd_s.h index 3f67c7f7..401f4385 100644 --- a/code/vfadd_s.h +++ b/code/vfadd_s.h @@ -1,3 +1,3 @@ -for (int i = 0; i < 2; i++) { +for (int i = 0; i < 4; i++) { dst.fp32[i] = a.fp32[i] + b.fp32[i]; } diff --git a/code/vfdiv_s.h b/code/vfdiv_s.h index a13b10ee..34f2464a 100644 --- a/code/vfdiv_s.h +++ b/code/vfdiv_s.h @@ -1,3 +1,3 @@ -for (int i = 0; i < 2; i++) { +for (int i = 0; i < 4; i++) { dst.fp32[i] = a.fp32[i] / b.fp32[i]; } diff --git a/code/vfmax_s.h b/code/vfmax_s.h index 85a2b177..f30ee8f5 100644 --- a/code/vfmax_s.h +++ b/code/vfmax_s.h @@ -1,3 +1,3 @@ -for (int i = 0; i < 2; i++) { +for (int i = 0; i < 4; i++) { dst.fp32[i] = +(a.fp32[i], b.fp32[i]); } diff --git a/code/vfmaxa_s.h b/code/vfmaxa_s.h index 90c964e9..2c703f1a 100644 --- a/code/vfmaxa_s.h +++ b/code/vfmaxa_s.h @@ -1,3 +1,3 @@ -for (int i = 0; i < 2; i++) { +for (int i = 0; i < 4; i++) { dst.fp32[i] = (abs(a.fp32[i]) > abs(b.fp32[i])) ? a.fp32[i] : b.fp32[i]; } diff --git a/code/vfmin_s.h b/code/vfmin_s.h index 85a2b177..f30ee8f5 100644 --- a/code/vfmin_s.h +++ b/code/vfmin_s.h @@ -1,3 +1,3 @@ -for (int i = 0; i < 2; i++) { +for (int i = 0; i < 4; i++) { dst.fp32[i] = +(a.fp32[i], b.fp32[i]); } diff --git a/code/vfmina_s.h b/code/vfmina_s.h index da77ed57..834c5a0a 100644 --- a/code/vfmina_s.h +++ b/code/vfmina_s.h @@ -1,3 +1,3 @@ -for (int i = 0; i < 2; i++) { +for (int i = 0; i < 4; i++) { dst.fp32[i] = (abs(a.fp32[i]) < abs(b.fp32[i])) ? a.fp32[i] : b.fp32[i]; } diff --git a/code/vfmul_s.h b/code/vfmul_s.h index 93fc790b..13c64799 100644 --- a/code/vfmul_s.h +++ b/code/vfmul_s.h @@ -1,3 +1,3 @@ -for (int i = 0; i < 2; i++) { +for (int i = 0; i < 4; i++) { dst.fp32[i] = a.fp32[i] * b.fp32[i]; } diff --git a/code/vfsub_d.cpp b/code/vfsub_d.cpp new file mode 100644 index 00000000..e1be91c0 --- /dev/null +++ b/code/vfsub_d.cpp @@ -0,0 +1,19 @@ +#include "common.h" + +v128 vfsub_d(v128 a, v128 b) { + v128 dst; +#include "vfsub_d.h" + return dst; +} + +void test() { + { + __m128d a = {1.0, 2.0}; + __m128d b = {5.0, 6.0}; + PRINT(a); + PRINT(b); + PRINT(__lsx_vfsub_d(a, b)); + PRINT(vfsub_d(a, b)); + assert(vfsub_d(a, b) == __lsx_vfsub_d(a, b)); + } +} diff --git a/code/vfsub_s.cpp b/code/vfsub_s.cpp new file mode 100644 index 00000000..3042e3f3 --- /dev/null +++ b/code/vfsub_s.cpp @@ -0,0 +1,19 @@ +#include "common.h" + +v128 vfsub_s(v128 a, v128 b) { + v128 dst; +#include "vfsub_s.h" + return dst; +} + +void test() { + { + __m128 a = {1.0, 2.0, 3.0, 4.0}; + __m128 b = {5.0, 6.0, 7.0, 8.0}; + PRINT(a); + PRINT(b); + PRINT(__lsx_vfsub_s(a, b)); + PRINT(vfsub_s(a, b)); + assert(vfsub_s(a, b) == __lsx_vfsub_s(a, b)); + } +} diff --git a/code/vfsub_s.h b/code/vfsub_s.h index 4937b20b..efe840f9 100644 --- a/code/vfsub_s.h +++ b/code/vfsub_s.h @@ -1,3 +1,3 @@ -for (int i = 0; i < 2; i++) { +for (int i = 0; i < 4; i++) { dst.fp32[i] = a.fp32[i] - b.fp32[i]; }