diff --git a/README.md b/README.md index 42a4abc3..214b629e 100644 --- a/README.md +++ b/README.md @@ -132,8 +132,6 @@ TODO List: ### vftintrneh.l.s -### vreplgr2vr.b/h/w/d - ### vrotri.b/h/w/d ### vsrlri.b/h/w/d diff --git a/code/common.h b/code/common.h index 99fa18c9..6de5c0c2 100644 --- a/code/common.h +++ b/code/common.h @@ -154,6 +154,17 @@ void print(const char *s, __m128d num) { #define FUZZ_N 128 +#define FUZZ0(func, ...) \ + do { \ + for (int i = 0; i < FUZZ_N; i++) { \ + if (func(__VA_ARGS__) != __lsx_##func(__VA_ARGS__)) { \ + PRINT(__lsx_##func(__VA_ARGS__)); \ + PRINT(func(__VA_ARGS__)); \ + assert(func(__VA_ARGS__) == __lsx_##func(__VA_ARGS__)); \ + } \ + } \ + } while (0); + #define FUZZ1(func, ...) \ do { \ for (int i = 0; i < FUZZ_N; i++) { \ diff --git a/code/gen_impl.py b/code/gen_impl.py index c0cf2c7a..c90aa6d2 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -471,6 +471,13 @@ file=f, ) print(f"}}", file=f) + with open(f"vreplgr2vr_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = val;", + file=f, + ) + print(f"}}", file=f) with open(f"vsigncov_{width}.h", "w") as f: print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) print( diff --git a/code/gen_tb.py b/code/gen_tb.py index 7d86b058..83e6400b 100644 --- a/code/gen_tb.py +++ b/code/gen_tb.py @@ -88,6 +88,7 @@ "vrotr": (widths_signed, "v128 a, v128 b"), "vreplve": (widths_signed, "v128 a, int idx", [0, 1]), "vreplvei": (widths_signed, "v128 a, int idx", [0, 1]), + "vreplgr2vr": (widths_signed, "int val", [0, 1, 256]), "vsadd": (widths_all, "v128 a, v128 b"), "vseq": (widths_signed, "v128 a, v128 b"), "vseqi": (widths_signed, "v128 a, int imm", [-16, 0, 15]), diff --git a/code/vreplgr2vr_b.cpp b/code/vreplgr2vr_b.cpp new file mode 100644 index 00000000..2e3a918a --- /dev/null +++ b/code/vreplgr2vr_b.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vreplgr2vr_b(int val) { + v128 dst; +#include "vreplgr2vr_b.h" + return dst; +} + +void test() { + FUZZ0(vreplgr2vr_b, 0); + FUZZ0(vreplgr2vr_b, 1); + FUZZ0(vreplgr2vr_b, 256); +} diff --git a/code/vreplgr2vr_b.h b/code/vreplgr2vr_b.h new file mode 100644 index 00000000..9c70e26f --- /dev/null +++ b/code/vreplgr2vr_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = val; +} diff --git a/code/vreplgr2vr_d.cpp b/code/vreplgr2vr_d.cpp new file mode 100644 index 00000000..14b65b2a --- /dev/null +++ b/code/vreplgr2vr_d.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vreplgr2vr_d(int val) { + v128 dst; +#include "vreplgr2vr_d.h" + return dst; +} + +void test() { + FUZZ0(vreplgr2vr_d, 0); + FUZZ0(vreplgr2vr_d, 1); + FUZZ0(vreplgr2vr_d, 256); +} diff --git a/code/vreplgr2vr_d.h b/code/vreplgr2vr_d.h new file mode 100644 index 00000000..224b924f --- /dev/null +++ b/code/vreplgr2vr_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = val; +} diff --git a/code/vreplgr2vr_h.cpp b/code/vreplgr2vr_h.cpp new file mode 100644 index 00000000..268c362e --- /dev/null +++ b/code/vreplgr2vr_h.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vreplgr2vr_h(int val) { + v128 dst; +#include "vreplgr2vr_h.h" + return dst; +} + +void test() { + FUZZ0(vreplgr2vr_h, 0); + FUZZ0(vreplgr2vr_h, 1); + FUZZ0(vreplgr2vr_h, 256); +} diff --git a/code/vreplgr2vr_h.h b/code/vreplgr2vr_h.h new file mode 100644 index 00000000..24e0d18d --- /dev/null +++ b/code/vreplgr2vr_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = val; +} diff --git a/code/vreplgr2vr_w.cpp b/code/vreplgr2vr_w.cpp new file mode 100644 index 00000000..2a55ae75 --- /dev/null +++ b/code/vreplgr2vr_w.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vreplgr2vr_w(int val) { + v128 dst; +#include "vreplgr2vr_w.h" + return dst; +} + +void test() { + FUZZ0(vreplgr2vr_w, 0); + FUZZ0(vreplgr2vr_w, 1); + FUZZ0(vreplgr2vr_w, 256); +} diff --git a/code/vreplgr2vr_w.h b/code/vreplgr2vr_w.h new file mode 100644 index 00000000..8b67b069 --- /dev/null +++ b/code/vreplgr2vr_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = val; +} diff --git a/docs/lsx/misc.md b/docs/lsx/misc.md index e2c6b317..24b4e04d 100644 --- a/docs/lsx/misc.md +++ b/docs/lsx/misc.md @@ -44,6 +44,11 @@ {{ vpickod('w') }} {{ vpickod('d') }} +{{ vreplgr2vr('b') }} +{{ vreplgr2vr('h') }} +{{ vreplgr2vr('w') }} +{{ vreplgr2vr('d') }} + {{ vreplve('b') }} {{ vreplve('h') }} {{ vreplve('w') }} diff --git a/main.py b/main.py index f0f53747..b3d71529 100644 --- a/main.py +++ b/main.py @@ -800,6 +800,19 @@ def vreplvei(name): desc=f"Repeat the element in lane `idx` of `a` to whole vector.", ) + @env.macro + def vreplgr2vr(name): + width = widths[name] + if name == "d": + long = "long " + else: + long = "" + return instruction( + intrinsic=f"__m128i __lsx_vreplgr2vr_{name} ({long}int val)", + instr=f"vreplgr2vr.{name} vr, r", + desc=f"Repeat `val` to whole vector.", + ) + @env.macro def vsigncov(name): width = widths[name]