diff --git a/README.md b/README.md index 214b629e..34a475f1 100644 --- a/README.md +++ b/README.md @@ -132,12 +132,6 @@ TODO List: ### vftintrneh.l.s -### vrotri.b/h/w/d - -### vsrlri.b/h/w/d - -### vsrari.b/h/w/d - ### vpickve2gr.b/h/w/d/bu/hu/wu/du ### vsllwil.h.b/w.h/d.w diff --git a/code/gen_impl.py b/code/gen_impl.py index c90aa6d2..e0505e4e 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -401,6 +401,14 @@ file=f, ) print(f"}}", file=f) + with open(f"vrotri_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + # Mask the left-shift count so imm == 0 shifts by 0 instead of by the full element width (undefined behaviour in C++). + print( + f" dst.{m}[i] = (a.{m}[i] >> imm) | (a.{m}[i] << (({w} - imm) & 0x{w-1:x}));", + file=f, + ) + print(f"}}", file=f) with open(f"vsrlr_{width}.h", "w") as f: print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) print(f" if ((b.{m}[i] & 0x{w-1:x}) == 0) {{", file=f) @@ -415,6 +423,20 @@ ) print(f" }}", file=f) print(f"}}", file=f) + with open(f"vsrlri_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print(f" if (imm == 0) {{", file=f) + print( + f" dst.{m}[i] = a.{m}[i];", + file=f, + ) + print(f" }} else {{", file=f) + print( + f" dst.{m}[i] = (a.{m}[i] >> imm) + ((a.{m}[i] >> (imm - 1)) & 0x1);", + file=f, + ) + print(f" }}", file=f) + print(f"}}", file=f) with open(f"vsrar_{width}.h", "w") as f: print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) print(f" if ((b.{m}[i] & 0x{w-1:x}) == 0) {{", file=f) @@ -429,6 +451,20 @@ ) print(f" }}", file=f) print(f"}}", file=f) + with open(f"vsrari_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print(f" if (imm == 0) {{", file=f) + print( + f" dst.{m}[i] = a.{m}[i];", + file=f, + ) + print(f" }} else {{", file=f) + print( + f" dst.{m}[i] = ((s{w})a.{m}[i] >> imm) + (((s{w})a.{m}[i] >> (imm - 1)) & 0x1);", + file=f, + ) + print(f" }}", file=f) + print(f"}}", file=f) with open(f"vpackev_{width}.h", "w") as f: print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) print( diff --git a/code/gen_tb.py 
b/code/gen_tb.py index 83e6400b..7fe1045a 100644 --- a/code/gen_tb.py +++ b/code/gen_tb.py @@ -86,6 +86,7 @@ "vpickev": (widths_signed, "v128 a, v128 b"), "vpickod": (widths_signed, "v128 a, v128 b"), "vrotr": (widths_signed, "v128 a, v128 b"), + "vrotri": (widths_signed, "v128 a, int imm", [0, 7]), "vreplve": (widths_signed, "v128 a, int idx", [0, 1]), "vreplvei": (widths_signed, "v128 a, int idx", [0, 1]), "vreplgr2vr": (widths_signed, "int val", [0, 1, 256]), @@ -103,8 +104,10 @@ "vslei": (widths_all, "v128 a, int imm", [0, 15]), "vsrl": (widths_signed, "v128 a, v128 b"), "vsrlr": (widths_signed, "v128 a, v128 b"), + "vsrlri": (widths_signed, "v128 a, int imm", [0, 7]), "vsra": (widths_signed, "v128 a, v128 b"), "vsrar": (widths_signed, "v128 a, v128 b"), + "vsrari": (widths_signed, "v128 a, int imm", [0, 7]), "vsub": (widths_signed, "v128 a, v128 b"), "vsubwev": (widths_vsubw, "v128 a, v128 b"), "vsubwod": (widths_vsubw, "v128 a, v128 b"), diff --git a/code/vrotri_b.cpp b/code/vrotri_b.cpp new file mode 100644 index 00000000..2974614d --- /dev/null +++ b/code/vrotri_b.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vrotri_b(v128 a, int imm) { + v128 dst; +#include "vrotri_b.h" + return dst; +} + +void test() { + FUZZ1(vrotri_b, 0); + FUZZ1(vrotri_b, 7); +} diff --git a/code/vrotri_b.h b/code/vrotri_b.h new file mode 100644 index 00000000..101057e9 --- /dev/null +++ b/code/vrotri_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = (a.byte[i] >> imm) | (a.byte[i] << ((8 - imm) & 0x7)); +} diff --git a/code/vrotri_d.cpp b/code/vrotri_d.cpp new file mode 100644 index 00000000..0b94e799 --- /dev/null +++ b/code/vrotri_d.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vrotri_d(v128 a, int imm) { + v128 dst; +#include "vrotri_d.h" + return dst; +} + +void test() { + FUZZ1(vrotri_d, 0); + FUZZ1(vrotri_d, 7); +} diff --git a/code/vrotri_d.h b/code/vrotri_d.h new file mode 100644 index 00000000..b31abe58 --- /dev/null +++ b/code/vrotri_d.h @@ -0,0 +1,3 @@ 
+for (int i = 0; i < 2; i++) { + dst.dword[i] = (a.dword[i] >> imm) | (a.dword[i] << ((64 - imm) & 0x3f)); +} diff --git a/code/vrotri_h.cpp b/code/vrotri_h.cpp new file mode 100644 index 00000000..831f91d1 --- /dev/null +++ b/code/vrotri_h.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vrotri_h(v128 a, int imm) { + v128 dst; +#include "vrotri_h.h" + return dst; +} + +void test() { + FUZZ1(vrotri_h, 0); + FUZZ1(vrotri_h, 7); +} diff --git a/code/vrotri_h.h b/code/vrotri_h.h new file mode 100644 index 00000000..561e9585 --- /dev/null +++ b/code/vrotri_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = (a.half[i] >> imm) | (a.half[i] << ((16 - imm) & 0xf)); +} diff --git a/code/vrotri_w.cpp b/code/vrotri_w.cpp new file mode 100644 index 00000000..5b917d3c --- /dev/null +++ b/code/vrotri_w.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vrotri_w(v128 a, int imm) { + v128 dst; +#include "vrotri_w.h" + return dst; +} + +void test() { + FUZZ1(vrotri_w, 0); + FUZZ1(vrotri_w, 7); +} diff --git a/code/vrotri_w.h b/code/vrotri_w.h new file mode 100644 index 00000000..3c9c8b9b --- /dev/null +++ b/code/vrotri_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = (a.word[i] >> imm) | (a.word[i] << ((32 - imm) & 0x1f)); +} diff --git a/code/vsrari_b.cpp b/code/vsrari_b.cpp new file mode 100644 index 00000000..39e1e03f --- /dev/null +++ b/code/vsrari_b.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrari_b(v128 a, int imm) { + v128 dst; +#include "vsrari_b.h" + return dst; +} + +void test() { + FUZZ1(vsrari_b, 0); + FUZZ1(vsrari_b, 7); +} diff --git a/code/vsrari_b.h b/code/vsrari_b.h new file mode 100644 index 00000000..4cc045b8 --- /dev/null +++ b/code/vsrari_b.h @@ -0,0 +1,7 @@ +for (int i = 0; i < 16; i++) { + if (imm == 0) { + dst.byte[i] = a.byte[i]; + } else { + dst.byte[i] = ((s8)a.byte[i] >> imm) + (((s8)a.byte[i] >> (imm - 1)) & 0x1); + } +} diff --git a/code/vsrari_d.cpp b/code/vsrari_d.cpp new file mode 100644 index 00000000..24965fd0 --- /dev/null +++ 
b/code/vsrari_d.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrari_d(v128 a, int imm) { + v128 dst; +#include "vsrari_d.h" + return dst; +} + +void test() { + FUZZ1(vsrari_d, 0); + FUZZ1(vsrari_d, 7); +} diff --git a/code/vsrari_d.h b/code/vsrari_d.h new file mode 100644 index 00000000..4a0df41a --- /dev/null +++ b/code/vsrari_d.h @@ -0,0 +1,8 @@ +for (int i = 0; i < 2; i++) { + if (imm == 0) { + dst.dword[i] = a.dword[i]; + } else { + dst.dword[i] = + ((s64)a.dword[i] >> imm) + (((s64)a.dword[i] >> (imm - 1)) & 0x1); + } +} diff --git a/code/vsrari_h.cpp b/code/vsrari_h.cpp new file mode 100644 index 00000000..79e61feb --- /dev/null +++ b/code/vsrari_h.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrari_h(v128 a, int imm) { + v128 dst; +#include "vsrari_h.h" + return dst; +} + +void test() { + FUZZ1(vsrari_h, 0); + FUZZ1(vsrari_h, 7); +} diff --git a/code/vsrari_h.h b/code/vsrari_h.h new file mode 100644 index 00000000..69d6765a --- /dev/null +++ b/code/vsrari_h.h @@ -0,0 +1,8 @@ +for (int i = 0; i < 8; i++) { + if (imm == 0) { + dst.half[i] = a.half[i]; + } else { + dst.half[i] = + ((s16)a.half[i] >> imm) + (((s16)a.half[i] >> (imm - 1)) & 0x1); + } +} diff --git a/code/vsrari_w.cpp b/code/vsrari_w.cpp new file mode 100644 index 00000000..27ac65a2 --- /dev/null +++ b/code/vsrari_w.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrari_w(v128 a, int imm) { + v128 dst; +#include "vsrari_w.h" + return dst; +} + +void test() { + FUZZ1(vsrari_w, 0); + FUZZ1(vsrari_w, 7); +} diff --git a/code/vsrari_w.h b/code/vsrari_w.h new file mode 100644 index 00000000..dd5d35be --- /dev/null +++ b/code/vsrari_w.h @@ -0,0 +1,8 @@ +for (int i = 0; i < 4; i++) { + if (imm == 0) { + dst.word[i] = a.word[i]; + } else { + dst.word[i] = + ((s32)a.word[i] >> imm) + (((s32)a.word[i] >> (imm - 1)) & 0x1); + } +} diff --git a/code/vsrlri_b.cpp b/code/vsrlri_b.cpp new file mode 100644 index 00000000..53754f9c --- /dev/null +++ b/code/vsrlri_b.cpp @@ -0,0 +1,12 @@ +#include 
"common.h" + +v128 vsrlri_b(v128 a, int imm) { + v128 dst; +#include "vsrlri_b.h" + return dst; +} + +void test() { + FUZZ1(vsrlri_b, 0); + FUZZ1(vsrlri_b, 7); +} diff --git a/code/vsrlri_b.h b/code/vsrlri_b.h new file mode 100644 index 00000000..40f0e2e5 --- /dev/null +++ b/code/vsrlri_b.h @@ -0,0 +1,7 @@ +for (int i = 0; i < 16; i++) { + if (imm == 0) { + dst.byte[i] = a.byte[i]; + } else { + dst.byte[i] = (a.byte[i] >> imm) + ((a.byte[i] >> (imm - 1)) & 0x1); + } +} diff --git a/code/vsrlri_d.cpp b/code/vsrlri_d.cpp new file mode 100644 index 00000000..c8b347f6 --- /dev/null +++ b/code/vsrlri_d.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrlri_d(v128 a, int imm) { + v128 dst; +#include "vsrlri_d.h" + return dst; +} + +void test() { + FUZZ1(vsrlri_d, 0); + FUZZ1(vsrlri_d, 7); +} diff --git a/code/vsrlri_d.h b/code/vsrlri_d.h new file mode 100644 index 00000000..f7dfbf3d --- /dev/null +++ b/code/vsrlri_d.h @@ -0,0 +1,7 @@ +for (int i = 0; i < 2; i++) { + if (imm == 0) { + dst.dword[i] = a.dword[i]; + } else { + dst.dword[i] = (a.dword[i] >> imm) + ((a.dword[i] >> (imm - 1)) & 0x1); + } +} diff --git a/code/vsrlri_h.cpp b/code/vsrlri_h.cpp new file mode 100644 index 00000000..63fd108f --- /dev/null +++ b/code/vsrlri_h.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrlri_h(v128 a, int imm) { + v128 dst; +#include "vsrlri_h.h" + return dst; +} + +void test() { + FUZZ1(vsrlri_h, 0); + FUZZ1(vsrlri_h, 7); +} diff --git a/code/vsrlri_h.h b/code/vsrlri_h.h new file mode 100644 index 00000000..2bfb01c7 --- /dev/null +++ b/code/vsrlri_h.h @@ -0,0 +1,7 @@ +for (int i = 0; i < 8; i++) { + if (imm == 0) { + dst.half[i] = a.half[i]; + } else { + dst.half[i] = (a.half[i] >> imm) + ((a.half[i] >> (imm - 1)) & 0x1); + } +} diff --git a/code/vsrlri_w.cpp b/code/vsrlri_w.cpp new file mode 100644 index 00000000..78b626d9 --- /dev/null +++ b/code/vsrlri_w.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrlri_w(v128 a, int imm) { + v128 dst; +#include "vsrlri_w.h" + 
return dst; +} + +void test() { + FUZZ1(vsrlri_w, 0); + FUZZ1(vsrlri_w, 7); +} diff --git a/code/vsrlri_w.h b/code/vsrlri_w.h new file mode 100644 index 00000000..4c17b653 --- /dev/null +++ b/code/vsrlri_w.h @@ -0,0 +1,7 @@ +for (int i = 0; i < 4; i++) { + if (imm == 0) { + dst.word[i] = a.word[i]; + } else { + dst.word[i] = (a.word[i] >> imm) + ((a.word[i] >> (imm - 1)) & 0x1); + } +} diff --git a/docs/lsx/shift.md b/docs/lsx/shift.md index 5591fa1c..42444115 100644 --- a/docs/lsx/shift.md +++ b/docs/lsx/shift.md @@ -57,6 +57,11 @@ Compute 128-bit `a` shifted right by `imm * 8` bits. {{ vsrlr('w') }} {{ vsrlr('d') }} +{{ vsrlri('b') }} +{{ vsrlri('h') }} +{{ vsrlri('w') }} +{{ vsrlri('d') }} + {{ vsra('b') }} {{ vsra('h') }} {{ vsra('w') }} @@ -67,7 +72,17 @@ Compute 128-bit `a` shifted right by `imm * 8` bits. {{ vsrar('w') }} {{ vsrar('d') }} +{{ vsrari('b') }} +{{ vsrari('h') }} +{{ vsrari('w') }} +{{ vsrari('d') }} + {{ vrotr('b') }} {{ vrotr('h') }} {{ vrotr('w') }} {{ vrotr('d') }} + +{{ vrotri('b') }} +{{ vrotri('h') }} +{{ vrotri('w') }} +{{ vrotri('d') }} diff --git a/main.py b/main.py index b3d71529..e38a9acb 100644 --- a/main.py +++ b/main.py @@ -725,6 +725,15 @@ def vrotr(name): desc=f"Rotate right the unsigned {width}-bit elements in `a` by elements in `b`, store the result to `dst`.", ) + @env.macro + def vrotri(name): + width = widths[name] + return instruction( + intrinsic=f"__m128i __lsx_vrotri_{name} (__m128i a, imm0_{width-1} imm)", + instr=f"vrotri.{name} vr, vr, imm", + desc=f"Rotate right the unsigned {width}-bit elements in `a` by `imm`, store the result to `dst`.", + ) + @env.macro def vsrlr(name): width = widths[name] @@ -735,6 +744,15 @@ def vsrlr(name): desc=f"Logical right shift (with rounding) the unsigned {width}-bit elements in `a` by elements in `b`, store the result to `dst`.", ) + @env.macro + def vsrlri(name): + width = widths[name] + 
return instruction( + intrinsic=f"__m128i __lsx_vsrlri_{name} (__m128i a, imm0_{width-1} imm)", + instr=f"vsrlri.{name} vr, vr, imm", + desc=f"Logical right shift (with rounding) the unsigned {width}-bit elements in `a` by `imm`, store the result to `dst`.", + ) + @env.macro def vsrar(name): width = widths[name] @@ -745,6 +763,15 @@ def vsrar(name): desc=f"Arithmetic right shift (with rounding) the signed {width}-bit elements in `a` by elements in `b`, store the result to `dst`.", ) + @env.macro + def vsrari(name): + width = widths[name] + return instruction( + intrinsic=f"__m128i __lsx_vsrari_{name} (__m128i a, imm0_{width-1} imm)", + instr=f"vsrari.{name} vr, vr, imm", + desc=f"Arithmetic right shift (with rounding) the signed {width}-bit elements in `a` by `imm`, store the result to `dst`.", + ) + @env.macro def vpackev(name): width = widths[name]