diff --git a/README.md b/README.md index 45358fd2..6abf39d6 100644 --- a/README.md +++ b/README.md @@ -6,8 +6,6 @@ Arranged from QEMU implementation and [GCC Intrinsics](https://gcc.gnu.org/onlin TODO List: -### vsran.b.h/h.w/w.d - ### vsrlrn.b.h/h.w/w.d ### vsrarn.b.h/h.w/w.d @@ -124,16 +122,12 @@ TODO List: ### vsat.b/h/w/d/bu/hu/wu/du -### vsrlni.b.h/h.w/w.d/d.q - ### vsrlrni.b.h/h.w/w.d/d.q ### vssrlni.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q ### vssrlrni.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q -### vsrani.b.h/h.w/w.d/d.q - ### vsrarni.b.h/h.w/w.d/d.q ### vssrani.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q diff --git a/code/gen_impl.py b/code/gen_impl.py index d12ad468..01d0917c 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -129,6 +129,28 @@ file=f, ) print(f"}}", file=f) + with open(f"vsran_{width}_{double_width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = (i < {64 // w}) ? (s{w})((s{double_w})a.{double_m}[i] >> (b.{double_m}[i] & {double_w-1})) : 0;", + file=f, + ) + print(f"}}", file=f) + if sign == "s": + with open(f"vsrlni_{width}_{double_width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = (i < {64 // w}) ? (u{w})((u{double_w})b.{double_m}[i] >> imm) : (u{w})((u{double_w})a.{double_m}[i - {64 // w}] >> imm);", + file=f, + ) + print(f"}}", file=f) + with open(f"vsrani_{width}_{double_width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = (i < {64 // w}) ? 
(s{w})((s{double_w})b.{double_m}[i] >> imm) : (s{w})((s{double_w})a.{double_m}[i - {64 // w}] >> imm);", + file=f, + ) + print(f"}}", file=f) if width == "d" or width == "du": with open(f"vextl_{double_width}_{width}.h", "w") as f: diff --git a/code/gen_tb.py b/code/gen_tb.py index 91e365d4..d5705dbc 100644 --- a/code/gen_tb.py +++ b/code/gen_tb.py @@ -7,6 +7,7 @@ widths_vexth = ["h_b", "hu_bu", "w_h", "wu_hu", "d_w", "du_wu", "q_d", "qu_du"] widths_vsllwil = ["h_b", "hu_bu", "w_h", "wu_hu", "d_w", "du_wu"] widths_vsrln = ["b_h", "h_w", "w_d"] +widths_vsrlni = ["b_h", "h_w", "w_d", "d_q"] widths_vaddw = [ "h_b", "h_bu", @@ -108,15 +109,18 @@ "vslti": (widths_all, "v128 a, int imm", [0, 15]), "vsle": (widths_all, "v128 a, v128 b"), "vslei": (widths_all, "v128 a, int imm", [0, 15]), + "vsra": (widths_signed, "v128 a, v128 b"), + "vsrai": (widths_signed, "v128 a, int imm", [0, 7]), + "vsran": (widths_vsrln, "v128 a, v128 b"), + "vsrani": (widths_vsrlni, "v128 a, v128 b, int imm", [0, 7]), + "vsrar": (widths_signed, "v128 a, v128 b"), + "vsrari": (widths_signed, "v128 a, int imm", [0, 7]), "vsrl": (widths_signed, "v128 a, v128 b"), "vsrli": (widths_signed, "v128 a, int imm", [0, 7]), "vsrln": (widths_vsrln, "v128 a, v128 b"), + "vsrlni": (widths_vsrlni, "v128 a, v128 b, int imm", [0, 7]), "vsrlr": (widths_signed, "v128 a, v128 b"), "vsrlri": (widths_signed, "v128 a, int imm", [0, 7]), - "vsra": (widths_signed, "v128 a, v128 b"), - "vsrai": (widths_signed, "v128 a, int imm", [0, 7]), - "vsrar": (widths_signed, "v128 a, v128 b"), - "vsrari": (widths_signed, "v128 a, int imm", [0, 7]), "vsub": (widths_signed, "v128 a, v128 b"), "vsubwev": (widths_vsubw, "v128 a, v128 b"), "vsubwod": (widths_vsubw, "v128 a, v128 b"), diff --git a/code/vsran_b_h.cpp b/code/vsran_b_h.cpp new file mode 100644 index 00000000..99d13d21 --- /dev/null +++ b/code/vsran_b_h.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vsran_b_h(v128 a, v128 b) { + v128 dst; +#include "vsran_b_h.h" + return dst; 
+} + +void test() { FUZZ2(vsran_b_h); } diff --git a/code/vsran_b_h.h b/code/vsran_b_h.h new file mode 100644 index 00000000..10935e2b --- /dev/null +++ b/code/vsran_b_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = (i < 8) ? (s8)((s16)a.half[i] >> (b.half[i] & 15)) : 0; +} diff --git a/code/vsran_d_q.h b/code/vsran_d_q.h new file mode 100644 index 00000000..35d4b6f5 --- /dev/null +++ b/code/vsran_d_q.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = (i < 1) ? (s64)((s128)a.qword[i] >> (b.qword[i] & 127)) : 0; +} diff --git a/code/vsran_h_w.cpp b/code/vsran_h_w.cpp new file mode 100644 index 00000000..212d34af --- /dev/null +++ b/code/vsran_h_w.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vsran_h_w(v128 a, v128 b) { + v128 dst; +#include "vsran_h_w.h" + return dst; +} + +void test() { FUZZ2(vsran_h_w); } diff --git a/code/vsran_h_w.h b/code/vsran_h_w.h new file mode 100644 index 00000000..56c08b27 --- /dev/null +++ b/code/vsran_h_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = (i < 4) ? (s16)((s32)a.word[i] >> (b.word[i] & 31)) : 0; +} diff --git a/code/vsran_w_d.cpp b/code/vsran_w_d.cpp new file mode 100644 index 00000000..28eb0ba6 --- /dev/null +++ b/code/vsran_w_d.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vsran_w_d(v128 a, v128 b) { + v128 dst; +#include "vsran_w_d.h" + return dst; +} + +void test() { FUZZ2(vsran_w_d); } diff --git a/code/vsran_w_d.h b/code/vsran_w_d.h new file mode 100644 index 00000000..3a49f81f --- /dev/null +++ b/code/vsran_w_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = (i < 2) ? 
(s32)((s64)a.dword[i] >> (b.dword[i] & 63)) : 0; +} diff --git a/code/vsrani_b_h.cpp b/code/vsrani_b_h.cpp new file mode 100644 index 00000000..bcb50d50 --- /dev/null +++ b/code/vsrani_b_h.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrani_b_h(v128 a, v128 b, int imm) { + v128 dst; +#include "vsrani_b_h.h" + return dst; +} + +void test() { + FUZZ2(vsrani_b_h, 0); + FUZZ2(vsrani_b_h, 7); +} diff --git a/code/vsrani_b_h.h b/code/vsrani_b_h.h new file mode 100644 index 00000000..30cc13ef --- /dev/null +++ b/code/vsrani_b_h.h @@ -0,0 +1,4 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = + (i < 8) ? (s8)((s16)b.half[i] >> imm) : (s8)((s16)a.half[i - 8] >> imm); +} diff --git a/code/vsrani_d_q.cpp b/code/vsrani_d_q.cpp new file mode 100644 index 00000000..e98d8dc5 --- /dev/null +++ b/code/vsrani_d_q.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrani_d_q(v128 a, v128 b, int imm) { + v128 dst; +#include "vsrani_d_q.h" + return dst; +} + +void test() { + FUZZ2(vsrani_d_q, 0); + FUZZ2(vsrani_d_q, 7); +} diff --git a/code/vsrani_d_q.h b/code/vsrani_d_q.h new file mode 100644 index 00000000..673c77fe --- /dev/null +++ b/code/vsrani_d_q.h @@ -0,0 +1,4 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = (i < 1) ? (s64)((s128)b.qword[i] >> imm) + : (s64)((s128)a.qword[i - 1] >> imm); +} diff --git a/code/vsrani_h_w.cpp b/code/vsrani_h_w.cpp new file mode 100644 index 00000000..744cabf0 --- /dev/null +++ b/code/vsrani_h_w.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrani_h_w(v128 a, v128 b, int imm) { + v128 dst; +#include "vsrani_h_w.h" + return dst; +} + +void test() { + FUZZ2(vsrani_h_w, 0); + FUZZ2(vsrani_h_w, 7); +} diff --git a/code/vsrani_h_w.h b/code/vsrani_h_w.h new file mode 100644 index 00000000..bda54900 --- /dev/null +++ b/code/vsrani_h_w.h @@ -0,0 +1,4 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = + (i < 4) ? 
(s16)((s32)b.word[i] >> imm) : (s16)((s32)a.word[i - 4] >> imm); +} diff --git a/code/vsrani_w_d.cpp b/code/vsrani_w_d.cpp new file mode 100644 index 00000000..f5239645 --- /dev/null +++ b/code/vsrani_w_d.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrani_w_d(v128 a, v128 b, int imm) { + v128 dst; +#include "vsrani_w_d.h" + return dst; +} + +void test() { + FUZZ2(vsrani_w_d, 0); + FUZZ2(vsrani_w_d, 7); +} diff --git a/code/vsrani_w_d.h b/code/vsrani_w_d.h new file mode 100644 index 00000000..a50254b1 --- /dev/null +++ b/code/vsrani_w_d.h @@ -0,0 +1,4 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = (i < 2) ? (s32)((s64)b.dword[i] >> imm) + : (s32)((s64)a.dword[i - 2] >> imm); +} diff --git a/code/vsrlni_b_h.cpp b/code/vsrlni_b_h.cpp new file mode 100644 index 00000000..760d5039 --- /dev/null +++ b/code/vsrlni_b_h.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrlni_b_h(v128 a, v128 b, int imm) { + v128 dst; +#include "vsrlni_b_h.h" + return dst; +} + +void test() { + FUZZ2(vsrlni_b_h, 0); + FUZZ2(vsrlni_b_h, 7); +} diff --git a/code/vsrlni_b_h.h b/code/vsrlni_b_h.h new file mode 100644 index 00000000..51cc722d --- /dev/null +++ b/code/vsrlni_b_h.h @@ -0,0 +1,4 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = + (i < 8) ? (u8)((u16)b.half[i] >> imm) : (u8)((u16)a.half[i - 8] >> imm); +} diff --git a/code/vsrlni_d_q.cpp b/code/vsrlni_d_q.cpp new file mode 100644 index 00000000..be081d3b --- /dev/null +++ b/code/vsrlni_d_q.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrlni_d_q(v128 a, v128 b, int imm) { + v128 dst; +#include "vsrlni_d_q.h" + return dst; +} + +void test() { + FUZZ2(vsrlni_d_q, 0); + FUZZ2(vsrlni_d_q, 7); +} diff --git a/code/vsrlni_d_q.h b/code/vsrlni_d_q.h new file mode 100644 index 00000000..addbe4ff --- /dev/null +++ b/code/vsrlni_d_q.h @@ -0,0 +1,4 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = (i < 1) ? 
(u64)((u128)b.qword[i] >> imm) + : (u64)((u128)a.qword[i - 1] >> imm); +} diff --git a/code/vsrlni_h_w.cpp b/code/vsrlni_h_w.cpp new file mode 100644 index 00000000..a6fd12b9 --- /dev/null +++ b/code/vsrlni_h_w.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrlni_h_w(v128 a, v128 b, int imm) { + v128 dst; +#include "vsrlni_h_w.h" + return dst; +} + +void test() { + FUZZ2(vsrlni_h_w, 0); + FUZZ2(vsrlni_h_w, 7); +} diff --git a/code/vsrlni_h_w.h b/code/vsrlni_h_w.h new file mode 100644 index 00000000..797b0322 --- /dev/null +++ b/code/vsrlni_h_w.h @@ -0,0 +1,4 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = + (i < 4) ? (u16)((u32)b.word[i] >> imm) : (u16)((u32)a.word[i - 4] >> imm); +} diff --git a/code/vsrlni_w_d.cpp b/code/vsrlni_w_d.cpp new file mode 100644 index 00000000..5ff6c30c --- /dev/null +++ b/code/vsrlni_w_d.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrlni_w_d(v128 a, v128 b, int imm) { + v128 dst; +#include "vsrlni_w_d.h" + return dst; +} + +void test() { + FUZZ2(vsrlni_w_d, 0); + FUZZ2(vsrlni_w_d, 7); +} diff --git a/code/vsrlni_w_d.h b/code/vsrlni_w_d.h new file mode 100644 index 00000000..a0dacb12 --- /dev/null +++ b/code/vsrlni_w_d.h @@ -0,0 +1,4 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = (i < 2) ? (u32)((u64)b.dword[i] >> imm) + : (u32)((u64)a.dword[i - 2] >> imm); +} diff --git a/docs/lsx/shift.md b/docs/lsx/shift.md index 212bee74..65bf6fb2 100644 --- a/docs/lsx/shift.md +++ b/docs/lsx/shift.md @@ -59,6 +59,36 @@ Compute 128-bit `a` shifted right by `imm * 8` bits. 
{{ vsllwil('d', 'w') }} {{ vsllwil('du', 'wu') }} +{{ vsra('b') }} +{{ vsra('h') }} +{{ vsra('w') }} +{{ vsra('d') }} + +{{ vsrai('b') }} +{{ vsrai('h') }} +{{ vsrai('w') }} +{{ vsrai('d') }} + +{{ vsran('b', 'h') }} +{{ vsran('h', 'w') }} +{{ vsran('w', 'd') }} + +{{ vsrani('b', 'h') }} +{{ vsrani('h', 'w') }} +{{ vsrani('w', 'd') }} +{{ vsrani('d', 'q') }} + +{{ vsrar('b') }} +{{ vsrar('h') }} +{{ vsrar('w') }} +{{ vsrar('d') }} + +{{ vsrari('b') }} +{{ vsrari('h') }} +{{ vsrari('w') }} +{{ vsrari('d') }} + + {{ vsrl('b') }} {{ vsrl('h') }} {{ vsrl('w') }} @@ -73,6 +103,11 @@ Compute 128-bit `a` shifted right by `imm * 8` bits. {{ vsrln('h', 'w') }} {{ vsrln('w', 'd') }} +{{ vsrlni('b', 'h') }} +{{ vsrlni('h', 'w') }} +{{ vsrlni('w', 'd') }} +{{ vsrlni('d', 'q') }} + {{ vsrlr('b') }} {{ vsrlr('h') }} {{ vsrlr('w') }} @@ -83,25 +118,6 @@ Compute 128-bit `a` shifted right by `imm * 8` bits. {{ vsrlri('w') }} {{ vsrlri('d') }} -{{ vsra('b') }} -{{ vsra('h') }} -{{ vsra('w') }} -{{ vsra('d') }} - -{{ vsrai('b') }} -{{ vsrai('h') }} -{{ vsrai('w') }} -{{ vsrai('d') }} - -{{ vsrar('b') }} -{{ vsrar('h') }} -{{ vsrar('w') }} -{{ vsrar('d') }} - -{{ vsrari('b') }} -{{ vsrari('h') }} -{{ vsrari('w') }} -{{ vsrari('d') }} {{ vrotr('b') }} {{ vrotr('h') }} diff --git a/main.py b/main.py index e5ee2c1d..8b66a05e 100644 --- a/main.py +++ b/main.py @@ -1017,4 +1017,34 @@ def vsrln(name, name2): intrinsic=f"__m128i __lsx_vsrln_{name}_{name2} (__m128i a, __m128i b)", instr=f"vsrln.{name}.{name2} vr, vr, vr", desc=f"Logical right shift the unsigned {width2}-bit elements in `a` by elements in `b`, truncate to {width}-bit and store the result to `dst`.", + ) + + @env.macro + def vsran(name, name2): + width = widths[name[0]] + width2 = widths[name2[0]] + return instruction( + intrinsic=f"__m128i __lsx_vsran_{name}_{name2} (__m128i a, __m128i b)", + instr=f"vsran.{name}.{name2} vr, vr, vr", + desc=f"Arithmetic right shift the signed {width2}-bit elements in `a` by elements in `b`, 
truncate to {width}-bit and store the result to `dst`.", + ) + + @env.macro + def vsrlni(name, name2): + width = widths[name[0]] + width2 = widths[name2[0]] + return instruction( + intrinsic=f"__m128i __lsx_vsrlni_{name}_{name2} (__m128i a, __m128i b, imm0_{width2-1} imm)", + instr=f"vsrlni.{name}.{name2} vr, vr, imm", + desc=f"Logical right shift the unsigned {width2}-bit elements in `a` and `b` by `imm`, truncate to {width}-bit and store the result to `dst`.", + ) + + @env.macro + def vsrani(name, name2): + width = widths[name[0]] + width2 = widths[name2[0]] + return instruction( + intrinsic=f"__m128i __lsx_vsrani_{name}_{name2} (__m128i a, __m128i b, imm0_{width2-1} imm)", + instr=f"vsrani.{name}.{name2} vr, vr, imm", + desc=f"Arithmetic right shift the signed {width2}-bit elements in `a` and `b` by `imm`, truncate to {width}-bit and store the result to `dst`.", ) \ No newline at end of file