diff --git a/README.md b/README.md index 15fc2d0d..2ec388c1 100644 --- a/README.md +++ b/README.md @@ -118,14 +118,10 @@ TODO List: ### vsat.b/h/w/d/bu/hu/wu/du -### vsrlrni.b.h/h.w/w.d/d.q - ### vssrlni.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q ### vssrlrni.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q -### vsrarni.b.h/h.w/w.d/d.q - ### vssrani.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q ### vssrarni.b.h/h.w/w.d/d.q diff --git a/code/gen_impl.py b/code/gen_impl.py index 816a27aa..08ea141f 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -189,6 +189,63 @@ file=f, ) print(f"}}", file=f) + with open(f"vsrlrni_{width}_{double_width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print(f"if (i < {64 // w}) {{", file=f) + print(f"if (imm == 0) {{", file=f) + print( + f" dst.{m}[i] = (u{w})(u{double_w})b.{double_m}[i];", + file=f, + ) + print(f"}} else {{", file=f) + print( + f" dst.{m}[i] = (u{w})(((u{double_w})b.{double_m}[i] >> imm) + (((u{double_w})b.{double_m}[i] >> (imm - 1)) & 0x1));", + file=f, + ) + print(f"}}", file=f) + print(f"}} else {{", file=f) + print(f"if (imm == 0) {{", file=f) + print( + f" dst.{m}[i] = (u{w})(u{double_w})a.{double_m}[i - {64 // w}];", + file=f, + ) + print(f"}} else {{", file=f) + print( + f" dst.{m}[i] = (u{w})(((u{double_w})a.{double_m}[i - {64 // w}] >> imm) + (((u{double_w})a.{double_m}[i - {64 // w}] >> (imm - 1)) & 0x1));", + file=f, + ) + print(f"}}", file=f) + print(f"}}", file=f) + print(f"}}", file=f) + with open(f"vsrarni_{width}_{double_width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print(f"if (i < {64 // w}) {{", file=f) + print(f"if (imm == 0) {{", file=f) + print( + f" dst.{m}[i] = (s{w})(s{double_w})b.{double_m}[i];", + file=f, + ) + print(f"}} else {{", file=f) + print( + f" dst.{m}[i] = (s{w})(((s{double_w})b.{double_m}[i] >> imm) + (((s{double_w})b.{double_m}[i] >> (imm - 1)) & 0x1));", + file=f, + ) + print(f"}}", file=f) + print(f"}} else {{", file=f) + print(f"if (imm 
== 0) {{", file=f) + print( + f" dst.{m}[i] = (s{w})(s{double_w})a.{double_m}[i - {64 // w}];", + file=f, + ) + print(f"}} else {{", file=f) + print( + f" dst.{m}[i] = (u{w})(((s{double_w})a.{double_m}[i - {64 // w}] >> imm) + (((s{double_w})a.{double_m}[i - {64 // w}] >> (imm - 1)) & 0x1));", + file=f, + ) + print(f"}}", file=f) + print(f"}}", file=f) + print(f"}}", file=f) + if width == "d" or width == "du": with open(f"vextl_{double_width}_{width}.h", "w") as f: diff --git a/code/gen_tb.py b/code/gen_tb.py index fe37ab5e..87aa735d 100644 --- a/code/gen_tb.py +++ b/code/gen_tb.py @@ -112,17 +112,19 @@ "vsra": (widths_signed, "v128 a, v128 b"), "vsrai": (widths_signed, "v128 a, int imm", [0, 7]), "vsran": (widths_vsrln, "v128 a, v128 b"), - "vsrani": (widths_vsrlni, "v128 a, v128 b, int imm", [0, 7]), + "vsrani": (widths_vsrlni, "v128 a, v128 b, int imm", [0, 7, 15]), "vsrar": (widths_signed, "v128 a, v128 b"), "vsrari": (widths_signed, "v128 a, int imm", [0, 7]), "vsrarn": (widths_vsrln, "v128 a, v128 b"), + "vsrarni": (widths_vsrlni, "v128 a, v128 b, int imm", [0, 7, 15]), "vsrl": (widths_signed, "v128 a, v128 b"), "vsrli": (widths_signed, "v128 a, int imm", [0, 7]), "vsrln": (widths_vsrln, "v128 a, v128 b"), - "vsrlni": (widths_vsrlni, "v128 a, v128 b, int imm", [0, 7]), + "vsrlni": (widths_vsrlni, "v128 a, v128 b, int imm", [0, 7, 15]), "vsrlr": (widths_signed, "v128 a, v128 b"), "vsrlri": (widths_signed, "v128 a, int imm", [0, 7]), "vsrlrn": (widths_vsrln, "v128 a, v128 b"), + "vsrlrni": (widths_vsrlni, "v128 a, v128 b, int imm", [0, 7, 15]), "vsub": (widths_signed, "v128 a, v128 b"), "vsubwev": (widths_vsubw, "v128 a, v128 b"), "vsubwod": (widths_vsubw, "v128 a, v128 b"), diff --git a/code/vsrani_b_h.cpp b/code/vsrani_b_h.cpp index bcb50d50..3b99a233 100644 --- a/code/vsrani_b_h.cpp +++ b/code/vsrani_b_h.cpp @@ -9,4 +9,5 @@ v128 vsrani_b_h(v128 a, v128 b, int imm) { void test() { FUZZ2(vsrani_b_h, 0); FUZZ2(vsrani_b_h, 7); + FUZZ2(vsrani_b_h, 15); } diff 
--git a/code/vsrani_d_q.cpp b/code/vsrani_d_q.cpp index e98d8dc5..c333ab61 100644 --- a/code/vsrani_d_q.cpp +++ b/code/vsrani_d_q.cpp @@ -9,4 +9,5 @@ v128 vsrani_d_q(v128 a, v128 b, int imm) { void test() { FUZZ2(vsrani_d_q, 0); FUZZ2(vsrani_d_q, 7); + FUZZ2(vsrani_d_q, 15); } diff --git a/code/vsrani_h_w.cpp b/code/vsrani_h_w.cpp index 744cabf0..b07ef297 100644 --- a/code/vsrani_h_w.cpp +++ b/code/vsrani_h_w.cpp @@ -9,4 +9,5 @@ v128 vsrani_h_w(v128 a, v128 b, int imm) { void test() { FUZZ2(vsrani_h_w, 0); FUZZ2(vsrani_h_w, 7); + FUZZ2(vsrani_h_w, 15); } diff --git a/code/vsrani_w_d.cpp b/code/vsrani_w_d.cpp index f5239645..c6753474 100644 --- a/code/vsrani_w_d.cpp +++ b/code/vsrani_w_d.cpp @@ -9,4 +9,5 @@ v128 vsrani_w_d(v128 a, v128 b, int imm) { void test() { FUZZ2(vsrani_w_d, 0); FUZZ2(vsrani_w_d, 7); + FUZZ2(vsrani_w_d, 15); } diff --git a/code/vsrarni_b_h.cpp b/code/vsrarni_b_h.cpp new file mode 100644 index 00000000..7a04cac5 --- /dev/null +++ b/code/vsrarni_b_h.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vsrarni_b_h(v128 a, v128 b, int imm) { + v128 dst; +#include "vsrarni_b_h.h" + return dst; +} + +void test() { + FUZZ2(vsrarni_b_h, 0); + FUZZ2(vsrarni_b_h, 7); + FUZZ2(vsrarni_b_h, 15); +} diff --git a/code/vsrarni_b_h.h b/code/vsrarni_b_h.h new file mode 100644 index 00000000..e2a4a41f --- /dev/null +++ b/code/vsrarni_b_h.h @@ -0,0 +1,17 @@ +for (int i = 0; i < 16; i++) { + if (i < 8) { + if (imm == 0) { + dst.byte[i] = (s8)(s16)b.half[i]; + } else { + dst.byte[i] = + (s8)(((s16)b.half[i] >> imm) + (((s16)b.half[i] >> (imm - 1)) & 0x1)); + } + } else { + if (imm == 0) { + dst.byte[i] = (s8)(s16)a.half[i - 8]; + } else { + dst.byte[i] = (u8)(((s16)a.half[i - 8] >> imm) + + (((s16)a.half[i - 8] >> (imm - 1)) & 0x1)); + } + } +} diff --git a/code/vsrarni_d_q.cpp b/code/vsrarni_d_q.cpp new file mode 100644 index 00000000..54d7c108 --- /dev/null +++ b/code/vsrarni_d_q.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vsrarni_d_q(v128 a, v128 b, int imm) 
{ + v128 dst; +#include "vsrarni_d_q.h" + return dst; +} + +void test() { + FUZZ2(vsrarni_d_q, 0); + FUZZ2(vsrarni_d_q, 7); + FUZZ2(vsrarni_d_q, 15); +} diff --git a/code/vsrarni_d_q.h b/code/vsrarni_d_q.h new file mode 100644 index 00000000..1feb2ad8 --- /dev/null +++ b/code/vsrarni_d_q.h @@ -0,0 +1,17 @@ +for (int i = 0; i < 2; i++) { + if (i < 1) { + if (imm == 0) { + dst.dword[i] = (s64)(s128)b.qword[i]; + } else { + dst.dword[i] = (s64)(((s128)b.qword[i] >> imm) + + (((s128)b.qword[i] >> (imm - 1)) & 0x1)); + } + } else { + if (imm == 0) { + dst.dword[i] = (s64)(s128)a.qword[i - 1]; + } else { + dst.dword[i] = (u64)(((s128)a.qword[i - 1] >> imm) + + (((s128)a.qword[i - 1] >> (imm - 1)) & 0x1)); + } + } +} diff --git a/code/vsrarni_h_w.cpp b/code/vsrarni_h_w.cpp new file mode 100644 index 00000000..e7a6efed --- /dev/null +++ b/code/vsrarni_h_w.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vsrarni_h_w(v128 a, v128 b, int imm) { + v128 dst; +#include "vsrarni_h_w.h" + return dst; +} + +void test() { + FUZZ2(vsrarni_h_w, 0); + FUZZ2(vsrarni_h_w, 7); + FUZZ2(vsrarni_h_w, 15); +} diff --git a/code/vsrarni_h_w.h b/code/vsrarni_h_w.h new file mode 100644 index 00000000..82be1f34 --- /dev/null +++ b/code/vsrarni_h_w.h @@ -0,0 +1,17 @@ +for (int i = 0; i < 8; i++) { + if (i < 4) { + if (imm == 0) { + dst.half[i] = (s16)(s32)b.word[i]; + } else { + dst.half[i] = (s16)(((s32)b.word[i] >> imm) + + (((s32)b.word[i] >> (imm - 1)) & 0x1)); + } + } else { + if (imm == 0) { + dst.half[i] = (s16)(s32)a.word[i - 4]; + } else { + dst.half[i] = (u16)(((s32)a.word[i - 4] >> imm) + + (((s32)a.word[i - 4] >> (imm - 1)) & 0x1)); + } + } +} diff --git a/code/vsrarni_w_d.cpp b/code/vsrarni_w_d.cpp new file mode 100644 index 00000000..94890c08 --- /dev/null +++ b/code/vsrarni_w_d.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vsrarni_w_d(v128 a, v128 b, int imm) { + v128 dst; +#include "vsrarni_w_d.h" + return dst; +} + +void test() { + FUZZ2(vsrarni_w_d, 0); + FUZZ2(vsrarni_w_d, 
7); + FUZZ2(vsrarni_w_d, 15); +} diff --git a/code/vsrarni_w_d.h b/code/vsrarni_w_d.h new file mode 100644 index 00000000..308c8af1 --- /dev/null +++ b/code/vsrarni_w_d.h @@ -0,0 +1,17 @@ +for (int i = 0; i < 4; i++) { + if (i < 2) { + if (imm == 0) { + dst.word[i] = (s32)(s64)b.dword[i]; + } else { + dst.word[i] = (s32)(((s64)b.dword[i] >> imm) + + (((s64)b.dword[i] >> (imm - 1)) & 0x1)); + } + } else { + if (imm == 0) { + dst.word[i] = (s32)(s64)a.dword[i - 2]; + } else { + dst.word[i] = (u32)(((s64)a.dword[i - 2] >> imm) + + (((s64)a.dword[i - 2] >> (imm - 1)) & 0x1)); + } + } +} diff --git a/code/vsrlni_b_h.cpp b/code/vsrlni_b_h.cpp index 760d5039..855817d8 100644 --- a/code/vsrlni_b_h.cpp +++ b/code/vsrlni_b_h.cpp @@ -9,4 +9,5 @@ v128 vsrlni_b_h(v128 a, v128 b, int imm) { void test() { FUZZ2(vsrlni_b_h, 0); FUZZ2(vsrlni_b_h, 7); + FUZZ2(vsrlni_b_h, 15); } diff --git a/code/vsrlni_d_q.cpp b/code/vsrlni_d_q.cpp index be081d3b..08a5a4cc 100644 --- a/code/vsrlni_d_q.cpp +++ b/code/vsrlni_d_q.cpp @@ -9,4 +9,5 @@ v128 vsrlni_d_q(v128 a, v128 b, int imm) { void test() { FUZZ2(vsrlni_d_q, 0); FUZZ2(vsrlni_d_q, 7); + FUZZ2(vsrlni_d_q, 15); } diff --git a/code/vsrlni_h_w.cpp b/code/vsrlni_h_w.cpp index a6fd12b9..bbe6e8b7 100644 --- a/code/vsrlni_h_w.cpp +++ b/code/vsrlni_h_w.cpp @@ -9,4 +9,5 @@ v128 vsrlni_h_w(v128 a, v128 b, int imm) { void test() { FUZZ2(vsrlni_h_w, 0); FUZZ2(vsrlni_h_w, 7); + FUZZ2(vsrlni_h_w, 15); } diff --git a/code/vsrlni_w_d.cpp b/code/vsrlni_w_d.cpp index 5ff6c30c..181ab781 100644 --- a/code/vsrlni_w_d.cpp +++ b/code/vsrlni_w_d.cpp @@ -9,4 +9,5 @@ v128 vsrlni_w_d(v128 a, v128 b, int imm) { void test() { FUZZ2(vsrlni_w_d, 0); FUZZ2(vsrlni_w_d, 7); + FUZZ2(vsrlni_w_d, 15); } diff --git a/code/vsrlrni_b_h.cpp b/code/vsrlrni_b_h.cpp new file mode 100644 index 00000000..8aa5c535 --- /dev/null +++ b/code/vsrlrni_b_h.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vsrlrni_b_h(v128 a, v128 b, int imm) { + v128 dst; +#include "vsrlrni_b_h.h" + return 
dst; +} + +void test() { + FUZZ2(vsrlrni_b_h, 0); + FUZZ2(vsrlrni_b_h, 7); + FUZZ2(vsrlrni_b_h, 15); +} diff --git a/code/vsrlrni_b_h.h b/code/vsrlrni_b_h.h new file mode 100644 index 00000000..1cfb3c16 --- /dev/null +++ b/code/vsrlrni_b_h.h @@ -0,0 +1,17 @@ +for (int i = 0; i < 16; i++) { + if (i < 8) { + if (imm == 0) { + dst.byte[i] = (u8)(u16)b.half[i]; + } else { + dst.byte[i] = + (u8)(((u16)b.half[i] >> imm) + (((u16)b.half[i] >> (imm - 1)) & 0x1)); + } + } else { + if (imm == 0) { + dst.byte[i] = (u8)(u16)a.half[i - 8]; + } else { + dst.byte[i] = (u8)(((u16)a.half[i - 8] >> imm) + + (((u16)a.half[i - 8] >> (imm - 1)) & 0x1)); + } + } +} diff --git a/code/vsrlrni_d_q.cpp b/code/vsrlrni_d_q.cpp new file mode 100644 index 00000000..029ccd9f --- /dev/null +++ b/code/vsrlrni_d_q.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vsrlrni_d_q(v128 a, v128 b, int imm) { + v128 dst; +#include "vsrlrni_d_q.h" + return dst; +} + +void test() { + FUZZ2(vsrlrni_d_q, 0); + FUZZ2(vsrlrni_d_q, 7); + FUZZ2(vsrlrni_d_q, 15); +} diff --git a/code/vsrlrni_d_q.h b/code/vsrlrni_d_q.h new file mode 100644 index 00000000..7816c276 --- /dev/null +++ b/code/vsrlrni_d_q.h @@ -0,0 +1,17 @@ +for (int i = 0; i < 2; i++) { + if (i < 1) { + if (imm == 0) { + dst.dword[i] = (u64)(u128)b.qword[i]; + } else { + dst.dword[i] = (u64)(((u128)b.qword[i] >> imm) + + (((u128)b.qword[i] >> (imm - 1)) & 0x1)); + } + } else { + if (imm == 0) { + dst.dword[i] = (u64)(u128)a.qword[i - 1]; + } else { + dst.dword[i] = (u64)(((u128)a.qword[i - 1] >> imm) + + (((u128)a.qword[i - 1] >> (imm - 1)) & 0x1)); + } + } +} diff --git a/code/vsrlrni_h_w.cpp b/code/vsrlrni_h_w.cpp new file mode 100644 index 00000000..4e2316b1 --- /dev/null +++ b/code/vsrlrni_h_w.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vsrlrni_h_w(v128 a, v128 b, int imm) { + v128 dst; +#include "vsrlrni_h_w.h" + return dst; +} + +void test() { + FUZZ2(vsrlrni_h_w, 0); + FUZZ2(vsrlrni_h_w, 7); + FUZZ2(vsrlrni_h_w, 15); +} diff --git 
a/code/vsrlrni_h_w.h b/code/vsrlrni_h_w.h new file mode 100644 index 00000000..8e0c8009 --- /dev/null +++ b/code/vsrlrni_h_w.h @@ -0,0 +1,17 @@ +for (int i = 0; i < 8; i++) { + if (i < 4) { + if (imm == 0) { + dst.half[i] = (u16)(u32)b.word[i]; + } else { + dst.half[i] = (u16)(((u32)b.word[i] >> imm) + + (((u32)b.word[i] >> (imm - 1)) & 0x1)); + } + } else { + if (imm == 0) { + dst.half[i] = (u16)(u32)a.word[i - 4]; + } else { + dst.half[i] = (u16)(((u32)a.word[i - 4] >> imm) + + (((u32)a.word[i - 4] >> (imm - 1)) & 0x1)); + } + } +} diff --git a/code/vsrlrni_w_d.cpp b/code/vsrlrni_w_d.cpp new file mode 100644 index 00000000..d5cfc24b --- /dev/null +++ b/code/vsrlrni_w_d.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vsrlrni_w_d(v128 a, v128 b, int imm) { + v128 dst; +#include "vsrlrni_w_d.h" + return dst; +} + +void test() { + FUZZ2(vsrlrni_w_d, 0); + FUZZ2(vsrlrni_w_d, 7); + FUZZ2(vsrlrni_w_d, 15); +} diff --git a/code/vsrlrni_w_d.h b/code/vsrlrni_w_d.h new file mode 100644 index 00000000..e0d111fe --- /dev/null +++ b/code/vsrlrni_w_d.h @@ -0,0 +1,17 @@ +for (int i = 0; i < 4; i++) { + if (i < 2) { + if (imm == 0) { + dst.word[i] = (u32)(u64)b.dword[i]; + } else { + dst.word[i] = (u32)(((u64)b.dword[i] >> imm) + + (((u64)b.dword[i] >> (imm - 1)) & 0x1)); + } + } else { + if (imm == 0) { + dst.word[i] = (u32)(u64)a.dword[i - 2]; + } else { + dst.word[i] = (u32)(((u64)a.dword[i - 2] >> imm) + + (((u64)a.dword[i - 2] >> (imm - 1)) & 0x1)); + } + } +} diff --git a/docs/lsx/shift.md b/docs/lsx/shift.md index 765c6902..0764ed8f 100644 --- a/docs/lsx/shift.md +++ b/docs/lsx/shift.md @@ -92,6 +92,11 @@ Compute 128-bit `a` shifted right by `imm * 8` bits. {{ vsrarn('h', 'w') }} {{ vsrarn('w', 'd') }} +{{ vsrarni('b', 'h') }} +{{ vsrarni('h', 'w') }} +{{ vsrarni('w', 'd') }} +{{ vsrarni('d', 'q') }} + {{ vsrl('b') }} {{ vsrl('h') }} {{ vsrl('w') }} @@ -125,6 +130,11 @@ Compute 128-bit `a` shifted right by `imm * 8` bits. 
{{ vsrlrn('h', 'w') }} {{ vsrlrn('w', 'd') }} +{{ vsrlrni('b', 'h') }} +{{ vsrlrni('h', 'w') }} +{{ vsrlrni('w', 'd') }} +{{ vsrlrni('d', 'q') }} + {{ vrotr('b') }} {{ vrotr('h') }} {{ vrotr('w') }} diff --git a/main.py b/main.py index efa9a37e..41f209f5 100644 --- a/main.py +++ b/main.py @@ -1067,4 +1067,24 @@ def vsrarn(name, name2): intrinsic=f"__m128i __lsx_vsrarn_{name}_{name2} (__m128i a, __m128i b)", instr=f"vsrarn.{name}.{name2} vr, vr, vr", desc=f"Arithmetic right shift (with rounding) the signed {width2}-bit elements in `a` by elements in `b`, truncate to {width}-bit and store the result to `dst`.", + ) + + @env.macro + def vsrlrni(name, name2): + width = widths[name[0]] + width2 = widths[name2[0]] + return instruction( + intrinsic=f"__m128i __lsx_vsrlrni_{name}_{name2} (__m128i a, __m128i b, imm0_{width2-1} imm)", + instr=f"vsrlrni.{name}.{name2} vr, vr, imm", + desc=f"Logical right shift (with rounding) the unsigned {width2}-bit elements in `a` and `b` by `imm`, truncate to {width}-bit and store the result to `dst`.", + ) + + @env.macro + def vsrarni(name, name2): + width = widths[name[0]] + width2 = widths[name2[0]] + return instruction( + intrinsic=f"__m128i __lsx_vsrarni_{name}_{name2} (__m128i a, __m128i b, imm0_{width2-1} imm)", + instr=f"vsrarni.{name}.{name2} vr, vr, imm", + desc=f"Arithmetic right shift (with rounding) the signed {width2}-bit elements in `a` and `b` by `imm`, truncate to {width}-bit and store the result to `dst`.", ) \ No newline at end of file