From f72a956315985bbffd93ac480db78ea1dd78ea07 Mon Sep 17 00:00:00 2001 From: Jiajie Chen Date: Wed, 13 Dec 2023 14:47:04 +0800 Subject: [PATCH] Add vssrani/vssrlni --- README.md | 16 ---------------- code/gen_impl.py | 38 ++++++++++++++++++++++++++++++++++++++ code/gen_tb.py | 5 ++++- code/vssrani_b_h.cpp | 13 +++++++++++++ code/vssrani_b_h.h | 9 +++++++++ code/vssrani_bu_h.cpp | 13 +++++++++++++ code/vssrani_bu_h.h | 9 +++++++++ code/vssrani_d_q.cpp | 13 +++++++++++++ code/vssrani_d_q.h | 9 +++++++++ code/vssrani_du_q.cpp | 13 +++++++++++++ code/vssrani_du_q.h | 9 +++++++++ code/vssrani_h_w.cpp | 13 +++++++++++++ code/vssrani_h_w.h | 9 +++++++++ code/vssrani_hu_w.cpp | 13 +++++++++++++ code/vssrani_hu_w.h | 9 +++++++++ code/vssrani_w_d.cpp | 13 +++++++++++++ code/vssrani_w_d.h | 9 +++++++++ code/vssrani_wu_d.cpp | 13 +++++++++++++ code/vssrani_wu_d.h | 9 +++++++++ code/vssrlni_b_h.cpp | 13 +++++++++++++ code/vssrlni_b_h.h | 9 +++++++++ code/vssrlni_bu_h.cpp | 13 +++++++++++++ code/vssrlni_bu_h.h | 9 +++++++++ code/vssrlni_d_q.cpp | 13 +++++++++++++ code/vssrlni_d_q.h | 9 +++++++++ code/vssrlni_du_q.cpp | 13 +++++++++++++ code/vssrlni_du_q.h | 9 +++++++++ code/vssrlni_h_w.cpp | 13 +++++++++++++ code/vssrlni_h_w.h | 9 +++++++++ code/vssrlni_hu_w.cpp | 13 +++++++++++++ code/vssrlni_hu_w.h | 9 +++++++++ code/vssrlni_w_d.cpp | 13 +++++++++++++ code/vssrlni_w_d.h | 9 +++++++++ code/vssrlni_wu_d.cpp | 13 +++++++++++++ code/vssrlni_wu_d.h | 9 +++++++++ docs/lsx/shift.md | 18 ++++++++++++++++++ main.py | 26 ++++++++++++++++++++++++-- 37 files changed, 436 insertions(+), 19 deletions(-) create mode 100644 code/vssrani_b_h.cpp create mode 100644 code/vssrani_b_h.h create mode 100644 code/vssrani_bu_h.cpp create mode 100644 code/vssrani_bu_h.h create mode 100644 code/vssrani_d_q.cpp create mode 100644 code/vssrani_d_q.h create mode 100644 code/vssrani_du_q.cpp create mode 100644 code/vssrani_du_q.h create mode 100644 code/vssrani_h_w.cpp create mode 100644 code/vssrani_h_w.h 
create mode 100644 code/vssrani_hu_w.cpp
create mode 100644 code/vssrani_hu_w.h
create mode 100644 code/vssrani_w_d.cpp
create mode 100644 code/vssrani_w_d.h
create mode 100644 code/vssrani_wu_d.cpp
create mode 100644 code/vssrani_wu_d.h
create mode 100644 code/vssrlni_b_h.cpp
create mode 100644 code/vssrlni_b_h.h
create mode 100644 code/vssrlni_bu_h.cpp
create mode 100644 code/vssrlni_bu_h.h
create mode 100644 code/vssrlni_d_q.cpp
create mode 100644 code/vssrlni_d_q.h
create mode 100644 code/vssrlni_du_q.cpp
create mode 100644 code/vssrlni_du_q.h
create mode 100644 code/vssrlni_h_w.cpp
create mode 100644 code/vssrlni_h_w.h
create mode 100644 code/vssrlni_hu_w.cpp
create mode 100644 code/vssrlni_hu_w.h
create mode 100644 code/vssrlni_w_d.cpp
create mode 100644 code/vssrlni_w_d.h
create mode 100644 code/vssrlni_wu_d.cpp
create mode 100644 code/vssrlni_wu_d.h

diff --git a/README.md b/README.md
index 2ec388c1..8d834509 100644
--- a/README.md
+++ b/README.md
@@ -6,18 +6,6 @@ Arranged from QEMU implementation and [GCC Intrinsics](https://gcc.gnu.org/onlin
 
 TODO List:
 
-### vssrln.b.h/h.w/w.d
-
-### vssran.b.h/h.w/w.d
-
-### vssrlrn.b.h/h.w/w.d
-
-### vssrarn.b.h/h.w/w.d
-
-### vssrln.bu.h/hu.w/wu.d
-
-### vssran.bu.h/hu.w/wu.d
-
 ### vssrlrn.bu.h/hu.w/wu.d
 
 ### vssrarn.bu.h/hu.w/wu.d
@@ -118,12 +106,8 @@ TODO List:
 
 ### vsat.b/h/w/d/bu/hu/wu/du
 
-### vssrlni.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q
-
 ### vssrlrni.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q
 
-### vssrani.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q
-
 ### vssrarni.b.h/h.w/w.d/d.q
 
 ### vssrarni.bu.h/hu.w/wu.d/du.q
diff --git a/code/gen_impl.py b/code/gen_impl.py
index 3093bfd0..ac4535c3 100644
--- a/code/gen_impl.py
+++ b/code/gen_impl.py
@@ -204,6 +204,44 @@
             print(f"}}", file=f)
             print(f"}}", file=f)
 
+    for name, shift_sign in [("srl", "u"), ("sra", "s")]:  # shift_sign picks the C type prefix (u16/s16, ...) of the shifted value
+        double_width_signed = double_width[:1]  # first letter only: the file name uses e.g. "h" for the source width
+        with open(f"vs{name}ni_{width}_{double_width_signed}.h", "w") as f:
+            if shift_sign == "u":  # logical shift: temp is unsigned, so the lower clamp bound is always 0
+                clamp_min = 0  # named clamp_min/clamp_max so the min()/max() builtins are not shadowed
+                if sign == "u":
+                    clamp_max = (2**w) - 1
+                else:
+                    clamp_max = (2**(w - 1)) - 1
+            else:  # arithmetic shift: temp is signed
+                if sign == "u":
+                    clamp_min = 0
+                    clamp_max = (2**w) - 1
+                else:
+                    clamp_min = -(2 ** (w - 1))  # NOTE(review): for w == 64 this is emitted as a bare -9223372036854775808 C++ literal; confirm the compiler types it as intended
+                    clamp_max = (2 ** (w - 1)) - 1
+            print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
+            print(f"if (i < {64 // w}) {{", file=f)  # first half of dst is filled from b, second half from a
+            print(
+                f" {shift_sign}{double_w} temp = ({shift_sign}{double_w})b.{double_m}[i] >> imm;",
+                file=f,
+            )
+            print(
+                f" dst.{m}[i] = clamp<{shift_sign}{double_w}>(temp, {clamp_min}, {clamp_max});",
+                file=f,
+            )
+            print(f"}} else {{", file=f)
+            print(
+                f" {shift_sign}{double_w} temp = ({shift_sign}{double_w})a.{double_m}[i - {64 // w}] >> imm;",
+                file=f,
+            )
+            print(
+                f" dst.{m}[i] = clamp<{shift_sign}{double_w}>(temp, {clamp_min}, {clamp_max});",
+                file=f,
+            )
+            print(f"}}", file=f)
+            print(f"}}", file=f)
+
     if width == "d" or width == "du":
         with open(f"vextl_{double_width}_{width}.h", "w") as f:
             print(f"for (int i = 0;i < {128 // double_w};i++) {{", file=f)
diff --git a/code/gen_tb.py b/code/gen_tb.py
index 50d19307..8c0765d4 100644
--- a/code/gen_tb.py
+++ b/code/gen_tb.py
@@ -9,6 +9,7 @@
 widths_vsrln = ["b_h", "h_w", "w_d"]
 widths_vssrln = ["b_h", "bu_h", "h_w", "hu_w", "w_d", "wu_d"]
 widths_vsrlni = ["b_h", "h_w", "w_d", "d_q"]
+widths_vssrlni = ["b_h", "bu_h", "h_w", "hu_w", "w_d", "wu_d", "d_q", "du_q"]
 widths_vaddw = [
     "h_b",
     "h_bu",
@@ -126,8 +127,10 @@
     "vsrlri": (widths_signed, "v128 a, int imm", [0, 7]),
     "vsrlrn": (widths_vsrln, "v128 a, v128 b"),
     "vsrlrni": (widths_vsrlni, "v128 a, v128 b, int imm", [0, 7, 15]),
-    "vssrln": (widths_vssrln, "v128 a, v128 b"),
     "vssran": (widths_vssrln, "v128 a, v128 b"),
+    "vssrani": (widths_vssrlni, "v128 a, v128 b, int imm", [0, 7, 15]),
+    "vssrln": (widths_vssrln, "v128 a, v128 b"),
+    "vssrlni": (widths_vssrlni, "v128 a, v128 b, int imm", [0, 7, 15]),
     "vsub": (widths_signed, "v128 a, v128 b"),
     "vsubwev": (widths_vsubw, "v128 a, v128 b"),
     "vsubwod": (widths_vsubw, "v128 a, v128 b"),
diff --git a/code/vssrani_b_h.cpp b/code/vssrani_b_h.cpp
new file mode 100644
index 00000000..4dbd2ae1
--- /dev/null
+++ b/code/vssrani_b_h.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vssrani_b_h(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrani_b_h.h" + return dst; +} + +void test() { + FUZZ2(vssrani_b_h, 0); + FUZZ2(vssrani_b_h, 7); + FUZZ2(vssrani_b_h, 15); +} diff --git a/code/vssrani_b_h.h b/code/vssrani_b_h.h new file mode 100644 index 00000000..93653db4 --- /dev/null +++ b/code/vssrani_b_h.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 16; i++) { + if (i < 8) { + s16 temp = (s16)b.half[i] >> imm; + dst.byte[i] = clamp(temp, -128, 127); + } else { + s16 temp = (s16)a.half[i - 8] >> imm; + dst.byte[i] = clamp(temp, -128, 127); + } +} diff --git a/code/vssrani_bu_h.cpp b/code/vssrani_bu_h.cpp new file mode 100644 index 00000000..c4a8de95 --- /dev/null +++ b/code/vssrani_bu_h.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vssrani_bu_h(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrani_bu_h.h" + return dst; +} + +void test() { + FUZZ2(vssrani_bu_h, 0); + FUZZ2(vssrani_bu_h, 7); + FUZZ2(vssrani_bu_h, 15); +} diff --git a/code/vssrani_bu_h.h b/code/vssrani_bu_h.h new file mode 100644 index 00000000..70bc55a6 --- /dev/null +++ b/code/vssrani_bu_h.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 16; i++) { + if (i < 8) { + s16 temp = (s16)b.half[i] >> imm; + dst.byte[i] = clamp(temp, 0, 255); + } else { + s16 temp = (s16)a.half[i - 8] >> imm; + dst.byte[i] = clamp(temp, 0, 255); + } +} diff --git a/code/vssrani_d_q.cpp b/code/vssrani_d_q.cpp new file mode 100644 index 00000000..46370548 --- /dev/null +++ b/code/vssrani_d_q.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vssrani_d_q(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrani_d_q.h" + return dst; +} + +void test() { + FUZZ2(vssrani_d_q, 0); + FUZZ2(vssrani_d_q, 7); + FUZZ2(vssrani_d_q, 15); +} diff --git a/code/vssrani_d_q.h b/code/vssrani_d_q.h new file mode 100644 index 00000000..ffe71fd0 --- /dev/null +++ b/code/vssrani_d_q.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 2; i++) { + if (i < 1) { + s128 temp = 
(s128)b.qword[i] >> imm; + dst.dword[i] = clamp(temp, -9223372036854775808, 9223372036854775807); + } else { + s128 temp = (s128)a.qword[i - 1] >> imm; + dst.dword[i] = clamp(temp, -9223372036854775808, 9223372036854775807); + } +} diff --git a/code/vssrani_du_q.cpp b/code/vssrani_du_q.cpp new file mode 100644 index 00000000..4cdc467f --- /dev/null +++ b/code/vssrani_du_q.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vssrani_du_q(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrani_du_q.h" + return dst; +} + +void test() { + FUZZ2(vssrani_du_q, 0); + FUZZ2(vssrani_du_q, 7); + FUZZ2(vssrani_du_q, 15); +} diff --git a/code/vssrani_du_q.h b/code/vssrani_du_q.h new file mode 100644 index 00000000..74cba48d --- /dev/null +++ b/code/vssrani_du_q.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 2; i++) { + if (i < 1) { + s128 temp = (s128)b.qword[i] >> imm; + dst.dword[i] = clamp(temp, 0, 18446744073709551615); + } else { + s128 temp = (s128)a.qword[i - 1] >> imm; + dst.dword[i] = clamp(temp, 0, 18446744073709551615); + } +} diff --git a/code/vssrani_h_w.cpp b/code/vssrani_h_w.cpp new file mode 100644 index 00000000..086ae9d0 --- /dev/null +++ b/code/vssrani_h_w.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vssrani_h_w(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrani_h_w.h" + return dst; +} + +void test() { + FUZZ2(vssrani_h_w, 0); + FUZZ2(vssrani_h_w, 7); + FUZZ2(vssrani_h_w, 15); +} diff --git a/code/vssrani_h_w.h b/code/vssrani_h_w.h new file mode 100644 index 00000000..96075130 --- /dev/null +++ b/code/vssrani_h_w.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 8; i++) { + if (i < 4) { + s32 temp = (s32)b.word[i] >> imm; + dst.half[i] = clamp(temp, -32768, 32767); + } else { + s32 temp = (s32)a.word[i - 4] >> imm; + dst.half[i] = clamp(temp, -32768, 32767); + } +} diff --git a/code/vssrani_hu_w.cpp b/code/vssrani_hu_w.cpp new file mode 100644 index 00000000..41ec0077 --- /dev/null +++ b/code/vssrani_hu_w.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 
vssrani_hu_w(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrani_hu_w.h" + return dst; +} + +void test() { + FUZZ2(vssrani_hu_w, 0); + FUZZ2(vssrani_hu_w, 7); + FUZZ2(vssrani_hu_w, 15); +} diff --git a/code/vssrani_hu_w.h b/code/vssrani_hu_w.h new file mode 100644 index 00000000..0c6651f7 --- /dev/null +++ b/code/vssrani_hu_w.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 8; i++) { + if (i < 4) { + s32 temp = (s32)b.word[i] >> imm; + dst.half[i] = clamp(temp, 0, 65535); + } else { + s32 temp = (s32)a.word[i - 4] >> imm; + dst.half[i] = clamp(temp, 0, 65535); + } +} diff --git a/code/vssrani_w_d.cpp b/code/vssrani_w_d.cpp new file mode 100644 index 00000000..718a926a --- /dev/null +++ b/code/vssrani_w_d.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vssrani_w_d(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrani_w_d.h" + return dst; +} + +void test() { + FUZZ2(vssrani_w_d, 0); + FUZZ2(vssrani_w_d, 7); + FUZZ2(vssrani_w_d, 15); +} diff --git a/code/vssrani_w_d.h b/code/vssrani_w_d.h new file mode 100644 index 00000000..fd6ba8e2 --- /dev/null +++ b/code/vssrani_w_d.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 4; i++) { + if (i < 2) { + s64 temp = (s64)b.dword[i] >> imm; + dst.word[i] = clamp(temp, -2147483648, 2147483647); + } else { + s64 temp = (s64)a.dword[i - 2] >> imm; + dst.word[i] = clamp(temp, -2147483648, 2147483647); + } +} diff --git a/code/vssrani_wu_d.cpp b/code/vssrani_wu_d.cpp new file mode 100644 index 00000000..e89b0b2d --- /dev/null +++ b/code/vssrani_wu_d.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vssrani_wu_d(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrani_wu_d.h" + return dst; +} + +void test() { + FUZZ2(vssrani_wu_d, 0); + FUZZ2(vssrani_wu_d, 7); + FUZZ2(vssrani_wu_d, 15); +} diff --git a/code/vssrani_wu_d.h b/code/vssrani_wu_d.h new file mode 100644 index 00000000..98680cee --- /dev/null +++ b/code/vssrani_wu_d.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 4; i++) { + if (i < 2) { + s64 temp = (s64)b.dword[i] >> imm; + dst.word[i] = 
clamp(temp, 0, 4294967295); + } else { + s64 temp = (s64)a.dword[i - 2] >> imm; + dst.word[i] = clamp(temp, 0, 4294967295); + } +} diff --git a/code/vssrlni_b_h.cpp b/code/vssrlni_b_h.cpp new file mode 100644 index 00000000..cadd2c81 --- /dev/null +++ b/code/vssrlni_b_h.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vssrlni_b_h(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrlni_b_h.h" + return dst; +} + +void test() { + FUZZ2(vssrlni_b_h, 0); + FUZZ2(vssrlni_b_h, 7); + FUZZ2(vssrlni_b_h, 15); +} diff --git a/code/vssrlni_b_h.h b/code/vssrlni_b_h.h new file mode 100644 index 00000000..7caeafcb --- /dev/null +++ b/code/vssrlni_b_h.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 16; i++) { + if (i < 8) { + u16 temp = (u16)b.half[i] >> imm; + dst.byte[i] = clamp(temp, 0, 127); + } else { + u16 temp = (u16)a.half[i - 8] >> imm; + dst.byte[i] = clamp(temp, 0, 127); + } +} diff --git a/code/vssrlni_bu_h.cpp b/code/vssrlni_bu_h.cpp new file mode 100644 index 00000000..d3ff235d --- /dev/null +++ b/code/vssrlni_bu_h.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vssrlni_bu_h(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrlni_bu_h.h" + return dst; +} + +void test() { + FUZZ2(vssrlni_bu_h, 0); + FUZZ2(vssrlni_bu_h, 7); + FUZZ2(vssrlni_bu_h, 15); +} diff --git a/code/vssrlni_bu_h.h b/code/vssrlni_bu_h.h new file mode 100644 index 00000000..e37aecc3 --- /dev/null +++ b/code/vssrlni_bu_h.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 16; i++) { + if (i < 8) { + u16 temp = (u16)b.half[i] >> imm; + dst.byte[i] = clamp(temp, 0, 255); + } else { + u16 temp = (u16)a.half[i - 8] >> imm; + dst.byte[i] = clamp(temp, 0, 255); + } +} diff --git a/code/vssrlni_d_q.cpp b/code/vssrlni_d_q.cpp new file mode 100644 index 00000000..84a98747 --- /dev/null +++ b/code/vssrlni_d_q.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vssrlni_d_q(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrlni_d_q.h" + return dst; +} + +void test() { + FUZZ2(vssrlni_d_q, 0); + FUZZ2(vssrlni_d_q, 7); + 
FUZZ2(vssrlni_d_q, 15); +} diff --git a/code/vssrlni_d_q.h b/code/vssrlni_d_q.h new file mode 100644 index 00000000..964b4ac7 --- /dev/null +++ b/code/vssrlni_d_q.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 2; i++) { + if (i < 1) { + u128 temp = (u128)b.qword[i] >> imm; + dst.dword[i] = clamp(temp, 0, 9223372036854775807); + } else { + u128 temp = (u128)a.qword[i - 1] >> imm; + dst.dword[i] = clamp(temp, 0, 9223372036854775807); + } +} diff --git a/code/vssrlni_du_q.cpp b/code/vssrlni_du_q.cpp new file mode 100644 index 00000000..23d2920e --- /dev/null +++ b/code/vssrlni_du_q.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vssrlni_du_q(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrlni_du_q.h" + return dst; +} + +void test() { + FUZZ2(vssrlni_du_q, 0); + FUZZ2(vssrlni_du_q, 7); + FUZZ2(vssrlni_du_q, 15); +} diff --git a/code/vssrlni_du_q.h b/code/vssrlni_du_q.h new file mode 100644 index 00000000..2c18c9de --- /dev/null +++ b/code/vssrlni_du_q.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 2; i++) { + if (i < 1) { + u128 temp = (u128)b.qword[i] >> imm; + dst.dword[i] = clamp(temp, 0, 18446744073709551615); + } else { + u128 temp = (u128)a.qword[i - 1] >> imm; + dst.dword[i] = clamp(temp, 0, 18446744073709551615); + } +} diff --git a/code/vssrlni_h_w.cpp b/code/vssrlni_h_w.cpp new file mode 100644 index 00000000..1dd31bf0 --- /dev/null +++ b/code/vssrlni_h_w.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vssrlni_h_w(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrlni_h_w.h" + return dst; +} + +void test() { + FUZZ2(vssrlni_h_w, 0); + FUZZ2(vssrlni_h_w, 7); + FUZZ2(vssrlni_h_w, 15); +} diff --git a/code/vssrlni_h_w.h b/code/vssrlni_h_w.h new file mode 100644 index 00000000..1a54d8f2 --- /dev/null +++ b/code/vssrlni_h_w.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 8; i++) { + if (i < 4) { + u32 temp = (u32)b.word[i] >> imm; + dst.half[i] = clamp(temp, 0, 32767); + } else { + u32 temp = (u32)a.word[i - 4] >> imm; + dst.half[i] = clamp(temp, 0, 32767); + } +} diff --git 
a/code/vssrlni_hu_w.cpp b/code/vssrlni_hu_w.cpp new file mode 100644 index 00000000..63ab6660 --- /dev/null +++ b/code/vssrlni_hu_w.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vssrlni_hu_w(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrlni_hu_w.h" + return dst; +} + +void test() { + FUZZ2(vssrlni_hu_w, 0); + FUZZ2(vssrlni_hu_w, 7); + FUZZ2(vssrlni_hu_w, 15); +} diff --git a/code/vssrlni_hu_w.h b/code/vssrlni_hu_w.h new file mode 100644 index 00000000..c3705d8a --- /dev/null +++ b/code/vssrlni_hu_w.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 8; i++) { + if (i < 4) { + u32 temp = (u32)b.word[i] >> imm; + dst.half[i] = clamp(temp, 0, 65535); + } else { + u32 temp = (u32)a.word[i - 4] >> imm; + dst.half[i] = clamp(temp, 0, 65535); + } +} diff --git a/code/vssrlni_w_d.cpp b/code/vssrlni_w_d.cpp new file mode 100644 index 00000000..248ce490 --- /dev/null +++ b/code/vssrlni_w_d.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vssrlni_w_d(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrlni_w_d.h" + return dst; +} + +void test() { + FUZZ2(vssrlni_w_d, 0); + FUZZ2(vssrlni_w_d, 7); + FUZZ2(vssrlni_w_d, 15); +} diff --git a/code/vssrlni_w_d.h b/code/vssrlni_w_d.h new file mode 100644 index 00000000..7fe8a39a --- /dev/null +++ b/code/vssrlni_w_d.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 4; i++) { + if (i < 2) { + u64 temp = (u64)b.dword[i] >> imm; + dst.word[i] = clamp(temp, 0, 2147483647); + } else { + u64 temp = (u64)a.dword[i - 2] >> imm; + dst.word[i] = clamp(temp, 0, 2147483647); + } +} diff --git a/code/vssrlni_wu_d.cpp b/code/vssrlni_wu_d.cpp new file mode 100644 index 00000000..6dc85814 --- /dev/null +++ b/code/vssrlni_wu_d.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vssrlni_wu_d(v128 a, v128 b, int imm) { + v128 dst; +#include "vssrlni_wu_d.h" + return dst; +} + +void test() { + FUZZ2(vssrlni_wu_d, 0); + FUZZ2(vssrlni_wu_d, 7); + FUZZ2(vssrlni_wu_d, 15); +} diff --git a/code/vssrlni_wu_d.h b/code/vssrlni_wu_d.h new file mode 100644 index 
00000000..c42012c4 --- /dev/null +++ b/code/vssrlni_wu_d.h @@ -0,0 +1,9 @@ +for (int i = 0; i < 4; i++) { + if (i < 2) { + u64 temp = (u64)b.dword[i] >> imm; + dst.word[i] = clamp(temp, 0, 4294967295); + } else { + u64 temp = (u64)a.dword[i - 2] >> imm; + dst.word[i] = clamp(temp, 0, 4294967295); + } +} diff --git a/docs/lsx/shift.md b/docs/lsx/shift.md index cc40493e..5f44e0c3 100644 --- a/docs/lsx/shift.md +++ b/docs/lsx/shift.md @@ -142,6 +142,15 @@ Compute 128-bit `a` shifted right by `imm * 8` bits. {{ vssran('w', 'd') }} {{ vssran('wu', 'd') }} +{{ vssrani('b', 'h') }} +{{ vssrani('bu', 'h') }} +{{ vssrani('h', 'w') }} +{{ vssrani('hu', 'w') }} +{{ vssrani('w', 'd') }} +{{ vssrani('wu', 'd') }} +{{ vssrani('d', 'q') }} +{{ vssrani('du', 'q') }} + {{ vssrln('b', 'h') }} {{ vssrln('bu', 'h') }} {{ vssrln('h', 'w') }} @@ -149,6 +158,15 @@ Compute 128-bit `a` shifted right by `imm * 8` bits. {{ vssrln('w', 'd') }} {{ vssrln('wu', 'd') }} +{{ vssrlni('b', 'h') }} +{{ vssrlni('bu', 'h') }} +{{ vssrlni('h', 'w') }} +{{ vssrlni('hu', 'w') }} +{{ vssrlni('w', 'd') }} +{{ vssrlni('wu', 'd') }} +{{ vssrlni('d', 'q') }} +{{ vssrlni('du', 'q') }} + {{ vrotr('b') }} {{ vrotr('h') }} {{ vrotr('w') }} diff --git a/main.py b/main.py index cb05ff22..9ab75a87 100644 --- a/main.py +++ b/main.py @@ -1097,7 +1097,7 @@ def vssrln(name, name2): return instruction( intrinsic=f"__m128i __lsx_vssrln_{name}_{name2} (__m128i a, __m128i b)", instr=f"vssrln.{name}.{name2} vr, vr, vr", - desc=f"Logical right shift the unsigned {width2}-bit elements in `a` by elements in `b`, clamp to fit in {signedness} {width}-bit and store the result to `dst`.", + desc=f"Logical right shift the unsigned {width2}-bit elements in `a` by elements in `b`, clamp to fit in {signedness} {width}-bit integer and store the result to `dst`.", ) @env.macro @@ -1108,5 +1108,27 @@ def vssran(name, name2): return instruction( intrinsic=f"__m128i __lsx_vssran_{name}_{name2} (__m128i a, __m128i b)", 
instr=f"vssran.{name}.{name2} vr, vr, vr",
-            desc=f"Arithemtic right shift the signed {width2}-bit elements in `a` by elements in `b`, clamp to fit in {signedness} {width}-bit and store the result to `dst`.",
+            desc=f"Arithmetic right shift the signed {width2}-bit elements in `a` by elements in `b`, clamp to fit in {signedness} {width}-bit integer and store the result to `dst`.",
         )
+
+    @env.macro
+    def vssrlni(name, name2):
+        width = widths[name[0]]  # destination element width, keyed by the first letter of the suffix
+        signedness = signednesses[name]  # looked up by the full destination suffix (e.g. "bu")
+        width2 = widths[name2[0]]  # source element width; also sets the imm0_{width2-1} bound below
+        return instruction(
+            intrinsic=f"__m128i __lsx_vssrlni_{name}_{name2} (__m128i a, __m128i b, imm0_{width2-1} imm)",
+            instr=f"vssrlni.{name}.{name2} vr, vr, imm",
+            desc=f"Logical right shift the unsigned {width2}-bit elements in `a` and `b` by `imm`, clamp to fit in {signedness} {width}-bit integer and store the result to `dst`.",
+        )
+
+    @env.macro
+    def vssrani(name, name2):
+        width = widths[name[0]]  # destination element width, keyed by the first letter of the suffix
+        signedness = signednesses[name]  # looked up by the full destination suffix (e.g. "bu")
+        width2 = widths[name2[0]]  # source element width; also sets the imm0_{width2-1} bound below
+        return instruction(
+            intrinsic=f"__m128i __lsx_vssrani_{name}_{name2} (__m128i a, __m128i b, imm0_{width2-1} imm)",
+            instr=f"vssrani.{name}.{name2} vr, vr, imm",
+            desc=f"Arithmetic right shift the signed {width2}-bit elements in `a` and `b` by `imm`, clamp to fit in {signedness} {width}-bit integer and store the result to `dst`.",
+        )