From 3ae250881e234bb62275c4fc2c0e14d6a9131627 Mon Sep 17 00:00:00 2001
From: Jiajie Chen
Date: Tue, 12 Dec 2023 23:24:38 +0800
Subject: [PATCH] Add vmskltz

---
Reviewer notes (this area sits below the three-dash line and is not part of
the commit message):

* code/vmskltz_{b,h,w,d}.h hold the reference semantics and are textually
  included by the matching .cpp wrappers, which declare `dst`, include the
  header, return `dst`, and register a FUZZ1 test.
* Each header masks the sign bit of every element of `a` (per 64-bit half),
  packs those bits together, and stores them in the low bits of
  dst.dword[0]: 16 bits for .b, 8 for .h, 4 for .w, 2 for .d. The bits from
  a.dword[1] are placed directly above the bits from a.dword[0], and
  dst.dword[1] is set to zero.
* The .b/.h/.w variants pack the sign bits with shift-and-OR folding
  (shifts of 7/14/28, 15/30 and 31 respectively) so that they end up
  contiguous at the top of the 64-bit word, then shift them down.
* code/gen_tb.py gains a "vmskltz" entry, main.py gains a `vmskltz` docs
  macro used by docs/lsx/misc.md, and the README TODO entry is removed.

 README.md          |  2 --
 code/gen_tb.py     |  1 +
 code/vmskltz_b.cpp |  9 +++++++++
 code/vmskltz_b.h   | 14 ++++++++++++++
 code/vmskltz_d.cpp |  9 +++++++++
 code/vmskltz_d.h   |  8 ++++++++
 code/vmskltz_h.cpp |  9 +++++++++
 code/vmskltz_h.h   | 12 ++++++++++++
 code/vmskltz_w.cpp |  9 +++++++++
 code/vmskltz_w.h   | 10 ++++++++++
 docs/lsx/misc.md   |  5 +++++
 main.py            |  9 +++++++++
 12 files changed, 95 insertions(+), 2 deletions(-)
 create mode 100644 code/vmskltz_b.cpp
 create mode 100644 code/vmskltz_b.h
 create mode 100644 code/vmskltz_d.cpp
 create mode 100644 code/vmskltz_d.h
 create mode 100644 code/vmskltz_h.cpp
 create mode 100644 code/vmskltz_h.h
 create mode 100644 code/vmskltz_w.cpp
 create mode 100644 code/vmskltz_w.h

diff --git a/README.md b/README.md
index 4e3568e3..32ac07d7 100644
--- a/README.md
+++ b/README.md
@@ -60,8 +60,6 @@ TODO List:
 
 ### vfrstpi.b/h
 
-### vmskltz.b/h/w/d
-
 ### vmskgez.b
 
 ### vmsknz.b
diff --git a/code/gen_tb.py b/code/gen_tb.py
index f1d07a82..d92ea8e4 100644
--- a/code/gen_tb.py
+++ b/code/gen_tb.py
@@ -72,6 +72,7 @@
     "vmini": (widths_all, "v128 a, int imm", [0, 3, 15]),
     "vmod": (widths_all, "v128 a, v128 b"),
     "vmsub": (widths_signed, "v128 a, v128 b, v128 c"),
+    "vmskltz": (widths_signed, "v128 a"),
     "vmuh": (widths_all, "v128 a, v128 b"),
     "vmul": (widths_signed, "v128 a, v128 b"),
     "vmulwev": (widths_vaddw, "v128 a, v128 b"),
diff --git a/code/vmskltz_b.cpp b/code/vmskltz_b.cpp
new file mode 100644
index 00000000..94d5e5b0
--- /dev/null
+++ b/code/vmskltz_b.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vmskltz_b(v128 a) {
+  v128 dst;
+#include "vmskltz_b.h"
+  return dst;
+}
+
+void test() { FUZZ1(vmskltz_b); }
diff --git a/code/vmskltz_b.h b/code/vmskltz_b.h
new file mode 100644
index 00000000..7406f5c8
--- /dev/null
+++ b/code/vmskltz_b.h
@@ -0,0 +1,14 @@
+u64 m = 0x8080808080808080;
+u64 c = m & a.dword[0];
+c |= c << 7;
+c |= c << 14;
+c |= c << 28;
+c >>= 56;
+dst.dword[0] = c;
+c = m & a.dword[1];
+c |= c << 7;
+c |= c << 14;
+c |= c << 28;
+c >>= 56;
+dst.dword[0] |= c << 8;
+dst.dword[1] = 0;
diff --git a/code/vmskltz_d.cpp b/code/vmskltz_d.cpp
new file mode 100644
index 00000000..b97265e5
--- /dev/null
+++ b/code/vmskltz_d.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vmskltz_d(v128 a) {
+  v128 dst;
+#include "vmskltz_d.h"
+  return dst;
+}
+
+void test() { FUZZ1(vmskltz_d); }
diff --git a/code/vmskltz_d.h b/code/vmskltz_d.h
new file mode 100644
index 00000000..41692aae
--- /dev/null
+++ b/code/vmskltz_d.h
@@ -0,0 +1,8 @@
+u64 m = 0x8000000000000000;
+u64 c = m & a.dword[0];
+c >>= 63;
+dst.dword[0] = c;
+c = m & a.dword[1];
+c >>= 63;
+dst.dword[0] |= c << 1;
+dst.dword[1] = 0;
diff --git a/code/vmskltz_h.cpp b/code/vmskltz_h.cpp
new file mode 100644
index 00000000..d5be2cef
--- /dev/null
+++ b/code/vmskltz_h.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vmskltz_h(v128 a) {
+  v128 dst;
+#include "vmskltz_h.h"
+  return dst;
+}
+
+void test() { FUZZ1(vmskltz_h); }
diff --git a/code/vmskltz_h.h b/code/vmskltz_h.h
new file mode 100644
index 00000000..2600a9f3
--- /dev/null
+++ b/code/vmskltz_h.h
@@ -0,0 +1,12 @@
+u64 m = 0x8000800080008000;
+u64 c = m & a.dword[0];
+c |= c << 15;
+c |= c << 30;
+c >>= 60;
+dst.dword[0] = c;
+c = m & a.dword[1];
+c |= c << 15;
+c |= c << 30;
+c >>= 60;
+dst.dword[0] |= c << 4;
+dst.dword[1] = 0;
diff --git a/code/vmskltz_w.cpp b/code/vmskltz_w.cpp
new file mode 100644
index 00000000..a111f0c4
--- /dev/null
+++ b/code/vmskltz_w.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vmskltz_w(v128 a) {
+  v128 dst;
+#include "vmskltz_w.h"
+  return dst;
+}
+
+void test() { FUZZ1(vmskltz_w); }
diff --git a/code/vmskltz_w.h b/code/vmskltz_w.h
new file mode 100644
index 00000000..8d8a66eb
--- /dev/null
+++ b/code/vmskltz_w.h
@@ -0,0 +1,10 @@
+u64 m = 0x8000000080000000;
+u64 c = m & a.dword[0];
+c |= c << 31;
+c >>= 62;
+dst.dword[0] = c;
+c = m & a.dword[1];
+c |= c << 31;
+c >>= 62;
+dst.dword[0] |= c << 2;
+dst.dword[1] = 0;
diff --git a/docs/lsx/misc.md b/docs/lsx/misc.md
index 6c9fa46d..45c7dc78 100644
--- a/docs/lsx/misc.md
+++ b/docs/lsx/misc.md
@@ -15,6 +15,11 @@
 {{ vinsgr2vr('w') }}
 {{ vinsgr2vr('d') }}
 
+{{ vmskltz('b') }}
+{{ vmskltz('h') }}
+{{ vmskltz('w') }}
+{{ vmskltz('d') }}
+
 {{ vpackev('b') }}
 {{ vpackev('h') }}
 {{ vpackev('w') }}
diff --git a/main.py b/main.py
index 90cde345..f2388196 100644
--- a/main.py
+++ b/main.py
@@ -816,4 +816,13 @@ def vneg(name):
             intrinsic=f"__m128i __lsx_vneg_{name} (__m128i a)",
             instr=f"vneg.{name} vr, vr",
             desc=f"Negate {width}-bit elements in `a` and save the result in `dst`.",
+        )
+
+    @env.macro
+    def vmskltz(name):
+        width = widths[name]
+        return instruction(
+            intrinsic=f"__m128i __lsx_vmskltz_{name} (__m128i a)",
+            instr=f"vmskltz.{name} vr, vr",
+            desc=f"For each {width}-bit element in `a`, if the element is less than zero, set one bit in `dst`, otherwise clear it.",
         )
\ No newline at end of file