From 2161b5577082b60f947ab83154e2a8a055ad3fe7 Mon Sep 17 00:00:00 2001 From: Jiajie Chen Date: Tue, 12 Dec 2023 15:16:40 +0800 Subject: [PATCH] Add vbitclr --- README.md | 4 ---- code/common.h | 9 +++++++++ code/gen_impl.py | 20 +++++++++++++++++++- code/gen_tb.py | 10 +++++++++- code/vbitclr_b.cpp | 9 +++++++++ code/vbitclr_b.h | 3 +++ code/vbitclr_d.cpp | 9 +++++++++ code/vbitclr_d.h | 3 +++ code/vbitclr_h.cpp | 9 +++++++++ code/vbitclr_h.h | 3 +++ code/vbitclr_w.cpp | 9 +++++++++ code/vbitclr_w.h | 3 +++ code/vbitclri_b.cpp | 13 +++++++++++++ code/vbitclri_b.h | 3 +++ code/vbitclri_d.cpp | 13 +++++++++++++ code/vbitclri_d.h | 3 +++ code/vbitclri_h.cpp | 13 +++++++++++++ code/vbitclri_h.h | 3 +++ code/vbitclri_w.cpp | 13 +++++++++++++ code/vbitclri_w.h | 3 +++ docs/lsx_bitops/{vbitsel.md => vbitwise.md} | 12 +++++++++++- main.py | 19 +++++++++++++++++++ 22 files changed, 179 insertions(+), 7 deletions(-) create mode 100644 code/vbitclr_b.cpp create mode 100644 code/vbitclr_b.h create mode 100644 code/vbitclr_d.cpp create mode 100644 code/vbitclr_d.h create mode 100644 code/vbitclr_h.cpp create mode 100644 code/vbitclr_h.h create mode 100644 code/vbitclr_w.cpp create mode 100644 code/vbitclr_w.h create mode 100644 code/vbitclri_b.cpp create mode 100644 code/vbitclri_b.h create mode 100644 code/vbitclri_d.cpp create mode 100644 code/vbitclri_d.h create mode 100644 code/vbitclri_h.cpp create mode 100644 code/vbitclri_h.h create mode 100644 code/vbitclri_w.cpp create mode 100644 code/vbitclri_w.h rename docs/lsx_bitops/{vbitsel.md => vbitwise.md} (70%) diff --git a/README.md b/README.md index 53ee95d8..bbd07c23 100644 --- a/README.md +++ b/README.md @@ -162,8 +162,6 @@ Vector Multiplication High ### vssrarn.bu.h/hu.w/wu.d -### vbitclr.b/h/w/d - ### vbitset.b/h/w/d ### vbitrev.b/h/w/d @@ -366,8 +364,6 @@ Vector Multiplication High ### vextl.qu.du -### vbitclri.b/h/w/d - ### vbitseti.b/h/w/d ### vbitrevi.b/h/w/d diff --git a/code/common.h b/code/common.h index e8eb4b8c..d8aa5251 100644 --- a/code/common.h +++ b/code/common.h @@ -24,8 +24,12 @@ union v128 { __m128i m128i; __m128 m128; __m128d m128d; + v2i64 __v2i64; + v2u64 __v2u64; v4i32 __v4i32; + v4u32 __v4u32; v8i16 __v8i16; + v8u16 __v8u16; v16i8 __v16i8; v16u8 __v16u8; @@ -43,8 +47,13 @@ union v128 { } operator __m128i() { return m128i; } + // duplicate with __m128i + // operator v2i64() { return __v2i64; } + operator v2u64() { return __v2u64; } operator v4i32() { return __v4i32; } + operator v4u32() { return __v4u32; } operator v8i16() { return __v8i16; } + operator v8u16() { return __v8u16; } operator v16i8() { return __v16i8; } operator v16u8() { return __v16u8; } bool operator==(const v128 &other) const { diff --git a/code/gen_impl.py b/code/gen_impl.py index b16b27dd..629617b3 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -95,4 +95,22 @@ ) print(f"}}", file=f) -os.system("clang-format -i *.cpp *.h") \ No newline at end of file +for width in ["b", "h", "w", "d"]: + w = widths[width] + m = members[width] + with open(f"vbitclr_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = a.{m}[i] & (~((u{w})1 << (b.{m}[i] % {w})));", + file=f, + ) + print(f"}}", file=f) + with open(f"vbitclri_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = a.{m}[i] & (~((u{w})1 << imm));", + file=f, + ) + print(f"}}", file=f) + +os.system("clang-format -i *.cpp *.h") diff --git a/code/gen_tb.py b/code/gen_tb.py index 9c35978c..788cf604 100644 --- a/code/gen_tb.py +++ b/code/gen_tb.py @@ -1,5 +1,6 @@ import os +widths_signed = ["b", "h", "w", "d"] widths_all = ["b", "bu", "h", "hu", "w", "wu", "d", "du"] widths_vaddw = [ "h_b", @@ -17,10 +18,13 @@ ] tb = { + # widths, args, extra args for imm "vavg": (widths_all, "v128 a, v128 b"), "vavgr": (widths_all, "v128 a, v128 b"), "vaddwev": (widths_vaddw, "v128 a, v128 b"), "vaddwod": (widths_vaddw, "v128 a, v128 b"), + "vbitclr": (widths_signed, "v128 a, v128 b"), + "vbitclri": (widths_signed, "v128 a, int imm", [0, 3, 7]), } for name in tb: @@ -47,7 +51,11 @@ print("}", file=f) print("", file=f) print("void test() {", file=f) - print(f" FUZZ{fuzz_args}({inst_name});", file=f) + if len(t) >= 3: + for imm in t[2]: + print(f" FUZZ{fuzz_args}({inst_name}, {imm});", file=f) + else: + print(f" FUZZ{fuzz_args}({inst_name});", file=f) print("}", file=f) os.system("clang-format -i *.cpp *.h") \ No newline at end of file diff --git a/code/vbitclr_b.cpp b/code/vbitclr_b.cpp new file mode 100644 index 00000000..9563b70d --- /dev/null +++ b/code/vbitclr_b.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vbitclr_b(v128 a, v128 b) { + v128 dst; +#include "vbitclr_b.h" + return dst; +} + +void test() { FUZZ2(vbitclr_b); } diff --git a/code/vbitclr_b.h b/code/vbitclr_b.h new file mode 100644 index 00000000..41dd1306 --- /dev/null +++ b/code/vbitclr_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = a.byte[i] & (~((u8)1 << (b.byte[i] % 8))); +} diff --git a/code/vbitclr_d.cpp b/code/vbitclr_d.cpp new file mode 100644 index 00000000..2fc6b30c --- /dev/null +++ b/code/vbitclr_d.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vbitclr_d(v128 a, v128 b) { + v128 dst; +#include "vbitclr_d.h" + return dst; +} + +void test() { FUZZ2(vbitclr_d); } diff --git a/code/vbitclr_d.h b/code/vbitclr_d.h new file mode 100644 index 00000000..d11cf70a --- /dev/null +++ b/code/vbitclr_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = a.dword[i] & (~((u64)1 << (b.dword[i] % 64))); +} diff --git a/code/vbitclr_h.cpp b/code/vbitclr_h.cpp new file mode 100644 index 00000000..764a0beb --- /dev/null +++ b/code/vbitclr_h.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vbitclr_h(v128 a, v128 b) { + v128 dst; +#include "vbitclr_h.h" + return dst; +} + +void test() { FUZZ2(vbitclr_h); } diff --git a/code/vbitclr_h.h b/code/vbitclr_h.h new file mode 100644 index 00000000..63de30e0 --- /dev/null +++ b/code/vbitclr_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = a.half[i] & (~((u16)1 << (b.half[i] % 16))); +} diff --git a/code/vbitclr_w.cpp b/code/vbitclr_w.cpp new file mode 100644 index 00000000..db1aa156 --- /dev/null +++ b/code/vbitclr_w.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vbitclr_w(v128 a, v128 b) { + v128 dst; +#include "vbitclr_w.h" + return dst; +} + +void test() { FUZZ2(vbitclr_w); } diff --git a/code/vbitclr_w.h b/code/vbitclr_w.h new file mode 100644 index 00000000..341f6087 --- /dev/null +++ b/code/vbitclr_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = a.word[i] & (~((u32)1 << (b.word[i] % 32))); +} diff --git a/code/vbitclri_b.cpp b/code/vbitclri_b.cpp new file mode 100644 index 00000000..9e596c01 --- /dev/null +++ b/code/vbitclri_b.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vbitclri_b(v128 a, int imm) { + v128 dst; +#include "vbitclri_b.h" + return dst; +} + +void test() { + FUZZ1(vbitclri_b, 0); + FUZZ1(vbitclri_b, 3); + FUZZ1(vbitclri_b, 7); +} diff --git a/code/vbitclri_b.h b/code/vbitclri_b.h new file mode 100644 index 00000000..228925b9 --- /dev/null +++ b/code/vbitclri_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = a.byte[i] & (~((u8)1 << imm)); +} diff --git a/code/vbitclri_d.cpp b/code/vbitclri_d.cpp new file mode 100644 index 00000000..4b0639e1 --- /dev/null +++ b/code/vbitclri_d.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vbitclri_d(v128 a, int imm) { + v128 dst; +#include "vbitclri_d.h" + return dst; +} + +void test() { + FUZZ1(vbitclri_d, 0); + FUZZ1(vbitclri_d, 3); + FUZZ1(vbitclri_d, 7); +} diff --git a/code/vbitclri_d.h b/code/vbitclri_d.h new file mode 100644 index 00000000..1811b34d --- /dev/null +++ b/code/vbitclri_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = a.dword[i] & (~((u64)1 << imm)); +} diff --git a/code/vbitclri_h.cpp b/code/vbitclri_h.cpp new file mode 100644 index 00000000..34de7df7 --- /dev/null +++ b/code/vbitclri_h.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vbitclri_h(v128 a, int imm) { + v128 dst; +#include "vbitclri_h.h" + return dst; +} + +void test() { + FUZZ1(vbitclri_h, 0); + FUZZ1(vbitclri_h, 3); + FUZZ1(vbitclri_h, 7); +} diff --git a/code/vbitclri_h.h b/code/vbitclri_h.h new file mode 100644 index 00000000..951006d6 --- /dev/null +++ b/code/vbitclri_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = a.half[i] & (~((u16)1 << imm)); +} diff --git a/code/vbitclri_w.cpp b/code/vbitclri_w.cpp new file mode 100644 index 00000000..bf7e713d --- /dev/null +++ b/code/vbitclri_w.cpp @@ -0,0 +1,13 @@ +#include "common.h" + +v128 vbitclri_w(v128 a, int imm) { + v128 dst; +#include "vbitclri_w.h" + return dst; +} + +void test() { + FUZZ1(vbitclri_w, 0); + FUZZ1(vbitclri_w, 3); + FUZZ1(vbitclri_w, 7); +} diff --git a/code/vbitclri_w.h b/code/vbitclri_w.h new file mode 100644 index 00000000..73bc04b2 --- /dev/null +++ b/code/vbitclri_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = a.word[i] & (~((u32)1 << imm)); +} diff --git a/docs/lsx_bitops/vbitsel.md b/docs/lsx_bitops/vbitwise.md similarity index 70% rename from docs/lsx_bitops/vbitsel.md rename to docs/lsx_bitops/vbitwise.md index b6127091..b858a5c4 100644 --- a/docs/lsx_bitops/vbitsel.md +++ b/docs/lsx_bitops/vbitwise.md @@ -1,4 +1,4 @@ -# Bitwise Selection +# Bitwise Operations ## __m128i __lsx_vbitsel_v (__m128i a, __m128i b, __m128i c) @@ -20,3 +20,13 @@ Compute bitwise selection: for each bit position, if the bit in `c` equals to on ```c++ {% include 'vbitsel_v.h' %} ``` + +{{ vbitclr('b') }} +{{ vbitclr('h') }} +{{ vbitclr('w') }} +{{ vbitclr('d') }} + +{{ vbitclri('b') }} +{{ vbitclri('h') }} +{{ vbitclri('w') }} +{{ vbitclri('d') }} \ No newline at end of file diff --git a/main.py b/main.py index 728e89d5..b2a97e58 100644 --- a/main.py +++ b/main.py @@ -156,6 +156,25 @@ def vavgr(name): desc=f"Compute the average (rounded towards positive infinity) of {signedness} {width}-bit elements in `a` and `b`, save the result in `dst`.", ) + @env.macro + def vbitclr(name): + width = widths[name] + return instruction( + intrinsic=f"__m128i __lsx_vbitclr_{name} (__m128i a, __m128i b)", + instr=f"vbitclr.{name} vr, vr, vr", + desc=f"Clear the bit specified by elements in `b` from {width}-bit elements in `a`, save the result in `dst`.", + ) + + @env.macro + def vbitclri(name): + width = widths[name] + imm_upper = width - 1 + return instruction( + intrinsic=f"__m128i __lsx_vbitclri_{name} (__m128i a, imm0_{imm_upper} imm)", + instr=f"vbitclri.{name} vr, vr, imm", + desc=f"Clear the bit specified by `imm` from {width}-bit elements in `a`, save the result in `dst`.", + ) + @env.macro def vshuf_hwd(name): width = widths[name]