From f7112a72b73c77c3e341bda426560ae547a51117 Mon Sep 17 00:00:00 2001 From: Jiajie Chen Date: Tue, 12 Dec 2023 20:50:25 +0800 Subject: [PATCH] Add vpcnt --- README.md | 8 +------- code/common.h | 10 ++++++++++ code/gen_impl.py | 7 +++++++ code/gen_tb.py | 1 + code/vpcnt_b.cpp | 9 +++++++++ code/vpcnt_b.h | 3 +++ code/vpcnt_d.cpp | 9 +++++++++ code/vpcnt_d.h | 3 +++ code/vpcnt_h.cpp | 9 +++++++++ code/vpcnt_h.h | 3 +++ code/vpcnt_w.cpp | 9 +++++++++ code/vpcnt_w.h | 3 +++ docs/lsx/bitwise_operations.md | 5 +++++ main.py | 9 +++++++++ 14 files changed, 81 insertions(+), 7 deletions(-) create mode 100644 code/vpcnt_b.cpp create mode 100644 code/vpcnt_b.h create mode 100644 code/vpcnt_d.cpp create mode 100644 code/vpcnt_d.h create mode 100644 code/vpcnt_h.cpp create mode 100644 code/vpcnt_h.h create mode 100644 code/vpcnt_w.cpp create mode 100644 code/vpcnt_w.h diff --git a/README.md b/README.md index 604f9998..66365b40 100644 --- a/README.md +++ b/README.md @@ -132,12 +132,6 @@ Vector Multiplication High ### vfrstpi.b/h -### vclo.b/h/w/d - -### vclz.b/h/w/d - -### vpcnt.b/h/w/d - ### vneg.b/h/w/d ### vmskltz.b/h/w/d @@ -248,7 +242,7 @@ Vector Multiplication High ### vssrlni.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q -### vssrlrni.b.h/h.w/w.d/d.q/bu.h/bhu.w/wu.d/du.q +### vssrlrni.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q ### vsrani.b.h/h.w/w.d/d.q diff --git a/code/common.h b/code/common.h index e3c5e0c9..6090f758 100644 --- a/code/common.h +++ b/code/common.h @@ -39,6 +39,16 @@ template <typename T> u8 clz(T num) { return sizeof(T) * 8; } +template <typename T> u8 popcount(T num) { + u8 res = 0; + for (int i = sizeof(T) * 8 - 1; i >= 0; i--) { + if ((num & ((T)1 << i)) != 0) { + res++; + } + } + return res; +} + using std::max; using std::min; diff --git a/code/gen_impl.py b/code/gen_impl.py index 0e8a61a8..503522c7 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -278,6 +278,13 @@ file=f, ) print(f"}}", file=f) + with open(f"vpcnt_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // 
w};i++) {{", file=f) + print( + f" dst.{m}[i] = popcount(a.{m}[i]);", + file=f, + ) + print(f"}}", file=f) with open(f"vextrins_{width}.h", "w") as f: print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) mask = 128 // w - 1 diff --git a/code/gen_tb.py b/code/gen_tb.py index c53d57e0..83fbd8af 100644 --- a/code/gen_tb.py +++ b/code/gen_tb.py @@ -72,6 +72,7 @@ "vmul": (widths_signed, "v128 a, v128 b"), "vmulwev": (widths_vaddw, "v128 a, v128 b"), "vmulwod": (widths_vaddw, "v128 a, v128 b"), + "vpcnt": (widths_signed, "v128 a"), "vsub": (widths_signed, "v128 a, v128 b"), "vsubwev": (widths_vsubw, "v128 a, v128 b"), "vsubwod": (widths_vsubw, "v128 a, v128 b"), diff --git a/code/vpcnt_b.cpp b/code/vpcnt_b.cpp new file mode 100644 index 00000000..701811b2 --- /dev/null +++ b/code/vpcnt_b.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vpcnt_b(v128 a) { + v128 dst; +#include "vpcnt_b.h" + return dst; +} + +void test() { FUZZ1(vpcnt_b); } diff --git a/code/vpcnt_b.h b/code/vpcnt_b.h new file mode 100644 index 00000000..529750c7 --- /dev/null +++ b/code/vpcnt_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = popcount(a.byte[i]); +} diff --git a/code/vpcnt_d.cpp b/code/vpcnt_d.cpp new file mode 100644 index 00000000..9dfb88be --- /dev/null +++ b/code/vpcnt_d.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vpcnt_d(v128 a) { + v128 dst; +#include "vpcnt_d.h" + return dst; +} + +void test() { FUZZ1(vpcnt_d); } diff --git a/code/vpcnt_d.h b/code/vpcnt_d.h new file mode 100644 index 00000000..4ae85315 --- /dev/null +++ b/code/vpcnt_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = popcount(a.dword[i]); +} diff --git a/code/vpcnt_h.cpp b/code/vpcnt_h.cpp new file mode 100644 index 00000000..666f65eb --- /dev/null +++ b/code/vpcnt_h.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vpcnt_h(v128 a) { + v128 dst; +#include "vpcnt_h.h" + return dst; +} + +void test() { FUZZ1(vpcnt_h); } diff --git a/code/vpcnt_h.h b/code/vpcnt_h.h new file mode 
100644 index 00000000..2e751d4d --- /dev/null +++ b/code/vpcnt_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = popcount(a.half[i]); +} diff --git a/code/vpcnt_w.cpp b/code/vpcnt_w.cpp new file mode 100644 index 00000000..fbc0719b --- /dev/null +++ b/code/vpcnt_w.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vpcnt_w(v128 a) { + v128 dst; +#include "vpcnt_w.h" + return dst; +} + +void test() { FUZZ1(vpcnt_w); } diff --git a/code/vpcnt_w.h b/code/vpcnt_w.h new file mode 100644 index 00000000..b33f0872 --- /dev/null +++ b/code/vpcnt_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = popcount(a.word[i]); +} diff --git a/docs/lsx/bitwise_operations.md b/docs/lsx/bitwise_operations.md index aac9c42e..a40d243e 100644 --- a/docs/lsx/bitwise_operations.md +++ b/docs/lsx/bitwise_operations.md @@ -98,3 +98,8 @@ Compute bitwise selection: for each bit position, if the bit in `a` equals to on {{ vextrins('h') }} {{ vextrins('w') }} {{ vextrins('d') }} + +{{ vpcnt('b') }} +{{ vpcnt('h') }} +{{ vpcnt('w') }} +{{ vpcnt('d') }} \ No newline at end of file diff --git a/main.py b/main.py index 39db0e8a..2af30f41 100644 --- a/main.py +++ b/main.py @@ -524,4 +524,13 @@ def vmsub(name): intrinsic=f"__m128i __lsx_vmsub_{name} (__m128i a, __m128i b, __m128i c)", instr=f"vmsub.{name} vr, vr, vr", desc=f"Multiply {width}-bit elements in `b` and `c`, negate and add elements in `a`, save the result in `dst`.", + ) + + @env.macro + def vpcnt(name): + width = widths[name] + return instruction( + intrinsic=f"__m128i __lsx_vpcnt_{name} (__m128i a)", + instr=f"vpcnt.{name} vr, vr", + desc=f"Count the number of ones in {width}-bit elements in `a`.", ) \ No newline at end of file