Skip to content

Commit

Permalink
Add vpcnt
Browse files Browse the repository at this point in the history
  • Loading branch information
jiegec committed Dec 12, 2023
1 parent f2eb90a commit f7112a7
Show file tree
Hide file tree
Showing 14 changed files with 81 additions and 7 deletions.
8 changes: 1 addition & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,6 @@ Vector Multiplication High

### vfrstpi.b/h

### vclo.b/h/w/d

### vclz.b/h/w/d

### vpcnt.b/h/w/d

### vneg.b/h/w/d

### vmskltz.b/h/w/d
Expand Down Expand Up @@ -248,7 +242,7 @@ Vector Multiplication High

### vssrlni.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q

### vssrlrni.b.h/h.w/w.d/d.q/bu.h/bhu.w/wu.d/du.q
### vssrlrni.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q

### vsrani.b.h/h.w/w.d/d.q

Expand Down
10 changes: 10 additions & 0 deletions code/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,16 @@ template <typename T> u8 clz(T num) {
return sizeof(T) * 8;
}

// Returns how many bits of `num` are set. Every one of the sizeof(T) * 8
// bit positions of T is probed once with a single-bit mask, from the most
// significant position down to bit 0.
template <typename T> u8 popcount(T num) {
  u8 ones = 0;
  int bit = sizeof(T) * 8;
  while (bit-- > 0) {
    const bool is_set = (num & (static_cast<T>(1) << bit)) != 0;
    if (is_set) {
      ++ones;
    }
  }
  return ones;
}

using std::max;
using std::min;

Expand Down
7 changes: 7 additions & 0 deletions code/gen_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,6 +278,13 @@
file=f,
)
print(f"}}", file=f)
# Emit vpcnt_{width}.h: a C loop over the 128 // w elements that assigns
# popcount(a.<m>[i]) to dst.<m>[i]. `width`, `w` and `m` are bound by the
# enclosing scope, which is outside this view.
with open(f"vpcnt_{width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
print(
f" dst.{m}[i] = popcount(a.{m}[i]);",
file=f,
)
print(f"}}", file=f)
with open(f"vextrins_{width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
mask = 128 // w - 1
Expand Down
1 change: 1 addition & 0 deletions code/gen_tb.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
"vmul": (widths_signed, "v128 a, v128 b"),
"vmulwev": (widths_vaddw, "v128 a, v128 b"),
"vmulwod": (widths_vaddw, "v128 a, v128 b"),
"vpcnt": (widths_signed, "v128 a"),
"vsub": (widths_signed, "v128 a, v128 b"),
"vsubwev": (widths_vsubw, "v128 a, v128 b"),
"vsubwod": (widths_vsubw, "v128 a, v128 b"),
Expand Down
9 changes: 9 additions & 0 deletions code/vpcnt_b.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Computes the vpcnt.b result for `a` and returns it by value.
v128 vpcnt_b(v128 a) {
v128 dst;
// The element loop lives in vpcnt_b.h and is textually included here; it
// writes popcount(a.byte[i]) into dst.byte[i] for each of the 16 elements.
#include "vpcnt_b.h"
return dst;
}

// Passes vpcnt_b to the FUZZ1 macro. FUZZ1 is defined outside this file;
// presumably it fuzz-tests a one-operand function — confirm in common.h.
void test() { FUZZ1(vpcnt_b); }
3 changes: 3 additions & 0 deletions code/vpcnt_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Written by gen_impl.py (vpcnt_{width}.h); change the generator, not this file.
// For each of the 16 `byte` elements, store the number of set bits of a's
// element into the matching element of dst.
for (int i = 0; i < 16; i++) {
dst.byte[i] = popcount(a.byte[i]);
}
9 changes: 9 additions & 0 deletions code/vpcnt_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Computes the vpcnt.d result for `a` and returns it by value.
v128 vpcnt_d(v128 a) {
v128 dst;
// The element loop lives in vpcnt_d.h and is textually included here; it
// writes popcount(a.dword[i]) into dst.dword[i] for each of the 2 elements.
#include "vpcnt_d.h"
return dst;
}

// Passes vpcnt_d to the FUZZ1 macro. FUZZ1 is defined outside this file;
// presumably it fuzz-tests a one-operand function — confirm in common.h.
void test() { FUZZ1(vpcnt_d); }
3 changes: 3 additions & 0 deletions code/vpcnt_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Written by gen_impl.py (vpcnt_{width}.h); change the generator, not this file.
// For each of the 2 `dword` elements, store the number of set bits of a's
// element into the matching element of dst.
for (int i = 0; i < 2; i++) {
dst.dword[i] = popcount(a.dword[i]);
}
9 changes: 9 additions & 0 deletions code/vpcnt_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Computes the vpcnt.h result for `a` and returns it by value.
v128 vpcnt_h(v128 a) {
v128 dst;
// The element loop lives in vpcnt_h.h and is textually included here; it
// writes popcount(a.half[i]) into dst.half[i] for each of the 8 elements.
#include "vpcnt_h.h"
return dst;
}

// Passes vpcnt_h to the FUZZ1 macro. FUZZ1 is defined outside this file;
// presumably it fuzz-tests a one-operand function — confirm in common.h.
void test() { FUZZ1(vpcnt_h); }
3 changes: 3 additions & 0 deletions code/vpcnt_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Written by gen_impl.py (vpcnt_{width}.h); change the generator, not this file.
// For each of the 8 `half` elements, store the number of set bits of a's
// element into the matching element of dst.
for (int i = 0; i < 8; i++) {
dst.half[i] = popcount(a.half[i]);
}
9 changes: 9 additions & 0 deletions code/vpcnt_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Computes the vpcnt.w result for `a` and returns it by value.
v128 vpcnt_w(v128 a) {
v128 dst;
// The element loop lives in vpcnt_w.h and is textually included here; it
// writes popcount(a.word[i]) into dst.word[i] for each of the 4 elements.
#include "vpcnt_w.h"
return dst;
}

// Passes vpcnt_w to the FUZZ1 macro. FUZZ1 is defined outside this file;
// presumably it fuzz-tests a one-operand function — confirm in common.h.
void test() { FUZZ1(vpcnt_w); }
3 changes: 3 additions & 0 deletions code/vpcnt_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Written by gen_impl.py (vpcnt_{width}.h); change the generator, not this file.
// For each of the 4 `word` elements, store the number of set bits of a's
// element into the matching element of dst.
for (int i = 0; i < 4; i++) {
dst.word[i] = popcount(a.word[i]);
}
5 changes: 5 additions & 0 deletions docs/lsx/bitwise_operations.md
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,8 @@ Compute bitwise selection: for each bit position, if the bit in `a` equals to on
{{ vextrins('h') }}
{{ vextrins('w') }}
{{ vextrins('d') }}

{{ vpcnt('b') }}
{{ vpcnt('h') }}
{{ vpcnt('w') }}
{{ vpcnt('d') }}
9 changes: 9 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -524,4 +524,13 @@ def vmsub(name):
intrinsic=f"__m128i __lsx_vmsub_{name} (__m128i a, __m128i b, __m128i c)",
instr=f"vmsub.{name} vr, vr, vr",
desc=f"Multiply {width}-bit elements in `b` and `c`, negate and add elements in `a`, save the result in `dst`.",
)

@env.macro
def vpcnt(name):
    """Render the documentation entry for the `vpcnt.{name}` instruction.

    `name` is the element-width suffix used to index `widths`; the docs
    invoke this macro with 'b', 'h', 'w' and 'd'. Returns whatever
    `instruction(...)` produces for the entry.
    """
    width = widths[name]
    return instruction(
        # LSX intrinsic names keep the instruction's leading `v` (compare
        # `__lsx_vmsub_*`), so this is `__lsx_vpcnt_*`, not `__lsx_pcnt_*`.
        intrinsic=f"__m128i __lsx_vpcnt_{name} (__m128i a)",
        instr=f"vpcnt.{name} vr, vr",
        desc=f"Count the number of ones in {width}-bit elements in `a`.",
    )

0 comments on commit f7112a7

Please sign in to comment.