Skip to content

Commit

Permalink
Add vclo/vclz
Browse files Browse the repository at this point in the history
  • Loading branch information
jiegec committed Dec 12, 2023
1 parent 86c704a commit 4bf308a
Show file tree
Hide file tree
Showing 22 changed files with 160 additions and 2 deletions.
2 changes: 1 addition & 1 deletion code/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ run: all
clean:
rm -rf $(EXES)

%: %.cpp $(HDRS)
# Pattern rule: build executable `foo` from foo.cpp. Only the first
# prerequisite ($<, the .cpp) is passed to the compiler; the same-named .h is
# listed as a prerequisite so that a change to it triggers a rebuild.
%: %.cpp %.h
$(CXX) $< -mlsx -mlasx -o $@
18 changes: 18 additions & 0 deletions code/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,24 @@ typedef unsigned __int128 u128;
#define MACHINE_3C5000 0
#endif

// Count leading ones: returns how many consecutive 1 bits `num` has, starting
// from its most significant bit. If every bit is set, the result is the full
// bit width of T (sizeof(T) * 8).
template <typename T> u8 clo(T num) {
  const int bits = sizeof(T) * 8;
  int pos = bits - 1;
  // Walk down from the top bit while the bits are still set.
  while (pos >= 0 && (num & (static_cast<T>(1) << pos)) != 0) {
    pos--;
  }
  // `pos` is now the index of the first clear bit, or -1 if there is none.
  return bits - 1 - pos;
}

// Count leading zeros: returns how many consecutive 0 bits `num` has, starting
// from its most significant bit. If `num` has no bit set, the result is the
// full bit width of T (sizeof(T) * 8).
template <typename T> u8 clz(T num) {
  const int bits = sizeof(T) * 8;
  int pos = bits - 1;
  // Walk down from the top bit while the bits are still clear.
  while (pos >= 0 && (num & (static_cast<T>(1) << pos)) == 0) {
    pos--;
  }
  // `pos` is now the index of the first set bit, or -1 if there is none.
  return bits - 1 - pos;
}

union v128 {
__m128i m128i;
__m128 m128;
Expand Down
14 changes: 14 additions & 0 deletions code/gen_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,5 +140,19 @@
file=f,
)
print(f"}}", file=f)
# Write vclo_{width}.h: a C loop that applies clo() to each element of `a`
# and stores the result in the matching element of `dst`.
with open(f"vclo_{width}.h", "w") as out:
    out.write(f"for (int i = 0;i < {128 // w};i++) {{\n")
    out.write(f" dst.{m}[i] = clo(a.{m}[i]);\n")
    out.write("}\n")
# Write vclz_{width}.h: a C loop that applies clz() to each element of `a`
# and stores the result in the matching element of `dst`.
with open(f"vclz_{width}.h", "w") as out:
    out.write(f"for (int i = 0;i < {128 // w};i++) {{\n")
    out.write(f" dst.{m}[i] = clz(a.{m}[i]);\n")
    out.write("}\n")

os.system("clang-format -i *.cpp *.h")
2 changes: 2 additions & 0 deletions code/gen_tb.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@
"vbitseti": (widths_signed, "v128 a, int imm", [0, 3, 7]),
"vbitrev": (widths_signed, "v128 a, v128 b"),
"vbitrevi": (widths_signed, "v128 a, int imm", [0, 3, 7]),
"vclo": (widths_signed, "v128 a"),
"vclz": (widths_signed, "v128 a"),
}

for name in tb:
Expand Down
9 changes: 9 additions & 0 deletions code/vclo_b.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// For each of the 16 `byte` elements of `a`, computes the number of leading
// one bits (via clo) and returns the counts in the matching elements of the
// result.
v128 vclo_b(v128 a) {
v128 dst;
// The loop body is included textually; it reads `a` and writes `dst`, so
// those two names must stay as they are.
#include "vclo_b.h"
return dst;
}

// Hands vclo_b to FUZZ1.
// NOTE(review): FUZZ1 is defined outside this file (presumably in common.h) —
// confirm what it checks the function against.
void test() { FUZZ1(vclo_b); }
3 changes: 3 additions & 0 deletions code/vclo_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// dst.byte[i] = number of leading one bits of a.byte[i], for all 16 elements.
// This fragment is #included inside vclo_b(); `a` and `dst` come from there.
for (int i = 0; i < 16; i++) {
dst.byte[i] = clo(a.byte[i]);
}
9 changes: 9 additions & 0 deletions code/vclo_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// For each of the 2 `dword` elements of `a`, computes the number of leading
// one bits (via clo) and returns the counts in the matching elements of the
// result.
v128 vclo_d(v128 a) {
v128 dst;
// The loop body is included textually; it reads `a` and writes `dst`, so
// those two names must stay as they are.
#include "vclo_d.h"
return dst;
}

// Hands vclo_d to FUZZ1.
// NOTE(review): FUZZ1 is defined outside this file (presumably in common.h) —
// confirm what it checks the function against.
void test() { FUZZ1(vclo_d); }
3 changes: 3 additions & 0 deletions code/vclo_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// dst.dword[i] = number of leading one bits of a.dword[i], for both elements.
// This fragment is #included inside vclo_d(); `a` and `dst` come from there.
for (int i = 0; i < 2; i++) {
dst.dword[i] = clo(a.dword[i]);
}
9 changes: 9 additions & 0 deletions code/vclo_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// For each of the 8 `half` elements of `a`, computes the number of leading
// one bits (via clo) and returns the counts in the matching elements of the
// result.
v128 vclo_h(v128 a) {
v128 dst;
// The loop body is included textually; it reads `a` and writes `dst`, so
// those two names must stay as they are.
#include "vclo_h.h"
return dst;
}

// Hands vclo_h to FUZZ1.
// NOTE(review): FUZZ1 is defined outside this file (presumably in common.h) —
// confirm what it checks the function against.
void test() { FUZZ1(vclo_h); }
3 changes: 3 additions & 0 deletions code/vclo_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// dst.half[i] = number of leading one bits of a.half[i], for all 8 elements.
// This fragment is #included inside vclo_h(); `a` and `dst` come from there.
for (int i = 0; i < 8; i++) {
dst.half[i] = clo(a.half[i]);
}
9 changes: 9 additions & 0 deletions code/vclo_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// For each of the 4 `word` elements of `a`, computes the number of leading
// one bits (via clo) and returns the counts in the matching elements of the
// result.
v128 vclo_w(v128 a) {
v128 dst;
// The loop body is included textually; it reads `a` and writes `dst`, so
// those two names must stay as they are.
#include "vclo_w.h"
return dst;
}

// Hands vclo_w to FUZZ1.
// NOTE(review): FUZZ1 is defined outside this file (presumably in common.h) —
// confirm what it checks the function against.
void test() { FUZZ1(vclo_w); }
3 changes: 3 additions & 0 deletions code/vclo_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// dst.word[i] = number of leading one bits of a.word[i], for all 4 elements.
// This fragment is #included inside vclo_w(); `a` and `dst` come from there.
for (int i = 0; i < 4; i++) {
dst.word[i] = clo(a.word[i]);
}
9 changes: 9 additions & 0 deletions code/vclz_b.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// For each of the 16 `byte` elements of `a`, computes the number of leading
// zero bits (via clz) and returns the counts in the matching elements of the
// result.
v128 vclz_b(v128 a) {
v128 dst;
// The loop body is included textually; it reads `a` and writes `dst`, so
// those two names must stay as they are.
#include "vclz_b.h"
return dst;
}

// Hands vclz_b to FUZZ1.
// NOTE(review): FUZZ1 is defined outside this file (presumably in common.h) —
// confirm what it checks the function against.
void test() { FUZZ1(vclz_b); }
3 changes: 3 additions & 0 deletions code/vclz_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// dst.byte[i] = number of leading zero bits of a.byte[i], for all 16 elements.
// This fragment is #included inside vclz_b(); `a` and `dst` come from there.
for (int i = 0; i < 16; i++) {
dst.byte[i] = clz(a.byte[i]);
}
9 changes: 9 additions & 0 deletions code/vclz_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// For each of the 2 `dword` elements of `a`, computes the number of leading
// zero bits (via clz) and returns the counts in the matching elements of the
// result.
v128 vclz_d(v128 a) {
v128 dst;
// The loop body is included textually; it reads `a` and writes `dst`, so
// those two names must stay as they are.
#include "vclz_d.h"
return dst;
}

// Hands vclz_d to FUZZ1.
// NOTE(review): FUZZ1 is defined outside this file (presumably in common.h) —
// confirm what it checks the function against.
void test() { FUZZ1(vclz_d); }
3 changes: 3 additions & 0 deletions code/vclz_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// dst.dword[i] = number of leading zero bits of a.dword[i], for both elements.
// This fragment is #included inside vclz_d(); `a` and `dst` come from there.
for (int i = 0; i < 2; i++) {
dst.dword[i] = clz(a.dword[i]);
}
9 changes: 9 additions & 0 deletions code/vclz_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// For each of the 8 `half` elements of `a`, computes the number of leading
// zero bits (via clz) and returns the counts in the matching elements of the
// result.
v128 vclz_h(v128 a) {
v128 dst;
// The loop body is included textually; it reads `a` and writes `dst`, so
// those two names must stay as they are.
#include "vclz_h.h"
return dst;
}

// Hands vclz_h to FUZZ1.
// NOTE(review): FUZZ1 is defined outside this file (presumably in common.h) —
// confirm what it checks the function against.
void test() { FUZZ1(vclz_h); }
3 changes: 3 additions & 0 deletions code/vclz_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// dst.half[i] = number of leading zero bits of a.half[i], for all 8 elements.
// This fragment is #included inside vclz_h(); `a` and `dst` come from there.
for (int i = 0; i < 8; i++) {
dst.half[i] = clz(a.half[i]);
}
9 changes: 9 additions & 0 deletions code/vclz_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// For each of the 4 `word` elements of `a`, computes the number of leading
// zero bits (via clz) and returns the counts in the matching elements of the
// result.
v128 vclz_w(v128 a) {
v128 dst;
// The loop body is included textually; it reads `a` and writes `dst`, so
// those two names must stay as they are.
#include "vclz_w.h"
return dst;
}

// Hands vclz_w to FUZZ1.
// NOTE(review): FUZZ1 is defined outside this file (presumably in common.h) —
// confirm what it checks the function against.
void test() { FUZZ1(vclz_w); }
3 changes: 3 additions & 0 deletions code/vclz_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// dst.word[i] = number of leading zero bits of a.word[i], for all 4 elements.
// This fragment is #included inside vclz_w(); `a` and `dst` come from there.
for (int i = 0; i < 4; i++) {
dst.word[i] = clz(a.word[i]);
}
12 changes: 11 additions & 1 deletion docs/lsx_bitops/vbitwise.md
Original file line number Diff line number Diff line change
Expand Up @@ -70,4 +70,14 @@ Compute bitwise selection: for each bit position, if the bit in `a` equals to on
{{ vbitrevi('b') }}
{{ vbitrevi('h') }}
{{ vbitrevi('w') }}
{{ vbitrevi('d') }}
{{ vbitrevi('d') }}

{{ vclo('b') }}
{{ vclo('h') }}
{{ vclo('w') }}
{{ vclo('d') }}

{{ vclz('b') }}
{{ vclz('h') }}
{{ vclz('w') }}
{{ vclz('d') }}
18 changes: 18 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,24 @@ def vbitrevi(name):
desc=f"Toggle the bit specified by `imm` from {width}-bit elements in `a`, save the result in `dst`.",
)

@env.macro
def vclo(name):
    """Documentation macro for the `vclo.{name}` instruction.

    `name` is the element-size suffix and is used as a key into `widths`
    to obtain the element width in bits. Returns the result of
    `instruction()` for the intrinsic prototype, assembly form and
    description built here.
    """
    width = widths[name]
    return instruction(
        # LSX intrinsics keep the instruction's `v` prefix:
        # __lsx_vclo_*, not __lsx_clo_*.
        intrinsic=f"__m128i __lsx_vclo_{name} (__m128i a)",
        instr=f"vclo.{name} vr, vr",
        desc=f"Count leading ones of {width}-bit elements in `a`.",
    )

@env.macro
def vclz(name):
    """Documentation macro for the `vclz.{name}` instruction.

    `name` is the element-size suffix and is used as a key into `widths`
    to obtain the element width in bits. Returns the result of
    `instruction()` for the intrinsic prototype, assembly form and
    description built here.
    """
    width = widths[name]
    return instruction(
        # LSX intrinsics keep the instruction's `v` prefix:
        # __lsx_vclz_*, not __lsx_clz_*.
        intrinsic=f"__m128i __lsx_vclz_{name} (__m128i a)",
        instr=f"vclz.{name} vr, vr",
        desc=f"Count leading zeros of {width}-bit elements in `a`.",
    )

@env.macro
def vshuf_hwd(name):
width = widths[name]
Expand Down

0 comments on commit 4bf308a

Please sign in to comment.