diff --git a/code/Makefile b/code/Makefile index 1302c7bb..0c9a15eb 100644 --- a/code/Makefile +++ b/code/Makefile @@ -11,5 +11,5 @@ run: all clean: rm -rf $(EXES) -%: %.cpp $(HDRS) +%: %.cpp %.h $(CXX) $< -mlsx -mlasx -o $@ diff --git a/code/common.h b/code/common.h index d8aa5251..e539a5c0 100644 --- a/code/common.h +++ b/code/common.h @@ -20,6 +20,24 @@ typedef unsigned __int128 u128; #define MACHINE_3C5000 0 #endif +template <typename T> u8 clo(T num) { + for (int i = sizeof(T) * 8 - 1; i >= 0; i--) { + if ((num & ((T)1 << i)) == 0) { + return sizeof(T) * 8 - 1 - i; + } + } + return sizeof(T) * 8; +} + +template <typename T> u8 clz(T num) { + for (int i = sizeof(T) * 8 - 1; i >= 0; i--) { + if ((num & ((T)1 << i)) != 0) { + return sizeof(T) * 8 - 1 - i; + } + } + return sizeof(T) * 8; +} + union v128 { __m128i m128i; __m128 m128; diff --git a/code/gen_impl.py b/code/gen_impl.py index cb268bb5..76856d70 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -140,5 +140,19 @@ file=f, ) print(f"}}", file=f) + with open(f"vclo_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = clo(a.{m}[i]);", + file=f, + ) + print(f"}}", file=f) + with open(f"vclz_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = clz(a.{m}[i]);", + file=f, + ) + print(f"}}", file=f) os.system("clang-format -i *.cpp *.h") diff --git a/code/gen_tb.py b/code/gen_tb.py index 38079fd2..1fd2cfbf 100644 --- a/code/gen_tb.py +++ b/code/gen_tb.py @@ -29,6 +29,8 @@ "vbitseti": (widths_signed, "v128 a, int imm", [0, 3, 7]), "vbitrev": (widths_signed, "v128 a, v128 b"), "vbitrevi": (widths_signed, "v128 a, int imm", [0, 3, 7]), + "vclo": (widths_signed, "v128 a"), + "vclz": (widths_signed, "v128 a"), } for name in tb: diff --git a/code/vclo_b.cpp b/code/vclo_b.cpp new file mode 100644 index 00000000..14f90bba --- /dev/null +++ b/code/vclo_b.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vclo_b(v128 a) { + v128 dst; 
+#include "vclo_b.h" + return dst; +} + +void test() { FUZZ1(vclo_b); } diff --git a/code/vclo_b.h b/code/vclo_b.h new file mode 100644 index 00000000..a8bf2b59 --- /dev/null +++ b/code/vclo_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = clo(a.byte[i]); +} diff --git a/code/vclo_d.cpp b/code/vclo_d.cpp new file mode 100644 index 00000000..75d7dc77 --- /dev/null +++ b/code/vclo_d.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vclo_d(v128 a) { + v128 dst; +#include "vclo_d.h" + return dst; +} + +void test() { FUZZ1(vclo_d); } diff --git a/code/vclo_d.h b/code/vclo_d.h new file mode 100644 index 00000000..55f85ac0 --- /dev/null +++ b/code/vclo_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = clo(a.dword[i]); +} diff --git a/code/vclo_h.cpp b/code/vclo_h.cpp new file mode 100644 index 00000000..532895b2 --- /dev/null +++ b/code/vclo_h.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vclo_h(v128 a) { + v128 dst; +#include "vclo_h.h" + return dst; +} + +void test() { FUZZ1(vclo_h); } diff --git a/code/vclo_h.h b/code/vclo_h.h new file mode 100644 index 00000000..e3a35d47 --- /dev/null +++ b/code/vclo_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = clo(a.half[i]); +} diff --git a/code/vclo_w.cpp b/code/vclo_w.cpp new file mode 100644 index 00000000..3245a9fa --- /dev/null +++ b/code/vclo_w.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vclo_w(v128 a) { + v128 dst; +#include "vclo_w.h" + return dst; +} + +void test() { FUZZ1(vclo_w); } diff --git a/code/vclo_w.h b/code/vclo_w.h new file mode 100644 index 00000000..2e54618c --- /dev/null +++ b/code/vclo_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = clo(a.word[i]); +} diff --git a/code/vclz_b.cpp b/code/vclz_b.cpp new file mode 100644 index 00000000..cec0e859 --- /dev/null +++ b/code/vclz_b.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vclz_b(v128 a) { + v128 dst; +#include "vclz_b.h" + return dst; +} + +void test() { FUZZ1(vclz_b); } diff 
--git a/code/vclz_b.h b/code/vclz_b.h new file mode 100644 index 00000000..93721904 --- /dev/null +++ b/code/vclz_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = clz(a.byte[i]); +} diff --git a/code/vclz_d.cpp b/code/vclz_d.cpp new file mode 100644 index 00000000..f758c0ef --- /dev/null +++ b/code/vclz_d.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vclz_d(v128 a) { + v128 dst; +#include "vclz_d.h" + return dst; +} + +void test() { FUZZ1(vclz_d); } diff --git a/code/vclz_d.h b/code/vclz_d.h new file mode 100644 index 00000000..554c3202 --- /dev/null +++ b/code/vclz_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = clz(a.dword[i]); +} diff --git a/code/vclz_h.cpp b/code/vclz_h.cpp new file mode 100644 index 00000000..e60ae204 --- /dev/null +++ b/code/vclz_h.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vclz_h(v128 a) { + v128 dst; +#include "vclz_h.h" + return dst; +} + +void test() { FUZZ1(vclz_h); } diff --git a/code/vclz_h.h b/code/vclz_h.h new file mode 100644 index 00000000..5aa2e242 --- /dev/null +++ b/code/vclz_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = clz(a.half[i]); +} diff --git a/code/vclz_w.cpp b/code/vclz_w.cpp new file mode 100644 index 00000000..e1871f84 --- /dev/null +++ b/code/vclz_w.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vclz_w(v128 a) { + v128 dst; +#include "vclz_w.h" + return dst; +} + +void test() { FUZZ1(vclz_w); } diff --git a/code/vclz_w.h b/code/vclz_w.h new file mode 100644 index 00000000..8b4bbd2b --- /dev/null +++ b/code/vclz_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = clz(a.word[i]); +} diff --git a/docs/lsx_bitops/vbitwise.md b/docs/lsx_bitops/vbitwise.md index cf3496b6..3cf7bb65 100644 --- a/docs/lsx_bitops/vbitwise.md +++ b/docs/lsx_bitops/vbitwise.md @@ -70,4 +70,14 @@ Compute bitwise selection: for each bit position, if the bit in `a` equals to on {{ vbitrevi('b') }} {{ vbitrevi('h') }} {{ vbitrevi('w') }} -{{ vbitrevi('d') }} \ No 
newline at end of file +{{ vbitrevi('d') }} + +{{ vclo('b') }} +{{ vclo('h') }} +{{ vclo('w') }} +{{ vclo('d') }} + +{{ vclz('b') }} +{{ vclz('h') }} +{{ vclz('w') }} +{{ vclz('d') }} \ No newline at end of file diff --git a/main.py b/main.py index 014686cd..d0ea8c31 100644 --- a/main.py +++ b/main.py @@ -213,6 +213,24 @@ def vbitrevi(name): desc=f"Toggle the bit specified by `imm` from {width}-bit elements in `a`, save the result in `dst`.", ) + @env.macro + def vclo(name): + width = widths[name] + return instruction( + intrinsic=f"__m128i __lsx_vclo_{name} (__m128i a)", + instr=f"vclo.{name} vr, vr", + desc=f"Count leading ones of {width}-bit elements in `a`.", + ) + + @env.macro + def vclz(name): + width = widths[name] + return instruction( + intrinsic=f"__m128i __lsx_vclz_{name} (__m128i a)", + instr=f"vclz.{name} vr, vr", + desc=f"Count leading zeros of {width}-bit elements in `a`.", + ) + @env.macro def vshuf_hwd(name): width = widths[name]