From 150730aa580e2bf540eb732a736f5d1ab4f04a34 Mon Sep 17 00:00:00 2001
From: Jiajie Chen
Date: Tue, 12 Dec 2023 16:03:25 +0800
Subject: [PATCH] Add vdiv

Add the LSX vector integer division instructions
vdiv.{b,bu,h,hu,w,wu,d,du}:

- code/gen_impl.py: generate the vdiv_*.h reference loops. A lane whose
  divisor is zero is written as 0 instead of being divided.
- code/gen_tb.py: register vdiv for all widths with operands
  "v128 a, v128 b".
- code/vdiv_*.cpp, code/vdiv_*.h: per-width reference implementation and
  FUZZ2 test entry.
- main.py: add the vdiv documentation macro (__lsx_vdiv_* intrinsics).
- docs: list vdiv on the integer computation page; rename vaddsub.md to
  computation.md and vshift.md to shift.md.

---
 code/gen_impl.py                                |  7 +++++++
 code/gen_tb.py                                  |  1 +
 code/vdiv_b.cpp                                 |  9 +++++++++
 code/vdiv_b.h                                   |  3 +++
 code/vdiv_bu.cpp                                |  9 +++++++++
 code/vdiv_bu.h                                  |  3 +++
 code/vdiv_d.cpp                                 |  9 +++++++++
 code/vdiv_d.h                                   |  3 +++
 code/vdiv_du.cpp                                |  9 +++++++++
 code/vdiv_du.h                                  |  3 +++
 code/vdiv_h.cpp                                 |  9 +++++++++
 code/vdiv_h.h                                   |  3 +++
 code/vdiv_hu.cpp                                |  9 +++++++++
 code/vdiv_hu.h                                  |  3 +++
 code/vdiv_w.cpp                                 |  9 +++++++++
 code/vdiv_w.h                                   |  3 +++
 code/vdiv_wu.cpp                                |  9 +++++++++
 code/vdiv_wu.h                                  |  3 +++
 docs/lsx_integer/{vaddsub.md => computation.md} | 13 +++++++++++--
 docs/lsx_integer/{vshift.md => shift.md}        |  2 +-
 main.py                                         |  9 +++++++++
 21 files changed, 125 insertions(+), 3 deletions(-)
 create mode 100644 code/vdiv_b.cpp
 create mode 100644 code/vdiv_b.h
 create mode 100644 code/vdiv_bu.cpp
 create mode 100644 code/vdiv_bu.h
 create mode 100644 code/vdiv_d.cpp
 create mode 100644 code/vdiv_d.h
 create mode 100644 code/vdiv_du.cpp
 create mode 100644 code/vdiv_du.h
 create mode 100644 code/vdiv_h.cpp
 create mode 100644 code/vdiv_h.h
 create mode 100644 code/vdiv_hu.cpp
 create mode 100644 code/vdiv_hu.h
 create mode 100644 code/vdiv_w.cpp
 create mode 100644 code/vdiv_w.h
 create mode 100644 code/vdiv_wu.cpp
 create mode 100644 code/vdiv_wu.h
 rename docs/lsx_integer/{vaddsub.md => computation.md} (87%)
 rename docs/lsx_integer/{vshift.md => shift.md} (97%)

diff --git a/code/gen_impl.py b/code/gen_impl.py
index 76856d70..636f569b 100644
--- a/code/gen_impl.py
+++ b/code/gen_impl.py
@@ -58,6 +58,13 @@
             file=f,
         )
         print(f"}}", file=f)
+    with open(f"vdiv_{width}.h", "w") as f:
+        print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
+        print(
+            f" dst.{m}[i] = (b.{m}[i] == 0) ? 0 : ({sign}{w})a.{m}[i] / (({sign}{w})b.{m}[i]);",
+            file=f,
+        )
+        print(f"}}", file=f)
 
 for width in ["b", "bu", "h", "hu", "w", "wu", "d", "du"]:
     double_width = double_widths[width]
diff --git a/code/gen_tb.py b/code/gen_tb.py
index 1fd2cfbf..89e127c5 100644
--- a/code/gen_tb.py
+++ b/code/gen_tb.py
@@ -31,6 +31,7 @@
     "vbitrevi": (widths_signed, "v128 a, int imm", [0, 3, 7]),
     "vclo": (widths_signed, "v128 a"),
     "vclz": (widths_signed, "v128 a"),
+    "vdiv": (widths_all, "v128 a, v128 b"),
 }
 
 for name in tb:
diff --git a/code/vdiv_b.cpp b/code/vdiv_b.cpp
new file mode 100644
index 00000000..58d99ad9
--- /dev/null
+++ b/code/vdiv_b.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vdiv_b(v128 a, v128 b) {
+  v128 dst;
+#include "vdiv_b.h"
+  return dst;
+}
+
+void test() { FUZZ2(vdiv_b); }
diff --git a/code/vdiv_b.h b/code/vdiv_b.h
new file mode 100644
index 00000000..f237bebe
--- /dev/null
+++ b/code/vdiv_b.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 16; i++) {
+  dst.byte[i] = (b.byte[i] == 0) ? 0 : (s8)a.byte[i] / ((s8)b.byte[i]);
+}
diff --git a/code/vdiv_bu.cpp b/code/vdiv_bu.cpp
new file mode 100644
index 00000000..10f2b17b
--- /dev/null
+++ b/code/vdiv_bu.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vdiv_bu(v128 a, v128 b) {
+  v128 dst;
+#include "vdiv_bu.h"
+  return dst;
+}
+
+void test() { FUZZ2(vdiv_bu); }
diff --git a/code/vdiv_bu.h b/code/vdiv_bu.h
new file mode 100644
index 00000000..a0d7f69e
--- /dev/null
+++ b/code/vdiv_bu.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 16; i++) {
+  dst.byte[i] = (b.byte[i] == 0) ? 0 : (u8)a.byte[i] / ((u8)b.byte[i]);
+}
diff --git a/code/vdiv_d.cpp b/code/vdiv_d.cpp
new file mode 100644
index 00000000..afba63c5
--- /dev/null
+++ b/code/vdiv_d.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vdiv_d(v128 a, v128 b) {
+  v128 dst;
+#include "vdiv_d.h"
+  return dst;
+}
+
+void test() { FUZZ2(vdiv_d); }
diff --git a/code/vdiv_d.h b/code/vdiv_d.h
new file mode 100644
index 00000000..0b0a263e
--- /dev/null
+++ b/code/vdiv_d.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 2; i++) {
+  dst.dword[i] = (b.dword[i] == 0) ? 0 : (s64)a.dword[i] / ((s64)b.dword[i]);
+}
diff --git a/code/vdiv_du.cpp b/code/vdiv_du.cpp
new file mode 100644
index 00000000..e7c548bf
--- /dev/null
+++ b/code/vdiv_du.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vdiv_du(v128 a, v128 b) {
+  v128 dst;
+#include "vdiv_du.h"
+  return dst;
+}
+
+void test() { FUZZ2(vdiv_du); }
diff --git a/code/vdiv_du.h b/code/vdiv_du.h
new file mode 100644
index 00000000..191deea0
--- /dev/null
+++ b/code/vdiv_du.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 2; i++) {
+  dst.dword[i] = (b.dword[i] == 0) ? 0 : (u64)a.dword[i] / ((u64)b.dword[i]);
+}
diff --git a/code/vdiv_h.cpp b/code/vdiv_h.cpp
new file mode 100644
index 00000000..2c2c0f68
--- /dev/null
+++ b/code/vdiv_h.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vdiv_h(v128 a, v128 b) {
+  v128 dst;
+#include "vdiv_h.h"
+  return dst;
+}
+
+void test() { FUZZ2(vdiv_h); }
diff --git a/code/vdiv_h.h b/code/vdiv_h.h
new file mode 100644
index 00000000..a979f7be
--- /dev/null
+++ b/code/vdiv_h.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 8; i++) {
+  dst.half[i] = (b.half[i] == 0) ? 0 : (s16)a.half[i] / ((s16)b.half[i]);
+}
diff --git a/code/vdiv_hu.cpp b/code/vdiv_hu.cpp
new file mode 100644
index 00000000..ec366f68
--- /dev/null
+++ b/code/vdiv_hu.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vdiv_hu(v128 a, v128 b) {
+  v128 dst;
+#include "vdiv_hu.h"
+  return dst;
+}
+
+void test() { FUZZ2(vdiv_hu); }
diff --git a/code/vdiv_hu.h b/code/vdiv_hu.h
new file mode 100644
index 00000000..75fabde1
--- /dev/null
+++ b/code/vdiv_hu.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 8; i++) {
+  dst.half[i] = (b.half[i] == 0) ? 0 : (u16)a.half[i] / ((u16)b.half[i]);
+}
diff --git a/code/vdiv_w.cpp b/code/vdiv_w.cpp
new file mode 100644
index 00000000..3eb56889
--- /dev/null
+++ b/code/vdiv_w.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vdiv_w(v128 a, v128 b) {
+  v128 dst;
+#include "vdiv_w.h"
+  return dst;
+}
+
+void test() { FUZZ2(vdiv_w); }
diff --git a/code/vdiv_w.h b/code/vdiv_w.h
new file mode 100644
index 00000000..cfb2c1d4
--- /dev/null
+++ b/code/vdiv_w.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 4; i++) {
+  dst.word[i] = (b.word[i] == 0) ? 0 : (s32)a.word[i] / ((s32)b.word[i]);
+}
diff --git a/code/vdiv_wu.cpp b/code/vdiv_wu.cpp
new file mode 100644
index 00000000..d70737af
--- /dev/null
+++ b/code/vdiv_wu.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vdiv_wu(v128 a, v128 b) {
+  v128 dst;
+#include "vdiv_wu.h"
+  return dst;
+}
+
+void test() { FUZZ2(vdiv_wu); }
diff --git a/code/vdiv_wu.h b/code/vdiv_wu.h
new file mode 100644
index 00000000..aa5540dd
--- /dev/null
+++ b/code/vdiv_wu.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 4; i++) {
+  dst.word[i] = (b.word[i] == 0) ? 0 : (u32)a.word[i] / ((u32)b.word[i]);
+}
diff --git a/docs/lsx_integer/vaddsub.md b/docs/lsx_integer/computation.md
similarity index 87%
rename from docs/lsx_integer/vaddsub.md
rename to docs/lsx_integer/computation.md
index 506aca2e..6d5d81e2 100644
--- a/docs/lsx_integer/vaddsub.md
+++ b/docs/lsx_integer/computation.md
@@ -1,4 +1,4 @@
-# Add, Subtract, Absolute and Average
+# Integer Computation
 
 {{ vadd('b') }}
 {{ vadd('h') }}
@@ -67,4 +67,13 @@
 {{ vavgr('w') }}
 {{ vavgr('wu') }}
 {{ vavgr('d') }}
-{{ vavgr('du') }}
\ No newline at end of file
+{{ vavgr('du') }}
+
+{{ vdiv('b') }}
+{{ vdiv('bu') }}
+{{ vdiv('h') }}
+{{ vdiv('hu') }}
+{{ vdiv('w') }}
+{{ vdiv('wu') }}
+{{ vdiv('d') }}
+{{ vdiv('du') }}
\ No newline at end of file
diff --git a/docs/lsx_integer/vshift.md b/docs/lsx_integer/shift.md
similarity index 97%
rename from docs/lsx_integer/vshift.md
rename to docs/lsx_integer/shift.md
index 5df3810b..99216fd7 100644
--- a/docs/lsx_integer/vshift.md
+++ b/docs/lsx_integer/shift.md
@@ -1,4 +1,4 @@
-# Shift
+# Integer Shift
 
 ## __m128i __lsx_vbsll_v (__m128i a, imm0_31 imm)
 
diff --git a/main.py b/main.py
index d0ea8c31..e9e721be 100644
--- a/main.py
+++ b/main.py
@@ -231,6 +231,15 @@ def vclz(name):
             desc=f"Count leading zeros of {width}-bit elements in `a`.",
         )
 
+    @env.macro
+    def vdiv(name):
+        width = widths[name]
+        return instruction(
+            intrinsic=f"__m128i __lsx_vdiv_{name} (__m128i a, __m128i b)",
+            instr=f"vdiv.{name} vr, vr, vr",
+            desc=f"Divide {width}-bit elements in `a` by elements in `b`.",
+        )
+
     @env.macro
     def vshuf_hwd(name):
         width = widths[name]