From 0a74f65b1f1f1236d17292a3a79410bbba8932b8 Mon Sep 17 00:00:00 2001 From: Jiajie Chen Date: Tue, 12 Dec 2023 19:02:19 +0800 Subject: [PATCH] Add vhaddw/vhsubw --- README.md | 16 ---------------- code/gen_impl.py | 15 +++++++++++++++ code/gen_tb.py | 2 ++ code/vhaddw_d_w.cpp | 9 +++++++++ code/vhaddw_d_w.h | 3 +++ code/vhaddw_du_wu.cpp | 9 +++++++++ code/vhaddw_du_wu.h | 3 +++ code/vhaddw_h_b.cpp | 9 +++++++++ code/vhaddw_h_b.h | 3 +++ code/vhaddw_hu_bu.cpp | 9 +++++++++ code/vhaddw_hu_bu.h | 3 +++ code/vhaddw_q_d.cpp | 9 +++++++++ code/vhaddw_q_d.h | 3 +++ code/vhaddw_qu_du.cpp | 9 +++++++++ code/vhaddw_qu_du.h | 3 +++ code/vhaddw_w_h.cpp | 9 +++++++++ code/vhaddw_w_h.h | 3 +++ code/vhaddw_wu_hu.cpp | 9 +++++++++ code/vhaddw_wu_hu.h | 3 +++ code/vhsubw_d_w.cpp | 9 +++++++++ code/vhsubw_d_w.h | 3 +++ code/vhsubw_du_wu.cpp | 9 +++++++++ code/vhsubw_du_wu.h | 3 +++ code/vhsubw_h_b.cpp | 9 +++++++++ code/vhsubw_h_b.h | 3 +++ code/vhsubw_hu_bu.cpp | 9 +++++++++ code/vhsubw_hu_bu.h | 3 +++ code/vhsubw_q_d.cpp | 9 +++++++++ code/vhsubw_q_d.h | 3 +++ code/vhsubw_qu_du.cpp | 9 +++++++++ code/vhsubw_qu_du.h | 3 +++ code/vhsubw_w_h.cpp | 9 +++++++++ code/vhsubw_w_h.h | 3 +++ code/vhsubw_wu_hu.cpp | 9 +++++++++ code/vhsubw_wu_hu.h | 3 +++ docs/lsx/computation.md | 20 +++++++++++++++++++- main.py | 22 ++++++++++++++++++++++ 37 files changed, 250 insertions(+), 17 deletions(-) create mode 100644 code/vhaddw_d_w.cpp create mode 100644 code/vhaddw_d_w.h create mode 100644 code/vhaddw_du_wu.cpp create mode 100644 code/vhaddw_du_wu.h create mode 100644 code/vhaddw_h_b.cpp create mode 100644 code/vhaddw_h_b.h create mode 100644 code/vhaddw_hu_bu.cpp create mode 100644 code/vhaddw_hu_bu.h create mode 100644 code/vhaddw_q_d.cpp create mode 100644 code/vhaddw_q_d.h create mode 100644 code/vhaddw_qu_du.cpp create mode 100644 code/vhaddw_qu_du.h create mode 100644 code/vhaddw_w_h.cpp create mode 100644 code/vhaddw_w_h.h create mode 100644 code/vhaddw_wu_hu.cpp create mode 100644 
code/vhaddw_wu_hu.h create mode 100644 code/vhsubw_d_w.cpp create mode 100644 code/vhsubw_d_w.h create mode 100644 code/vhsubw_du_wu.cpp create mode 100644 code/vhsubw_du_wu.h create mode 100644 code/vhsubw_h_b.cpp create mode 100644 code/vhsubw_h_b.h create mode 100644 code/vhsubw_hu_bu.cpp create mode 100644 code/vhsubw_hu_bu.h create mode 100644 code/vhsubw_q_d.cpp create mode 100644 code/vhsubw_q_d.h create mode 100644 code/vhsubw_qu_du.cpp create mode 100644 code/vhsubw_qu_du.h create mode 100644 code/vhsubw_w_h.cpp create mode 100644 code/vhsubw_w_h.h create mode 100644 code/vhsubw_wu_hu.cpp create mode 100644 code/vhsubw_wu_hu.h diff --git a/README.md b/README.md index cdadd0c0..7d0c97d3 100644 --- a/README.md +++ b/README.md @@ -54,22 +54,6 @@ Vector Saturated Add Unsigned Vector Saturated Subtract Unsigned -### vhaddw.h.b/w.h/d.w/q.d - -Vector Half Add - -### vhsubw.h.b/w.h/d.w/q.d - -Vector Half Add - -### vhaddw.hu.bu/wu.hu/du.wu/qu.du - -Vector Half Add Unsigned - -### vhsubw.hu.bu/wu.hu/du.wu/qu.du - -Vector Half Sub Unsigned - ### vmax.b/h/w/d Vector Maximum diff --git a/code/gen_impl.py b/code/gen_impl.py index c32025b2..e70624ce 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -81,6 +81,7 @@ file=f, ) print(f"}}", file=f) + double_width = double_widths_same_signedness[width] double_w = widths[double_width] double_m = members[double_width] @@ -91,6 +92,20 @@ file=f, ) print(f"}}", file=f) + with open(f"vhaddw_{double_width}_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // double_w};i++) {{", file=f) + print( + f" dst.{double_m}[i] = ({sign}{double_w})({sign}{w})a.{m}[2 * i + 1] + ({sign}{double_w})({sign}{w})b.{m}[2 * i];", + file=f, + ) + print(f"}}", file=f) + with open(f"vhsubw_{double_width}_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // double_w};i++) {{", file=f) + print( + f" dst.{double_m}[i] = ({sign}{double_w})({sign}{w})a.{m}[2 * i + 1] - ({sign}{double_w})({sign}{w})b.{m}[2 * i];", + file=f, + ) + 
print(f"}}", file=f) if width == "d" or width == "du": with open(f"vextl_{double_width}_{width}.h", "w") as f: diff --git a/code/gen_tb.py b/code/gen_tb.py index d64b798d..57c0f610 100644 --- a/code/gen_tb.py +++ b/code/gen_tb.py @@ -37,6 +37,8 @@ "vexth": (widths_vexth, "v128 a"), "vextl": (["q_d", "qu_du"], "v128 a"), "vextrins": (widths_signed, "v128 a, v128 b, int imm", [0, 3, 7, 15, 16, 32, 64, 128, 255]), + "vhaddw": (widths_vexth, "v128 a, v128 b"), + "vhsubw": (widths_vexth, "v128 a, v128 b"), } for name in tb: diff --git a/code/vhaddw_d_w.cpp b/code/vhaddw_d_w.cpp new file mode 100644 index 00000000..248def0a --- /dev/null +++ b/code/vhaddw_d_w.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhaddw_d_w(v128 a, v128 b) { + v128 dst; +#include "vhaddw_d_w.h" + return dst; +} + +void test() { FUZZ2(vhaddw_d_w); } diff --git a/code/vhaddw_d_w.h b/code/vhaddw_d_w.h new file mode 100644 index 00000000..aead2059 --- /dev/null +++ b/code/vhaddw_d_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = (s64)(s32)a.word[2 * i + 1] + (s64)(s32)b.word[2 * i]; +} diff --git a/code/vhaddw_du_wu.cpp b/code/vhaddw_du_wu.cpp new file mode 100644 index 00000000..f174ba67 --- /dev/null +++ b/code/vhaddw_du_wu.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhaddw_du_wu(v128 a, v128 b) { + v128 dst; +#include "vhaddw_du_wu.h" + return dst; +} + +void test() { FUZZ2(vhaddw_du_wu); } diff --git a/code/vhaddw_du_wu.h b/code/vhaddw_du_wu.h new file mode 100644 index 00000000..84c9dd82 --- /dev/null +++ b/code/vhaddw_du_wu.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = (u64)(u32)a.word[2 * i + 1] + (u64)(u32)b.word[2 * i]; +} diff --git a/code/vhaddw_h_b.cpp b/code/vhaddw_h_b.cpp new file mode 100644 index 00000000..9b8e416d --- /dev/null +++ b/code/vhaddw_h_b.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhaddw_h_b(v128 a, v128 b) { + v128 dst; +#include "vhaddw_h_b.h" + return dst; +} + +void test() { FUZZ2(vhaddw_h_b); } diff --git 
a/code/vhaddw_h_b.h b/code/vhaddw_h_b.h new file mode 100644 index 00000000..1f6c9b04 --- /dev/null +++ b/code/vhaddw_h_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = (s16)(s8)a.byte[2 * i + 1] + (s16)(s8)b.byte[2 * i]; +} diff --git a/code/vhaddw_hu_bu.cpp b/code/vhaddw_hu_bu.cpp new file mode 100644 index 00000000..aab5d058 --- /dev/null +++ b/code/vhaddw_hu_bu.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhaddw_hu_bu(v128 a, v128 b) { + v128 dst; +#include "vhaddw_hu_bu.h" + return dst; +} + +void test() { FUZZ2(vhaddw_hu_bu); } diff --git a/code/vhaddw_hu_bu.h b/code/vhaddw_hu_bu.h new file mode 100644 index 00000000..b8611e72 --- /dev/null +++ b/code/vhaddw_hu_bu.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = (u16)(u8)a.byte[2 * i + 1] + (u16)(u8)b.byte[2 * i]; +} diff --git a/code/vhaddw_q_d.cpp b/code/vhaddw_q_d.cpp new file mode 100644 index 00000000..3f67b49c --- /dev/null +++ b/code/vhaddw_q_d.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhaddw_q_d(v128 a, v128 b) { + v128 dst; +#include "vhaddw_q_d.h" + return dst; +} + +void test() { FUZZ2(vhaddw_q_d); } diff --git a/code/vhaddw_q_d.h b/code/vhaddw_q_d.h new file mode 100644 index 00000000..0f322198 --- /dev/null +++ b/code/vhaddw_q_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 1; i++) { + dst.qword[i] = (s128)(s64)a.dword[2 * i + 1] + (s128)(s64)b.dword[2 * i]; +} diff --git a/code/vhaddw_qu_du.cpp b/code/vhaddw_qu_du.cpp new file mode 100644 index 00000000..749d44b7 --- /dev/null +++ b/code/vhaddw_qu_du.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhaddw_qu_du(v128 a, v128 b) { + v128 dst; +#include "vhaddw_qu_du.h" + return dst; +} + +void test() { FUZZ2(vhaddw_qu_du); } diff --git a/code/vhaddw_qu_du.h b/code/vhaddw_qu_du.h new file mode 100644 index 00000000..965e148e --- /dev/null +++ b/code/vhaddw_qu_du.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 1; i++) { + dst.qword[i] = (u128)(u64)a.dword[2 * i + 1] + (u128)(u64)b.dword[2 * i]; +} diff --git 
a/code/vhaddw_w_h.cpp b/code/vhaddw_w_h.cpp new file mode 100644 index 00000000..a9b56d7b --- /dev/null +++ b/code/vhaddw_w_h.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhaddw_w_h(v128 a, v128 b) { + v128 dst; +#include "vhaddw_w_h.h" + return dst; +} + +void test() { FUZZ2(vhaddw_w_h); } diff --git a/code/vhaddw_w_h.h b/code/vhaddw_w_h.h new file mode 100644 index 00000000..fdfc5fb4 --- /dev/null +++ b/code/vhaddw_w_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = (s32)(s16)a.half[2 * i + 1] + (s32)(s16)b.half[2 * i]; +} diff --git a/code/vhaddw_wu_hu.cpp b/code/vhaddw_wu_hu.cpp new file mode 100644 index 00000000..a1e55178 --- /dev/null +++ b/code/vhaddw_wu_hu.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhaddw_wu_hu(v128 a, v128 b) { + v128 dst; +#include "vhaddw_wu_hu.h" + return dst; +} + +void test() { FUZZ2(vhaddw_wu_hu); } diff --git a/code/vhaddw_wu_hu.h b/code/vhaddw_wu_hu.h new file mode 100644 index 00000000..d8a53350 --- /dev/null +++ b/code/vhaddw_wu_hu.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = (u32)(u16)a.half[2 * i + 1] + (u32)(u16)b.half[2 * i]; +} diff --git a/code/vhsubw_d_w.cpp b/code/vhsubw_d_w.cpp new file mode 100644 index 00000000..fa7dbeb2 --- /dev/null +++ b/code/vhsubw_d_w.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhsubw_d_w(v128 a, v128 b) { + v128 dst; +#include "vhsubw_d_w.h" + return dst; +} + +void test() { FUZZ2(vhsubw_d_w); } diff --git a/code/vhsubw_d_w.h b/code/vhsubw_d_w.h new file mode 100644 index 00000000..643bcfd0 --- /dev/null +++ b/code/vhsubw_d_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = (s64)(s32)a.word[2 * i + 1] - (s64)(s32)b.word[2 * i]; +} diff --git a/code/vhsubw_du_wu.cpp b/code/vhsubw_du_wu.cpp new file mode 100644 index 00000000..7159c429 --- /dev/null +++ b/code/vhsubw_du_wu.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhsubw_du_wu(v128 a, v128 b) { + v128 dst; +#include "vhsubw_du_wu.h" + return dst; +} + +void test() { 
FUZZ2(vhsubw_du_wu); } diff --git a/code/vhsubw_du_wu.h b/code/vhsubw_du_wu.h new file mode 100644 index 00000000..5dbb472f --- /dev/null +++ b/code/vhsubw_du_wu.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = (u64)(u32)a.word[2 * i + 1] - (u64)(u32)b.word[2 * i]; +} diff --git a/code/vhsubw_h_b.cpp b/code/vhsubw_h_b.cpp new file mode 100644 index 00000000..19563260 --- /dev/null +++ b/code/vhsubw_h_b.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhsubw_h_b(v128 a, v128 b) { + v128 dst; +#include "vhsubw_h_b.h" + return dst; +} + +void test() { FUZZ2(vhsubw_h_b); } diff --git a/code/vhsubw_h_b.h b/code/vhsubw_h_b.h new file mode 100644 index 00000000..6b205ee4 --- /dev/null +++ b/code/vhsubw_h_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = (s16)(s8)a.byte[2 * i + 1] - (s16)(s8)b.byte[2 * i]; +} diff --git a/code/vhsubw_hu_bu.cpp b/code/vhsubw_hu_bu.cpp new file mode 100644 index 00000000..b29b83e1 --- /dev/null +++ b/code/vhsubw_hu_bu.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhsubw_hu_bu(v128 a, v128 b) { + v128 dst; +#include "vhsubw_hu_bu.h" + return dst; +} + +void test() { FUZZ2(vhsubw_hu_bu); } diff --git a/code/vhsubw_hu_bu.h b/code/vhsubw_hu_bu.h new file mode 100644 index 00000000..f632cfdf --- /dev/null +++ b/code/vhsubw_hu_bu.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = (u16)(u8)a.byte[2 * i + 1] - (u16)(u8)b.byte[2 * i]; +} diff --git a/code/vhsubw_q_d.cpp b/code/vhsubw_q_d.cpp new file mode 100644 index 00000000..812628f0 --- /dev/null +++ b/code/vhsubw_q_d.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhsubw_q_d(v128 a, v128 b) { + v128 dst; +#include "vhsubw_q_d.h" + return dst; +} + +void test() { FUZZ2(vhsubw_q_d); } diff --git a/code/vhsubw_q_d.h b/code/vhsubw_q_d.h new file mode 100644 index 00000000..c2aa30ce --- /dev/null +++ b/code/vhsubw_q_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 1; i++) { + dst.qword[i] = (s128)(s64)a.dword[2 * i + 1] - (s128)(s64)b.dword[2 * i]; +} diff 
--git a/code/vhsubw_qu_du.cpp b/code/vhsubw_qu_du.cpp new file mode 100644 index 00000000..359964a4 --- /dev/null +++ b/code/vhsubw_qu_du.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhsubw_qu_du(v128 a, v128 b) { + v128 dst; +#include "vhsubw_qu_du.h" + return dst; +} + +void test() { FUZZ2(vhsubw_qu_du); } diff --git a/code/vhsubw_qu_du.h b/code/vhsubw_qu_du.h new file mode 100644 index 00000000..be143993 --- /dev/null +++ b/code/vhsubw_qu_du.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 1; i++) { + dst.qword[i] = (u128)(u64)a.dword[2 * i + 1] - (u128)(u64)b.dword[2 * i]; +} diff --git a/code/vhsubw_w_h.cpp b/code/vhsubw_w_h.cpp new file mode 100644 index 00000000..a4dce5bd --- /dev/null +++ b/code/vhsubw_w_h.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhsubw_w_h(v128 a, v128 b) { + v128 dst; +#include "vhsubw_w_h.h" + return dst; +} + +void test() { FUZZ2(vhsubw_w_h); } diff --git a/code/vhsubw_w_h.h b/code/vhsubw_w_h.h new file mode 100644 index 00000000..4724c7c1 --- /dev/null +++ b/code/vhsubw_w_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = (s32)(s16)a.half[2 * i + 1] - (s32)(s16)b.half[2 * i]; +} diff --git a/code/vhsubw_wu_hu.cpp b/code/vhsubw_wu_hu.cpp new file mode 100644 index 00000000..4bfc4c49 --- /dev/null +++ b/code/vhsubw_wu_hu.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vhsubw_wu_hu(v128 a, v128 b) { + v128 dst; +#include "vhsubw_wu_hu.h" + return dst; +} + +void test() { FUZZ2(vhsubw_wu_hu); } diff --git a/code/vhsubw_wu_hu.h b/code/vhsubw_wu_hu.h new file mode 100644 index 00000000..61b55296 --- /dev/null +++ b/code/vhsubw_wu_hu.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = (u32)(u16)a.half[2 * i + 1] - (u32)(u16)b.half[2 * i]; +} diff --git a/docs/lsx/computation.md b/docs/lsx/computation.md index 6d5d81e2..05943b41 100644 --- a/docs/lsx/computation.md +++ b/docs/lsx/computation.md @@ -76,4 +76,22 @@ {{ vdiv('w') }} {{ vdiv('wu') }} {{ vdiv('d') }} -{{ vdiv('du') }} \ No newline at end of file +{{ 
vdiv('du') }} + +{{ vhaddw('h', 'b') }} +{{ vhaddw('hu', 'bu') }} +{{ vhaddw('w', 'h') }} +{{ vhaddw('wu', 'hu') }} +{{ vhaddw('d', 'w') }} +{{ vhaddw('du', 'wu') }} +{{ vhaddw('q', 'd') }} +{{ vhaddw('qu', 'du') }} + +{{ vhsubw('h', 'b') }} +{{ vhsubw('hu', 'bu') }} +{{ vhsubw('w', 'h') }} +{{ vhsubw('wu', 'hu') }} +{{ vhsubw('d', 'w') }} +{{ vhsubw('du', 'wu') }} +{{ vhsubw('q', 'd') }} +{{ vhsubw('qu', 'du') }} diff --git a/main.py b/main.py index 427f69d8..9eb711e4 100644 --- a/main.py +++ b/main.py @@ -343,6 +343,28 @@ def vfdiv(name): desc=f"Divide {fp_type} precision floating point elements in `a` by elements in `b`.", ) + @env.macro + def vhaddw(name, name2): + width = widths[name[0]] + width2 = widths[name2[0]] + signedness = signednesses[name] + return instruction( + intrinsic=f"__m128i __lsx_vhaddw_{name}_{name2} (__m128i a, __m128i b)", + instr=f"vhaddw.{name}.{name2} vr, vr, vr", + desc=f"Add odd-positioned {signedness} {width2}-bit elements in `a` to even-positioned {signedness} {width2}-bit elements in `b` to get {width}-bit result.", + ) + + @env.macro + def vhsubw(name, name2): + width = widths[name[0]] + width2 = widths[name2[0]] + signedness = signednesses[name] + return instruction( + intrinsic=f"__m128i __lsx_vhsubw_{name}_{name2} (__m128i a, __m128i b)", + instr=f"vhsubw.{name}.{name2} vr, vr, vr", + desc=f"Subtract odd-positioned {signedness} {width2}-bit elements in `a` by even-positioned {signedness} {width2}-bit elements in `b` to get {width}-bit result.", + ) + @env.macro def vshuf_hwd(name): width = widths[name]