From a2f61faa23dab171c02a2a817a9de42158f7d3f7 Mon Sep 17 00:00:00 2001
From: Jiajie Chen
Date: Tue, 12 Dec 2023 21:37:28 +0800
Subject: [PATCH] Add vmuh

---
Reviewer note (ignored by git am/apply): the line structure of this patch
was restored from a whitespace-collapsed copy. Hunk line counts match the
@@ headers and the diffstat, but the leading indentation of the Python and
C++ lines is inferred (4-space / 2-space) -- confirm against the target
tree, or apply with --ignore-whitespace.

 README.md                       |  4 ----
 code/gen_impl.py                |  7 +++++++
 code/gen_tb.py                  |  1 +
 code/vmuh_b.cpp                 |  9 +++++++++
 code/vmuh_b.h                   |  3 +++
 code/vmuh_bu.cpp                |  9 +++++++++
 code/vmuh_bu.h                  |  3 +++
 code/vmuh_d.cpp                 |  9 +++++++++
 code/vmuh_d.h                   |  3 +++
 code/vmuh_du.cpp                |  9 +++++++++
 code/vmuh_du.h                  |  3 +++
 code/vmuh_h.cpp                 |  9 +++++++++
 code/vmuh_h.h                   |  3 +++
 code/vmuh_hu.cpp                |  9 +++++++++
 code/vmuh_hu.h                  |  3 +++
 code/vmuh_w.cpp                 |  9 +++++++++
 code/vmuh_w.h                   |  3 +++
 code/vmuh_wu.cpp                |  9 +++++++++
 code/vmuh_wu.h                  |  3 +++
 docs/lsx/integer_computation.md |  9 +++++++++
 main.py                         | 10 ++++++++++
 21 files changed, 123 insertions(+), 4 deletions(-)
 create mode 100644 code/vmuh_b.cpp
 create mode 100644 code/vmuh_b.h
 create mode 100644 code/vmuh_bu.cpp
 create mode 100644 code/vmuh_bu.h
 create mode 100644 code/vmuh_d.cpp
 create mode 100644 code/vmuh_d.h
 create mode 100644 code/vmuh_du.cpp
 create mode 100644 code/vmuh_du.h
 create mode 100644 code/vmuh_h.cpp
 create mode 100644 code/vmuh_h.h
 create mode 100644 code/vmuh_hu.cpp
 create mode 100644 code/vmuh_hu.h
 create mode 100644 code/vmuh_w.cpp
 create mode 100644 code/vmuh_w.h
 create mode 100644 code/vmuh_wu.cpp
 create mode 100644 code/vmuh_wu.h

diff --git a/README.md b/README.md
index 746e929f..9e5a69ed 100644
--- a/README.md
+++ b/README.md
@@ -6,10 +6,6 @@ Arranged from QEMU implementation and [GCC Intrinsics](https://gcc.gnu.org/onlin
 
 TODO List:
 
-### vmuh.b/h/w/d/bu/hu/wu/du
-
-Vector Multiplication High
-
 ### vmod.b/h/w/d
 
 ### vmod.bu/hu/wu/du
diff --git a/code/gen_impl.py b/code/gen_impl.py
index 9dfa438c..235440c0 100644
--- a/code/gen_impl.py
+++ b/code/gen_impl.py
@@ -159,6 +159,13 @@
             file=f,
         )
         print(f"}}", file=f)
+    with open(f"vmuh_{width}.h", "w") as f:
+        print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
+        print(
+            f" dst.{m}[i] = ((({sign}{w * 2})({sign}{w})a.{m}[i] * ({sign}{w * 2})({sign}{w})b.{m}[i])) >> {w};",
+            file=f,
+        )
+        print(f"}}", file=f)
 
 for width in ["b", "bu", "h", "hu", "w", "wu", "d", "du"]:
     double_width = double_widths[width]
diff --git a/code/gen_tb.py b/code/gen_tb.py
index ea89cc05..7f76d09f 100644
--- a/code/gen_tb.py
+++ b/code/gen_tb.py
@@ -69,6 +69,7 @@
     "vmin": (widths_all, "v128 a, v128 b"),
     "vmini": (widths_all, "v128 a, int imm", [0, 3, 15]),
     "vmsub": (widths_signed, "v128 a, v128 b, v128 c"),
+    "vmuh": (widths_all, "v128 a, v128 b"),
     "vmul": (widths_signed, "v128 a, v128 b"),
     "vmulwev": (widths_vaddw, "v128 a, v128 b"),
     "vmulwod": (widths_vaddw, "v128 a, v128 b"),
diff --git a/code/vmuh_b.cpp b/code/vmuh_b.cpp
new file mode 100644
index 00000000..982d9b1c
--- /dev/null
+++ b/code/vmuh_b.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vmuh_b(v128 a, v128 b) {
+  v128 dst;
+#include "vmuh_b.h"
+  return dst;
+}
+
+void test() { FUZZ2(vmuh_b); }
diff --git a/code/vmuh_b.h b/code/vmuh_b.h
new file mode 100644
index 00000000..12822357
--- /dev/null
+++ b/code/vmuh_b.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 16; i++) {
+  dst.byte[i] = (((s16)(s8)a.byte[i] * (s16)(s8)b.byte[i])) >> 8;
+}
diff --git a/code/vmuh_bu.cpp b/code/vmuh_bu.cpp
new file mode 100644
index 00000000..08da45b7
--- /dev/null
+++ b/code/vmuh_bu.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vmuh_bu(v128 a, v128 b) {
+  v128 dst;
+#include "vmuh_bu.h"
+  return dst;
+}
+
+void test() { FUZZ2(vmuh_bu); }
diff --git a/code/vmuh_bu.h b/code/vmuh_bu.h
new file mode 100644
index 00000000..323b1453
--- /dev/null
+++ b/code/vmuh_bu.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 16; i++) {
+  dst.byte[i] = (((u16)(u8)a.byte[i] * (u16)(u8)b.byte[i])) >> 8;
+}
diff --git a/code/vmuh_d.cpp b/code/vmuh_d.cpp
new file mode 100644
index 00000000..82d3aa2d
--- /dev/null
+++ b/code/vmuh_d.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vmuh_d(v128 a, v128 b) {
+  v128 dst;
+#include "vmuh_d.h"
+  return dst;
+}
+
+void test() { FUZZ2(vmuh_d); }
diff --git a/code/vmuh_d.h b/code/vmuh_d.h
new file mode 100644
index 00000000..20a405ac
--- /dev/null
+++ b/code/vmuh_d.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 2; i++) {
+  dst.dword[i] = (((s128)(s64)a.dword[i] * (s128)(s64)b.dword[i])) >> 64;
+}
diff --git a/code/vmuh_du.cpp b/code/vmuh_du.cpp
new file mode 100644
index 00000000..64cd143a
--- /dev/null
+++ b/code/vmuh_du.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vmuh_du(v128 a, v128 b) {
+  v128 dst;
+#include "vmuh_du.h"
+  return dst;
+}
+
+void test() { FUZZ2(vmuh_du); }
diff --git a/code/vmuh_du.h b/code/vmuh_du.h
new file mode 100644
index 00000000..0553bc55
--- /dev/null
+++ b/code/vmuh_du.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 2; i++) {
+  dst.dword[i] = (((u128)(u64)a.dword[i] * (u128)(u64)b.dword[i])) >> 64;
+}
diff --git a/code/vmuh_h.cpp b/code/vmuh_h.cpp
new file mode 100644
index 00000000..e4b09a51
--- /dev/null
+++ b/code/vmuh_h.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vmuh_h(v128 a, v128 b) {
+  v128 dst;
+#include "vmuh_h.h"
+  return dst;
+}
+
+void test() { FUZZ2(vmuh_h); }
diff --git a/code/vmuh_h.h b/code/vmuh_h.h
new file mode 100644
index 00000000..7138dfc7
--- /dev/null
+++ b/code/vmuh_h.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 8; i++) {
+  dst.half[i] = (((s32)(s16)a.half[i] * (s32)(s16)b.half[i])) >> 16;
+}
diff --git a/code/vmuh_hu.cpp b/code/vmuh_hu.cpp
new file mode 100644
index 00000000..b49c397b
--- /dev/null
+++ b/code/vmuh_hu.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vmuh_hu(v128 a, v128 b) {
+  v128 dst;
+#include "vmuh_hu.h"
+  return dst;
+}
+
+void test() { FUZZ2(vmuh_hu); }
diff --git a/code/vmuh_hu.h b/code/vmuh_hu.h
new file mode 100644
index 00000000..2df2e6a8
--- /dev/null
+++ b/code/vmuh_hu.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 8; i++) {
+  dst.half[i] = (((u32)(u16)a.half[i] * (u32)(u16)b.half[i])) >> 16;
+}
diff --git a/code/vmuh_w.cpp b/code/vmuh_w.cpp
new file mode 100644
index 00000000..7c63eb6a
--- /dev/null
+++ b/code/vmuh_w.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vmuh_w(v128 a, v128 b) {
+  v128 dst;
+#include "vmuh_w.h"
+  return dst;
+}
+
+void test() { FUZZ2(vmuh_w); }
diff --git a/code/vmuh_w.h b/code/vmuh_w.h
new file mode 100644
index 00000000..903f13cf
--- /dev/null
+++ b/code/vmuh_w.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 4; i++) {
+  dst.word[i] = (((s64)(s32)a.word[i] * (s64)(s32)b.word[i])) >> 32;
+}
diff --git a/code/vmuh_wu.cpp b/code/vmuh_wu.cpp
new file mode 100644
index 00000000..556498ff
--- /dev/null
+++ b/code/vmuh_wu.cpp
@@ -0,0 +1,9 @@
+#include "common.h"
+
+v128 vmuh_wu(v128 a, v128 b) {
+  v128 dst;
+#include "vmuh_wu.h"
+  return dst;
+}
+
+void test() { FUZZ2(vmuh_wu); }
diff --git a/code/vmuh_wu.h b/code/vmuh_wu.h
new file mode 100644
index 00000000..99019aa8
--- /dev/null
+++ b/code/vmuh_wu.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 4; i++) {
+  dst.word[i] = (((u64)(u32)a.word[i] * (u64)(u32)b.word[i])) >> 32;
+}
diff --git a/docs/lsx/integer_computation.md b/docs/lsx/integer_computation.md
index a498d70a..63a4cf7e 100644
--- a/docs/lsx/integer_computation.md
+++ b/docs/lsx/integer_computation.md
@@ -168,6 +168,15 @@
 {{ vmsub('w') }}
 {{ vmsub('d') }}
 
+{{ vmuh('b') }}
+{{ vmuh('bu') }}
+{{ vmuh('h') }}
+{{ vmuh('hu') }}
+{{ vmuh('w') }}
+{{ vmuh('wu') }}
+{{ vmuh('d') }}
+{{ vmuh('du') }}
+
 {{ vmul('b') }}
 {{ vmul('h') }}
 {{ vmul('w') }}
diff --git a/main.py b/main.py
index 06da595e..f636506a 100644
--- a/main.py
+++ b/main.py
@@ -517,6 +517,16 @@ def vmul(name):
             desc=f"Multiply {width}-bit elements in `a` and `b`, save the result in `dst`.",
         )
 
+    @env.macro
+    def vmuh(name):
+        width = widths[name]
+        signedness = signednesses[name]
+        return instruction(
+            intrinsic=f"__m128i __lsx_vmuh_{name} (__m128i a, __m128i b)",
+            instr=f"vmuh.{name} vr, vr, vr",
+            desc=f"Multiply {signedness} {width}-bit elements in `a` and `b`, save the high {width}-bit result in `dst`.",
+        )
+
     @env.macro
     def vmsub(name):
         width = widths[name]