From 8a226b6ee7521eb1b02d0d60742a1a7f0eaa3277 Mon Sep 17 00:00:00 2001 From: Jiajie Chen Date: Tue, 12 Dec 2023 19:57:11 +0800 Subject: [PATCH] Add vsub --- README.md | 6 ------ code/gen_impl.py | 8 ++++++++ code/gen_tb.py | 2 ++ code/vadd_b.h | 2 +- code/vsub_b.cpp | 9 +++++++++ code/vsub_b.h | 3 +++ code/vsub_d.cpp | 9 +++++++++ code/vsub_d.h | 3 +++ code/vsub_h.cpp | 9 +++++++++ code/vsub_h.h | 3 +++ code/vsub_q.cpp | 9 +++++++++ code/vsub_q.h | 1 + code/vsub_w.cpp | 9 +++++++++ code/vsub_w.h | 3 +++ docs/lsx/integer_computation.md | 7 +++++++ main.py | 8 ++++++++ 16 files changed, 84 insertions(+), 7 deletions(-) create mode 100644 code/vsub_b.cpp create mode 100644 code/vsub_b.h create mode 100644 code/vsub_d.cpp create mode 100644 code/vsub_d.h create mode 100644 code/vsub_h.cpp create mode 100644 code/vsub_h.h create mode 100644 code/vsub_q.cpp create mode 100644 code/vsub_q.h create mode 100644 code/vsub_w.cpp create mode 100644 code/vsub_w.h diff --git a/README.md b/README.md index daaf0f83..c8654c3a 100644 --- a/README.md +++ b/README.md @@ -22,10 +22,6 @@ Vector Store with Register Offset Vector Set Equal/Less than or Equal/Less Than -### vsub.b/h/w/d - -Vector Subtract - ### vsadd.b/h/w/d Vector Saturated Add @@ -124,8 +120,6 @@ Vector Multiplication High ### vfrstp.b/h -### vsub.q - ### vsignconv.b/h/w/d ### vfsub.s/d diff --git a/code/gen_impl.py b/code/gen_impl.py index 815896ca..ff49850b 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -186,6 +186,14 @@ for width in ["b", "h", "w", "d"]: w = widths[width] m = members[width] + for name, op in [("add", "+"), ("sub", "-")]: + with open(f"v{name}_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = a.{m}[i] {op} b.{m}[i];", + file=f, + ) + print(f"}}", file=f) with open(f"vbitclr_{width}.h", "w") as f: print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) print( diff --git a/code/gen_tb.py b/code/gen_tb.py index dae86f91..b97900db 100644 --- a/code/gen_tb.py +++ b/code/gen_tb.py @@ -31,6 +31,7 @@ tb = { # widths, args, extra args for imm + "vadd": (widths_signed, "v128 a, v128 b"), "vavg": (widths_all, "v128 a, v128 b"), "vavgr": (widths_all, "v128 a, v128 b"), "vaddwev": (widths_vaddw, "v128 a, v128 b"), @@ -66,6 +67,7 @@ "vmini": (widths_all, "v128 a, int imm", [0, 3, 15]), "vmulwev": (widths_vaddw, "v128 a, v128 b"), "vmulwod": (widths_vaddw, "v128 a, v128 b"), + "vsub": (widths_signed, "v128 a, v128 b"), "vsubwev": (widths_vsubw, "v128 a, v128 b"), "vsubwod": (widths_vsubw, "v128 a, v128 b"), } diff --git a/code/vadd_b.h b/code/vadd_b.h index 0f8b82f0..877bbbf6 100644 --- a/code/vadd_b.h +++ b/code/vadd_b.h @@ -1,3 +1,3 @@ for (int i = 0; i < 16; i++) { dst.byte[i] = a.byte[i] + b.byte[i]; -} \ No newline at end of file +} diff --git a/code/vsub_b.cpp b/code/vsub_b.cpp new file mode 100644 index 00000000..9eacccb0 --- /dev/null +++ b/code/vsub_b.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vsub_b(v128 a, v128 b) { + v128 dst; +#include "vsub_b.h" + return dst; +} + +void test() { FUZZ2(vsub_b); } diff --git a/code/vsub_b.h b/code/vsub_b.h new file mode 100644 index 00000000..0fc73f9d --- /dev/null +++ b/code/vsub_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = a.byte[i] - b.byte[i]; +} diff --git a/code/vsub_d.cpp b/code/vsub_d.cpp new file mode 100644 index 00000000..ae6cdec9 --- /dev/null +++ b/code/vsub_d.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vsub_d(v128 a, v128 b) { + v128 dst; +#include "vsub_d.h" + return dst; +} + +void test() { FUZZ2(vsub_d); } diff --git a/code/vsub_d.h b/code/vsub_d.h new file mode 100644 index 00000000..17f9fb29 --- /dev/null +++ b/code/vsub_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = a.dword[i] - b.dword[i]; +} diff --git a/code/vsub_h.cpp b/code/vsub_h.cpp new file mode 100644 index 00000000..e352a7ca --- /dev/null +++ b/code/vsub_h.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vsub_h(v128 a, v128 b) { + v128 dst; +#include "vsub_h.h" + return dst; +} + +void test() { FUZZ2(vsub_h); } diff --git a/code/vsub_h.h b/code/vsub_h.h new file mode 100644 index 00000000..f815f49f --- /dev/null +++ b/code/vsub_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = a.half[i] - b.half[i]; +} diff --git a/code/vsub_q.cpp b/code/vsub_q.cpp new file mode 100644 index 00000000..40a27c60 --- /dev/null +++ b/code/vsub_q.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vsub_q(v128 a, v128 b) { + v128 dst; +#include "vsub_q.h" + return dst; +} + +void test() { FUZZ2(vsub_q); } diff --git a/code/vsub_q.h b/code/vsub_q.h new file mode 100644 index 00000000..e3851e12 --- /dev/null +++ b/code/vsub_q.h @@ -0,0 +1 @@ +dst.qword[0] = a.qword[0] - b.qword[0]; diff --git a/code/vsub_w.cpp b/code/vsub_w.cpp new file mode 100644 index 00000000..d4c2a80d --- /dev/null +++ b/code/vsub_w.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vsub_w(v128 a, v128 b) { + v128 dst; +#include "vsub_w.h" + return dst; +} + +void test() { FUZZ2(vsub_w); } diff --git a/code/vsub_w.h b/code/vsub_w.h new file mode 100644 index 00000000..686b871d --- /dev/null +++ b/code/vsub_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = a.word[i] - b.word[i]; +} diff --git a/docs/lsx/integer_computation.md b/docs/lsx/integer_computation.md index 1abf3442..1ec8e724 100644 --- a/docs/lsx/integer_computation.md +++ b/docs/lsx/integer_computation.md @@ -145,6 +145,13 @@ {{ vmulwev('q', 'du') }} {{ vmulwev('q', 'du', 'd') }} +{{ vsub('b') }} +{{ vsub('h') }} +{{ vsub('w') }} +{{ vsub('d') }} +{{ vsub('q') }} + + {{ vsubwev('h', 'b') }} {{ vsubwev('h', 'bu') }} {{ vsubwev('w', 'h') }} diff --git a/main.py b/main.py index 1560e318..07726325 100644 --- a/main.py +++ b/main.py @@ -447,3 +447,11 @@ def vldrepl(name): desc=f"Read {width}-bit data from memory address `addr + (offset << {shift})`, replicate the data to all vector lanes and save into `dst`.", ) + @env.macro + def vsub(name): + width = widths[name] + return instruction( + intrinsic=f"__m128i __lsx_vsub_{name} (__m128i a, __m128i b)", + instr=f"vsub.{name} vr, vr, vr", + desc=f"Subtract {width}-bit elements in `a` and `b`, save the result in `dst`.", + )