From 12d3782cd4d68f8d6c6bf240043c12c0d29d157d Mon Sep 17 00:00:00 2001 From: Jiajie Chen Date: Tue, 12 Dec 2023 19:09:12 +0800 Subject: [PATCH] Add vilvl/vilvh --- README.md | 4 ---- code/gen_impl.py | 14 ++++++++++++++ code/gen_tb.py | 2 ++ code/vilvh_b.cpp | 9 +++++++++ code/vilvh_b.h | 3 +++ code/vilvh_d.cpp | 9 +++++++++ code/vilvh_d.h | 3 +++ code/vilvh_h.cpp | 9 +++++++++ code/vilvh_h.h | 3 +++ code/vilvh_w.cpp | 9 +++++++++ code/vilvh_w.h | 3 +++ code/vilvl_b.cpp | 9 +++++++++ code/vilvl_b.h | 3 +++ code/vilvl_d.cpp | 9 +++++++++ code/vilvl_d.h | 3 +++ code/vilvl_h.cpp | 9 +++++++++ code/vilvl_h.h | 3 +++ code/vilvl_w.cpp | 9 +++++++++ code/vilvl_w.h | 3 +++ docs/lsx/interleave.md | 11 +++++++++++ main.py | 18 ++++++++++++++++++ 21 files changed, 141 insertions(+), 4 deletions(-) create mode 100644 code/vilvh_b.cpp create mode 100644 code/vilvh_b.h create mode 100644 code/vilvh_d.cpp create mode 100644 code/vilvh_d.h create mode 100644 code/vilvh_h.cpp create mode 100644 code/vilvh_h.h create mode 100644 code/vilvh_w.cpp create mode 100644 code/vilvh_w.h create mode 100644 code/vilvl_b.cpp create mode 100644 code/vilvl_b.h create mode 100644 code/vilvl_d.cpp create mode 100644 code/vilvl_d.h create mode 100644 code/vilvl_h.cpp create mode 100644 code/vilvl_h.h create mode 100644 code/vilvl_w.cpp create mode 100644 code/vilvl_w.h create mode 100644 docs/lsx/interleave.md diff --git a/README.md b/README.md index 7d0c97d3..ca2fb8be 100644 --- a/README.md +++ b/README.md @@ -146,10 +146,6 @@ Vector Multiplication High ### vpackod.b/h/w/d -### vilvl.b/h/w/d - -### vilvh.b/h/w/d - ### vpickev.b/h/w/d ### vpickod.b/h/w/d diff --git a/code/gen_impl.py b/code/gen_impl.py index e70624ce..56346856 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -219,6 +219,20 @@ file=f, ) print(f"}}", file=f) + with open(f"vilvh_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = (i % 2 == 1) ? a.{m}[i / 2 + {64 // w}] : b.{m}[i / 2 + {64 // w}];", + file=f, + ) + print(f"}}", file=f) + with open(f"vilvl_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = (i % 2 == 1) ? a.{m}[i / 2] : b.{m}[i / 2];", + file=f, + ) + print(f"}}", file=f) for width in ["s", "d"]: m = members_fp[width] diff --git a/code/gen_tb.py b/code/gen_tb.py index 57c0f610..3049a08d 100644 --- a/code/gen_tb.py +++ b/code/gen_tb.py @@ -39,6 +39,8 @@ "vextrins": (widths_signed, "v128 a, v128 b, int imm", [0, 3, 7, 15, 16, 32, 64, 128, 255]), "vhaddw": (widths_vexth, "v128 a, v128 b"), "vhsubw": (widths_vexth, "v128 a, v128 b"), + "vilvh": (widths_signed, "v128 a, v128 b"), + "vilvl": (widths_signed, "v128 a, v128 b"), } for name in tb: diff --git a/code/vilvh_b.cpp b/code/vilvh_b.cpp new file mode 100644 index 00000000..dcbdc88d --- /dev/null +++ b/code/vilvh_b.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vilvh_b(v128 a, v128 b) { + v128 dst; +#include "vilvh_b.h" + return dst; +} + +void test() { FUZZ2(vilvh_b); } diff --git a/code/vilvh_b.h b/code/vilvh_b.h new file mode 100644 index 00000000..ae66e3ce --- /dev/null +++ b/code/vilvh_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = (i % 2 == 1) ? a.byte[i / 2 + 8] : b.byte[i / 2 + 8]; +} diff --git a/code/vilvh_d.cpp b/code/vilvh_d.cpp new file mode 100644 index 00000000..b5c2e45e --- /dev/null +++ b/code/vilvh_d.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vilvh_d(v128 a, v128 b) { + v128 dst; +#include "vilvh_d.h" + return dst; +} + +void test() { FUZZ2(vilvh_d); } diff --git a/code/vilvh_d.h b/code/vilvh_d.h new file mode 100644 index 00000000..6157144f --- /dev/null +++ b/code/vilvh_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = (i % 2 == 1) ? a.dword[i / 2 + 1] : b.dword[i / 2 + 1]; +} diff --git a/code/vilvh_h.cpp b/code/vilvh_h.cpp new file mode 100644 index 00000000..13f74e22 --- /dev/null +++ b/code/vilvh_h.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vilvh_h(v128 a, v128 b) { + v128 dst; +#include "vilvh_h.h" + return dst; +} + +void test() { FUZZ2(vilvh_h); } diff --git a/code/vilvh_h.h b/code/vilvh_h.h new file mode 100644 index 00000000..d9e4afb3 --- /dev/null +++ b/code/vilvh_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = (i % 2 == 1) ? a.half[i / 2 + 4] : b.half[i / 2 + 4]; +} diff --git a/code/vilvh_w.cpp b/code/vilvh_w.cpp new file mode 100644 index 00000000..2f286b1b --- /dev/null +++ b/code/vilvh_w.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vilvh_w(v128 a, v128 b) { + v128 dst; +#include "vilvh_w.h" + return dst; +} + +void test() { FUZZ2(vilvh_w); } diff --git a/code/vilvh_w.h b/code/vilvh_w.h new file mode 100644 index 00000000..794b0755 --- /dev/null +++ b/code/vilvh_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = (i % 2 == 1) ? a.word[i / 2 + 2] : b.word[i / 2 + 2]; +} diff --git a/code/vilvl_b.cpp b/code/vilvl_b.cpp new file mode 100644 index 00000000..e0ddd668 --- /dev/null +++ b/code/vilvl_b.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vilvl_b(v128 a, v128 b) { + v128 dst; +#include "vilvl_b.h" + return dst; +} + +void test() { FUZZ2(vilvl_b); } diff --git a/code/vilvl_b.h b/code/vilvl_b.h new file mode 100644 index 00000000..5985803c --- /dev/null +++ b/code/vilvl_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = (i % 2 == 1) ? a.byte[i / 2] : b.byte[i / 2]; +} diff --git a/code/vilvl_d.cpp b/code/vilvl_d.cpp new file mode 100644 index 00000000..15624435 --- /dev/null +++ b/code/vilvl_d.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vilvl_d(v128 a, v128 b) { + v128 dst; +#include "vilvl_d.h" + return dst; +} + +void test() { FUZZ2(vilvl_d); } diff --git a/code/vilvl_d.h b/code/vilvl_d.h new file mode 100644 index 00000000..e53a7549 --- /dev/null +++ b/code/vilvl_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = (i % 2 == 1) ? a.dword[i / 2] : b.dword[i / 2]; +} diff --git a/code/vilvl_h.cpp b/code/vilvl_h.cpp new file mode 100644 index 00000000..a4b283c0 --- /dev/null +++ b/code/vilvl_h.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vilvl_h(v128 a, v128 b) { + v128 dst; +#include "vilvl_h.h" + return dst; +} + +void test() { FUZZ2(vilvl_h); } diff --git a/code/vilvl_h.h b/code/vilvl_h.h new file mode 100644 index 00000000..1f1fce9c --- /dev/null +++ b/code/vilvl_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = (i % 2 == 1) ? a.half[i / 2] : b.half[i / 2]; +} diff --git a/code/vilvl_w.cpp b/code/vilvl_w.cpp new file mode 100644 index 00000000..3872d861 --- /dev/null +++ b/code/vilvl_w.cpp @@ -0,0 +1,9 @@ +#include "common.h" + +v128 vilvl_w(v128 a, v128 b) { + v128 dst; +#include "vilvl_w.h" + return dst; +} + +void test() { FUZZ2(vilvl_w); } diff --git a/code/vilvl_w.h b/code/vilvl_w.h new file mode 100644 index 00000000..ba5cddd7 --- /dev/null +++ b/code/vilvl_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = (i % 2 == 1) ? a.word[i / 2] : b.word[i / 2]; +} diff --git a/docs/lsx/interleave.md b/docs/lsx/interleave.md new file mode 100644 index 00000000..4a89fd9d --- /dev/null +++ b/docs/lsx/interleave.md @@ -0,0 +1,11 @@ +# Interleaving + +{{ vilvh('b') }} +{{ vilvh('h') }} +{{ vilvh('w') }} +{{ vilvh('d') }} + +{{ vilvl('b') }} +{{ vilvl('h') }} +{{ vilvl('w') }} +{{ vilvl('d') }} \ No newline at end of file diff --git a/main.py b/main.py index 9eb711e4..f47eb2fc 100644 --- a/main.py +++ b/main.py @@ -365,6 +365,24 @@ def vhsubw(name, name2): desc=f"Subtract odd-positioned {signedness} {width2}-bit elements in `a` by even-positioned {signedness} {width2}-bit elements in 'b' to get {width}-bit result.", ) + @env.macro + def vilvh(name): + width = widths[name] + return instruction( + intrinsic=f"__m128i __lsx_vilvh_{name} (__m128i a, __m128i b)", + instr=f"vilvh.{name} vr, vr, vr", + desc=f"Interleave {width}-bit elements in higher half of `a` and `b`.", + ) + + @env.macro + def vilvl(name): + width = widths[name] + return instruction( + intrinsic=f"__m128i __lsx_vilvl_{name} (__m128i a, __m128i b)", + instr=f"vilvl.{name} vr, vr, vr", + desc=f"Interleave {width}-bit elements in lower half of `a` and `b`.", + ) + @env.macro def vshuf_hwd(name): width = widths[name]