From 353adcd2952719094dc261135f1897a9350c8b23 Mon Sep 17 00:00:00 2001 From: Jiajie Chen Date: Tue, 12 Dec 2023 23:54:35 +0800 Subject: [PATCH] Add vslli/vsrli/vsrai --- README.md | 6 ------ code/gen_impl.py | 21 +++++++++++++++++++++ code/gen_tb.py | 3 +++ code/vslli_b.cpp | 12 ++++++++++++ code/vslli_b.h | 3 +++ code/vslli_d.cpp | 12 ++++++++++++ code/vslli_d.h | 3 +++ code/vslli_h.cpp | 12 ++++++++++++ code/vslli_h.h | 3 +++ code/vslli_w.cpp | 12 ++++++++++++ code/vslli_w.h | 3 +++ code/vsrai_b.cpp | 12 ++++++++++++ code/vsrai_b.h | 3 +++ code/vsrai_d.cpp | 12 ++++++++++++ code/vsrai_d.h | 3 +++ code/vsrai_h.cpp | 12 ++++++++++++ code/vsrai_h.h | 3 +++ code/vsrai_w.cpp | 12 ++++++++++++ code/vsrai_w.h | 3 +++ code/vsrli_b.cpp | 12 ++++++++++++ code/vsrli_b.h | 3 +++ code/vsrli_d.cpp | 12 ++++++++++++ code/vsrli_d.h | 3 +++ code/vsrli_h.cpp | 12 ++++++++++++ code/vsrli_h.h | 3 +++ code/vsrli_w.cpp | 12 ++++++++++++ code/vsrli_w.h | 3 +++ docs/lsx/shift.md | 17 ++++++++++++++++- main.py | 30 ++++++++++++++++++++++++++++++ 29 files changed, 250 insertions(+), 7 deletions(-) create mode 100644 code/vslli_b.cpp create mode 100644 code/vslli_b.h create mode 100644 code/vslli_d.cpp create mode 100644 code/vslli_d.h create mode 100644 code/vslli_h.cpp create mode 100644 code/vslli_h.h create mode 100644 code/vslli_w.cpp create mode 100644 code/vslli_w.h create mode 100644 code/vsrai_b.cpp create mode 100644 code/vsrai_b.h create mode 100644 code/vsrai_d.cpp create mode 100644 code/vsrai_d.h create mode 100644 code/vsrai_h.cpp create mode 100644 code/vsrai_h.h create mode 100644 code/vsrai_w.cpp create mode 100644 code/vsrai_w.h create mode 100644 code/vsrli_b.cpp create mode 100644 code/vsrli_b.h create mode 100644 code/vsrli_d.cpp create mode 100644 code/vsrli_d.h create mode 100644 code/vsrli_h.cpp create mode 100644 code/vsrli_h.h create mode 100644 code/vsrli_w.cpp create mode 100644 code/vsrli_w.h diff --git a/README.md b/README.md index 34a475f1..9dfdde2b 100644 --- a/README.md +++ b/README.md @@ -140,12 +140,6 @@ TODO List: ### vsat.b/h/w/d/bu/hu/wu/du -### vslli.b/h/w/d - -### vsrli.b/h/w/d - -### vsrai.b/h/w/d - ### vsrlni.b.h/h.w/w.d/d.q ### vsrlrni.b.h/h.w/w.d/d.q diff --git a/code/gen_impl.py b/code/gen_impl.py index e0505e4e..a53b2220 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -380,6 +380,13 @@ file=f, ) print(f"}}", file=f) + with open(f"vslli_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = a.{m}[i] << imm;", + file=f, + ) + print(f"}}", file=f) with open(f"vsrl_{width}.h", "w") as f: print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) print( @@ -387,6 +394,13 @@ file=f, ) print(f"}}", file=f) + with open(f"vsrli_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = a.{m}[i] >> imm;", + file=f, + ) + print(f"}}", file=f) with open(f"vsra_{width}.h", "w") as f: print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) print( @@ -394,6 +408,13 @@ file=f, ) print(f"}}", file=f) + with open(f"vsrai_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = ((s{w})a.{m}[i]) >> imm;", + file=f, + ) + print(f"}}", file=f) with open(f"vrotr_{width}.h", "w") as f: print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) print( diff --git a/code/gen_tb.py b/code/gen_tb.py index 7fe1045a..bc3cf901 100644 --- a/code/gen_tb.py +++ b/code/gen_tb.py @@ -98,14 +98,17 @@ "vsub": (widths_signed, "v128 a, v128 b"), "vsubi": (widths_unsigned, "v128 a, int imm", [0, 31]), "vsll": (widths_signed, "v128 a, v128 b"), + "vslli": (widths_signed, "v128 a, int imm", [0, 7]), "vslt": (widths_all, "v128 a, v128 b"), "vslti": (widths_all, "v128 a, int imm", [0, 15]), "vsle": (widths_all, "v128 a, v128 b"), "vslei": (widths_all, "v128 a, int imm", [0, 15]), "vsrl": (widths_signed, "v128 a, v128 b"), + "vsrli": (widths_signed, "v128 a, int imm", [0, 7]), "vsrlr": (widths_signed, "v128 a, v128 b"), "vsrlri": (widths_signed, "v128 a, int imm", [0, 7]), "vsra": (widths_signed, "v128 a, v128 b"), + "vsrai": (widths_signed, "v128 a, int imm", [0, 7]), "vsrar": (widths_signed, "v128 a, v128 b"), "vsrari": (widths_signed, "v128 a, int imm", [0, 7]), "vsub": (widths_signed, "v128 a, v128 b"), diff --git a/code/vslli_b.cpp b/code/vslli_b.cpp new file mode 100644 index 00000000..1d63cde9 --- /dev/null +++ b/code/vslli_b.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vslli_b(v128 a, int imm) { + v128 dst; +#include "vslli_b.h" + return dst; +} + +void test() { + FUZZ1(vslli_b, 0); + FUZZ1(vslli_b, 7); +} diff --git a/code/vslli_b.h b/code/vslli_b.h new file mode 100644 index 00000000..5479527e --- /dev/null +++ b/code/vslli_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = a.byte[i] << imm; +} diff --git a/code/vslli_d.cpp b/code/vslli_d.cpp new file mode 100644 index 00000000..fe8ba0da --- /dev/null +++ b/code/vslli_d.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vslli_d(v128 a, int imm) { + v128 dst; +#include "vslli_d.h" + return dst; +} + +void test() { + FUZZ1(vslli_d, 0); + FUZZ1(vslli_d, 7); +} diff --git a/code/vslli_d.h b/code/vslli_d.h new file mode 100644 index 00000000..07019426 --- /dev/null +++ b/code/vslli_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = a.dword[i] << imm; +} diff --git a/code/vslli_h.cpp b/code/vslli_h.cpp new file mode 100644 index 00000000..0aef7058 --- /dev/null +++ b/code/vslli_h.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vslli_h(v128 a, int imm) { + v128 dst; +#include "vslli_h.h" + return dst; +} + +void test() { + FUZZ1(vslli_h, 0); + FUZZ1(vslli_h, 7); +} diff --git a/code/vslli_h.h b/code/vslli_h.h new file mode 100644 index 00000000..637f08f7 --- /dev/null +++ b/code/vslli_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = a.half[i] << imm; +} diff --git a/code/vslli_w.cpp b/code/vslli_w.cpp new file mode 100644 index 00000000..34280b14 --- /dev/null +++ b/code/vslli_w.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vslli_w(v128 a, int imm) { + v128 dst; +#include "vslli_w.h" + return dst; +} + +void test() { + FUZZ1(vslli_w, 0); + FUZZ1(vslli_w, 7); +} diff --git a/code/vslli_w.h b/code/vslli_w.h new file mode 100644 index 00000000..2f82ccd3 --- /dev/null +++ b/code/vslli_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = a.word[i] << imm; +} diff --git a/code/vsrai_b.cpp b/code/vsrai_b.cpp new file mode 100644 index 00000000..fb467502 --- /dev/null +++ b/code/vsrai_b.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrai_b(v128 a, int imm) { + v128 dst; +#include "vsrai_b.h" + return dst; +} + +void test() { + FUZZ1(vsrai_b, 0); + FUZZ1(vsrai_b, 7); +} diff --git a/code/vsrai_b.h b/code/vsrai_b.h new file mode 100644 index 00000000..9aea2aea --- /dev/null +++ b/code/vsrai_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = ((s8)a.byte[i]) >> imm; +} diff --git a/code/vsrai_d.cpp b/code/vsrai_d.cpp new file mode 100644 index 00000000..be8aeb9f --- /dev/null +++ b/code/vsrai_d.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrai_d(v128 a, int imm) { + v128 dst; +#include "vsrai_d.h" + return dst; +} + +void test() { + FUZZ1(vsrai_d, 0); + FUZZ1(vsrai_d, 7); +} diff --git a/code/vsrai_d.h b/code/vsrai_d.h new file mode 100644 index 00000000..3b5dfca8 --- /dev/null +++ b/code/vsrai_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = ((s64)a.dword[i]) >> imm; +} diff --git a/code/vsrai_h.cpp b/code/vsrai_h.cpp new file mode 100644 index 00000000..a5c20057 --- /dev/null +++ b/code/vsrai_h.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrai_h(v128 a, int imm) { + v128 dst; +#include "vsrai_h.h" + return dst; +} + +void test() { + FUZZ1(vsrai_h, 0); + FUZZ1(vsrai_h, 7); +} diff --git a/code/vsrai_h.h b/code/vsrai_h.h new file mode 100644 index 00000000..15adf26a --- /dev/null +++ b/code/vsrai_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = ((s16)a.half[i]) >> imm; +} diff --git a/code/vsrai_w.cpp b/code/vsrai_w.cpp new file mode 100644 index 00000000..7d83742b --- /dev/null +++ b/code/vsrai_w.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrai_w(v128 a, int imm) { + v128 dst; +#include "vsrai_w.h" + return dst; +} + +void test() { + FUZZ1(vsrai_w, 0); + FUZZ1(vsrai_w, 7); +} diff --git a/code/vsrai_w.h b/code/vsrai_w.h new file mode 100644 index 00000000..f40e6fbb --- /dev/null +++ b/code/vsrai_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = ((s32)a.word[i]) >> imm; +} diff --git a/code/vsrli_b.cpp b/code/vsrli_b.cpp new file mode 100644 index 00000000..ebfe763d --- /dev/null +++ b/code/vsrli_b.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrli_b(v128 a, int imm) { + v128 dst; +#include "vsrli_b.h" + return dst; +} + +void test() { + FUZZ1(vsrli_b, 0); + FUZZ1(vsrli_b, 7); +} diff --git a/code/vsrli_b.h b/code/vsrli_b.h new file mode 100644 index 00000000..69d2572b --- /dev/null +++ b/code/vsrli_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = a.byte[i] >> imm; +} diff --git a/code/vsrli_d.cpp b/code/vsrli_d.cpp new file mode 100644 index 00000000..6037f220 --- /dev/null +++ b/code/vsrli_d.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrli_d(v128 a, int imm) { + v128 dst; +#include "vsrli_d.h" + return dst; +} + +void test() { + FUZZ1(vsrli_d, 0); + FUZZ1(vsrli_d, 7); +} diff --git a/code/vsrli_d.h b/code/vsrli_d.h new file mode 100644 index 00000000..a80eba33 --- /dev/null +++ b/code/vsrli_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = a.dword[i] >> imm; +} diff --git a/code/vsrli_h.cpp b/code/vsrli_h.cpp new file mode 100644 index 00000000..39732984 --- /dev/null +++ b/code/vsrli_h.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrli_h(v128 a, int imm) { + v128 dst; +#include "vsrli_h.h" + return dst; +} + +void test() { + FUZZ1(vsrli_h, 0); + FUZZ1(vsrli_h, 7); +} diff --git a/code/vsrli_h.h b/code/vsrli_h.h new file mode 100644 index 00000000..32bd9caa --- /dev/null +++ b/code/vsrli_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = a.half[i] >> imm; +} diff --git a/code/vsrli_w.cpp b/code/vsrli_w.cpp new file mode 100644 index 00000000..ecacc4bc --- /dev/null +++ b/code/vsrli_w.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsrli_w(v128 a, int imm) { + v128 dst; +#include "vsrli_w.h" + return dst; +} + +void test() { + FUZZ1(vsrli_w, 0); + FUZZ1(vsrli_w, 7); +} diff --git a/code/vsrli_w.h b/code/vsrli_w.h new file mode 100644 index 00000000..31032d36 --- /dev/null +++ b/code/vsrli_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = a.word[i] >> imm; +} diff --git a/docs/lsx/shift.md b/docs/lsx/shift.md index 42444115..5e2502c8 100644 --- a/docs/lsx/shift.md +++ b/docs/lsx/shift.md @@ -47,11 +47,21 @@ Compute 128-bit `a` shifted right by `imm * 8` bits. {{ vsll('w') }} {{ vsll('d') }} +{{ vslli('b') }} +{{ vslli('h') }} +{{ vslli('w') }} +{{ vslli('d') }} + {{ vsrl('b') }} {{ vsrl('h') }} {{ vsrl('w') }} {{ vsrl('d') }} +{{ vsrli('b') }} +{{ vsrli('h') }} +{{ vsrli('w') }} +{{ vsrli('d') }} + {{ vsrlr('b') }} {{ vsrlr('h') }} {{ vsrlr('w') }} @@ -67,6 +77,11 @@ Compute 128-bit `a` shifted right by `imm * 8` bits. {{ vsra('w') }} {{ vsra('d') }} +{{ vsrai('b') }} +{{ vsrai('h') }} +{{ vsrai('w') }} +{{ vsrai('d') }} + {{ vsrar('b') }} {{ vsrar('h') }} {{ vsrar('w') }} @@ -85,4 +100,4 @@ Compute 128-bit `a` shifted right by `imm * 8` bits. {{ vrotri('b') }} {{ vrotri('h') }} {{ vrotri('w') }} -{{ vrotri('d') }} \ No newline at end of file +{{ vrotri('d') }} diff --git a/main.py b/main.py index e38a9acb..74c11021 100644 --- a/main.py +++ b/main.py @@ -695,6 +695,16 @@ def vsll(name): desc=f"Logical left shift the unsigned {width}-bit elements in `a` by elements in `b`, store the result to `dst`.", ) + @env.macro + def vslli(name): + width = widths[name] + signedness = signednesses[name] + return instruction( + intrinsic=f"__m128i __lsx_vslli_{name} (__m128i a, imm0_{width-1} imm)", + instr=f"vslli.{name} vr, vr, imm", + desc=f"Logical left shift the unsigned {width}-bit elements in `a` by `imm`, store the result to `dst`.", + ) + @env.macro def vsrl(name): width = widths[name] @@ -705,6 +715,16 @@ def vsrl(name): desc=f"Logical right shift the unsigned {width}-bit elements in `a` by elements in `b`, store the result to `dst`.", ) + @env.macro + def vsrli(name): + width = widths[name] + signedness = signednesses[name] + return instruction( + intrinsic=f"__m128i __lsx_vsrli_{name} (__m128i a, imm0_{width-1} imm)", + instr=f"vsrli.{name} vr, vr, imm", + desc=f"Logical right shift the unsigned {width}-bit elements in `a` by `imm`, store the result to `dst`.", + ) + @env.macro def vsra(name): width = widths[name] @@ -715,6 +735,16 @@ def vsra(name): desc=f"Arithmetic right shift the signed {width}-bit elements in `a` by elements in `b`, store the result to `dst`.", ) + @env.macro + def vsrai(name): + width = widths[name] + signedness = signednesses[name] + return instruction( + intrinsic=f"__m128i __lsx_vsrai_{name} (__m128i a, imm0_{width-1} imm)", + instr=f"vsrai.{name} vr, vr, imm", + desc=f"Arithmetic right shift the signed {width}-bit elements in `a` by `imm`, store the result to `dst`.", + ) + @env.macro def vrotr(name): width = widths[name]