Skip to content

Commit

Permalink
Add vshuf4i
Browse files Browse the repository at this point in the history
  • Loading branch information
jiegec committed Dec 12, 2023
1 parent 4d9cac3 commit 3868152
Show file tree
Hide file tree
Showing 13 changed files with 100 additions and 2 deletions.
2 changes: 0 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,4 @@ TODO List:

### vssrarni.bu.h/hu.w/wu.d/du.q

### vshuf4i.b/h/w/d

### vldi
8 changes: 8 additions & 0 deletions code/gen_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -550,6 +550,14 @@
file=f,
)
print(f"}}", file=f)
# Write vshuf4i_{width}.h, the C loop body for the vshuf4i variants handled
# by this branch. The "d" width is excluded by the check below.
if width != "d":
with open(f"vshuf4i_{width}.h", "w") as f:
# The emitted loop runs over 128 // w elements (w is presumably the element
# width in bits -- confirm against the enclosing loop).
print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
# Each destination element reads from its own aligned group of four
# (i & ~0x3); the offset inside that group is the 2-bit field of imm
# selected by the element's position in the group (i & 0x3).
print(
f" dst.{m}[i] = a.{m}[(i & ~0x3) + ((imm >> (2 * (i & 0x3))) & 0x3)];",
file=f,
)
# "}}" is an escaped brace: this closes the emitted C for-loop.
print(f"}}", file=f)

for width in ["s", "d"]:
m = members_fp[width]
Expand Down
1 change: 1 addition & 0 deletions code/gen_tb.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@
"vsadd": (widths_all, "v128 a, v128 b"),
"vseq": (widths_signed, "v128 a, v128 b"),
"vseqi": (widths_signed, "v128 a, int imm", [-16, 0, 15]),
"vshuf4i": (["b", "h", "w"], "v128 a, int imm", [0, 13, 100, 128, 255]),
"vsigncov": (widths_signed, "v128 a, v128 b"),
"vssub": (widths_all, "v128 a, v128 b"),
"vsub": (widths_signed, "v128 a, v128 b"),
Expand Down
15 changes: 15 additions & 0 deletions code/vshuf4i_b.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "common.h"

// Returns `dst` as filled in from `a` and `imm` by the statements that are
// textually included from vshuf4i_b.h.
v128 vshuf4i_b(v128 a, int imm) {
v128 dst;
// The included fragment assigns every dst.byte[i]; it relies on the local
// names `dst`, `a` and `imm` declared in this function.
#include "vshuf4i_b.h"
return dst;
}

// Invokes FUZZ1 on vshuf4i_b once per immediate value: 0, 13, 100, 128, 255.
// FUZZ1 is not defined in this file (presumably provided by common.h).
void test() {
FUZZ1(vshuf4i_b, 0);
FUZZ1(vshuf4i_b, 13);
FUZZ1(vshuf4i_b, 100);
FUZZ1(vshuf4i_b, 128);
FUZZ1(vshuf4i_b, 255);
}
3 changes: 3 additions & 0 deletions code/vshuf4i_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// For each of the 16 bytes: stay inside the element's aligned group of four
// (i & ~0x3) and pick the source at the offset given by the 2-bit field of
// imm selected by the element's position in the group (i & 0x3).
for (int i = 0; i < 16; i++) {
dst.byte[i] = a.byte[(i & ~0x3) + ((imm >> (2 * (i & 0x3))) & 0x3)];
}
15 changes: 15 additions & 0 deletions code/vshuf4i_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "common.h"

// Returns `dst` as filled in from `a`, `b` and `imm` by the statements that
// are textually included from vshuf4i_d.h.
v128 vshuf4i_d(v128 a, v128 b, int imm) {
v128 dst;
// The included fragment assigns dst.dword[0] and dst.dword[1]; it relies on
// the local names `dst`, `a`, `b` and `imm` declared in this function.
#include "vshuf4i_d.h"
return dst;
}

// Invokes FUZZ2 on vshuf4i_d once per immediate value: 0, 13, 100, 128, 255.
// FUZZ2 is not defined in this file (presumably provided by common.h).
void test() {
FUZZ2(vshuf4i_d, 0);
FUZZ2(vshuf4i_d, 13);
FUZZ2(vshuf4i_d, 100);
FUZZ2(vshuf4i_d, 128);
FUZZ2(vshuf4i_d, 255);
}
2 changes: 2 additions & 0 deletions code/vshuf4i_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
// dst.dword[0]: imm bit 1 chooses the source vector (b if set, otherwise a),
// imm bit 0 chooses which dword of that vector is copied.
dst.dword[0] = (imm & 2) ? b.dword[(imm & 1)] : a.dword[(imm & 1)];
// dst.dword[1]: same scheme using imm bit 3 (source vector) and bit 2 (dword).
dst.dword[1] = (imm & 8) ? b.dword[((imm >> 2) & 1)] : a.dword[((imm >> 2) & 1)];
15 changes: 15 additions & 0 deletions code/vshuf4i_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "common.h"

// Returns `dst` as filled in from `a` and `imm` by the statements that are
// textually included from vshuf4i_h.h.
v128 vshuf4i_h(v128 a, int imm) {
v128 dst;
// The included fragment assigns every dst.half[i]; it relies on the local
// names `dst`, `a` and `imm` declared in this function.
#include "vshuf4i_h.h"
return dst;
}

// Invokes FUZZ1 on vshuf4i_h once per immediate value: 0, 13, 100, 128, 255.
// FUZZ1 is not defined in this file (presumably provided by common.h).
void test() {
FUZZ1(vshuf4i_h, 0);
FUZZ1(vshuf4i_h, 13);
FUZZ1(vshuf4i_h, 100);
FUZZ1(vshuf4i_h, 128);
FUZZ1(vshuf4i_h, 255);
}
3 changes: 3 additions & 0 deletions code/vshuf4i_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// For each of the 8 halfwords: stay inside the element's aligned group of
// four (i & ~0x3) and pick the source at the offset given by the 2-bit field
// of imm selected by the element's position in the group (i & 0x3).
for (int i = 0; i < 8; i++) {
dst.half[i] = a.half[(i & ~0x3) + ((imm >> (2 * (i & 0x3))) & 0x3)];
}
15 changes: 15 additions & 0 deletions code/vshuf4i_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include "common.h"

// Returns `dst` as filled in from `a` and `imm` by the statements that are
// textually included from vshuf4i_w.h.
v128 vshuf4i_w(v128 a, int imm) {
v128 dst;
// The included fragment assigns every dst.word[i]; it relies on the local
// names `dst`, `a` and `imm` declared in this function.
#include "vshuf4i_w.h"
return dst;
}

// Invokes FUZZ1 on vshuf4i_w once per immediate value: 0, 13, 100, 128, 255.
// FUZZ1 is not defined in this file (presumably provided by common.h).
void test() {
FUZZ1(vshuf4i_w, 0);
FUZZ1(vshuf4i_w, 13);
FUZZ1(vshuf4i_w, 100);
FUZZ1(vshuf4i_w, 128);
FUZZ1(vshuf4i_w, 255);
}
3 changes: 3 additions & 0 deletions code/vshuf4i_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// For each of the 4 words: (i & ~0x3) is always 0 here, so all four words
// form a single group; the source offset is the 2-bit field of imm selected
// by the element's position (i & 0x3).
for (int i = 0; i < 4; i++) {
dst.word[i] = a.word[(i & ~0x3) + ((imm >> (2 * (i & 0x3))) & 0x3)];
}
5 changes: 5 additions & 0 deletions docs/lsx/shuffling.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,8 @@ Caveat: the indices are placed in `c`, while in other `vshuf` intrinsics they ar
{{ vshuf_hwd('h') }}
{{ vshuf_hwd('w') }}
{{ vshuf_hwd('d') }}

{{ vshuf4i('b') }}
{{ vshuf4i('h') }}
{{ vshuf4i('w') }}
{{ vshuf4i('d') }}
15 changes: 15 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -936,4 +936,19 @@ def vpickve2gr(name):
intrinsic=f"{return_type} __lsx_vpickve2gr_{name} (__m128i a, imm0_{128 // width - 1} idx)",
instr=f"vpickve2gr.{name} r, vr, imm",
desc=f"Pick the `lane` specified by `idx` from `a` and store into `dst`.",
)

@env.macro
def vshuf4i(name):
    """Build the documentation entry for the `__lsx_vshuf4i_{name}` intrinsic.

    `name` is the element-width suffix; it is looked up in `widths` to get
    the bit width quoted in the description. The "d" variant has an extra
    vector operand `b`, which is inserted into both the intrinsic signature
    and the description text.
    """
    width = widths[name]
    # Only the "d" form carries the second source operand.
    takes_second_operand = name == "d"
    second_operand = " __m128i b," if takes_second_operand else ""
    second_operand_desc = " and `b`" if takes_second_operand else ""
    signature = (
        f"__m128i __lsx_vshuf4i_{name} (__m128i a,{second_operand} imm0_255 imm)"
    )
    mnemonic = f"vshuf4i.{name} vr, vr, imm"
    summary = f"Shuffle every four {width}-bit elements in `a`{second_operand_desc} with indices packed in `imm`, save the result to `dst`."
    return instruction(
        intrinsic=signature,
        instr=mnemonic,
        desc=summary,
    )

0 comments on commit 3868152

Please sign in to comment.