Skip to content

Commit

Permalink
Add vsran/vsrlni/vsrani
Browse files Browse the repository at this point in the history
  • Loading branch information
jiegec committed Dec 13, 2023
1 parent f1abe45 commit 7b4d180
Show file tree
Hide file tree
Showing 28 changed files with 262 additions and 29 deletions.
6 changes: 0 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@ Arranged from QEMU implementation and [GCC Intrinsics](https://gcc.gnu.org/onlin

TODO List:

### vsran.b.h/h.w/w.d

### vsrlrn.b.h/h.w/w.d

### vsrarn.b.h/h.w/w.d
Expand Down Expand Up @@ -124,16 +122,12 @@ TODO List:

### vsat.b/h/w/d/bu/hu/wu/du

### vsrlni.b.h/h.w/w.d/d.q

### vsrlrni.b.h/h.w/w.d/d.q

### vssrlni.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q

### vssrlrni.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q

### vsrani.b.h/h.w/w.d/d.q

### vsrarni.b.h/h.w/w.d/d.q

### vssrani.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q
Expand Down
22 changes: 22 additions & 0 deletions code/gen_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,28 @@
file=f,
)
print(f"}}", file=f)
with open(f"vsran_{width}_{double_width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
print(
f" dst.{m}[i] = (i < {64 // w}) ? (s{w})((s{double_w})a.{double_m}[i] >> (b.{double_m}[i] & {double_w-1})) : 0;",
file=f,
)
print(f"}}", file=f)
if sign == "s":
with open(f"vsrlni_{width}_{double_width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
print(
f" dst.{m}[i] = (i < {64 // w}) ? (u{w})((u{double_w})b.{double_m}[i] >> imm) : (u{w})((u{double_w})a.{double_m}[i - {64 // w}] >> imm);",
file=f,
)
print(f"}}", file=f)
with open(f"vsrani_{width}_{double_width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
print(
f" dst.{m}[i] = (i < {64 // w}) ? (s{w})((s{double_w})b.{double_m}[i] >> imm) : (s{w})((s{double_w})a.{double_m}[i - {64 // w}] >> imm);",
file=f,
)
print(f"}}", file=f)

if width == "d" or width == "du":
with open(f"vextl_{double_width}_{width}.h", "w") as f:
Expand Down
12 changes: 8 additions & 4 deletions code/gen_tb.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
widths_vexth = ["h_b", "hu_bu", "w_h", "wu_hu", "d_w", "du_wu", "q_d", "qu_du"]
widths_vsllwil = ["h_b", "hu_bu", "w_h", "wu_hu", "d_w", "du_wu"]
widths_vsrln = ["b_h", "h_w", "w_d"]
widths_vsrlni = ["b_h", "h_w", "w_d", "d_q"]
widths_vaddw = [
"h_b",
"h_bu",
Expand Down Expand Up @@ -108,15 +109,18 @@
"vslti": (widths_all, "v128 a, int imm", [0, 15]),
"vsle": (widths_all, "v128 a, v128 b"),
"vslei": (widths_all, "v128 a, int imm", [0, 15]),
"vsra": (widths_signed, "v128 a, v128 b"),
"vsrai": (widths_signed, "v128 a, int imm", [0, 7]),
"vsran": (widths_vsrln, "v128 a, v128 b"),
"vsrani": (widths_vsrlni, "v128 a, v128 b, int imm", [0, 7]),
"vsrar": (widths_signed, "v128 a, v128 b"),
"vsrari": (widths_signed, "v128 a, int imm", [0, 7]),
"vsrl": (widths_signed, "v128 a, v128 b"),
"vsrli": (widths_signed, "v128 a, int imm", [0, 7]),
"vsrln": (widths_vsrln, "v128 a, v128 b"),
"vsrlni": (widths_vsrlni, "v128 a, v128 b, int imm", [0, 7]),
"vsrlr": (widths_signed, "v128 a, v128 b"),
"vsrlri": (widths_signed, "v128 a, int imm", [0, 7]),
"vsra": (widths_signed, "v128 a, v128 b"),
"vsrai": (widths_signed, "v128 a, int imm", [0, 7]),
"vsrar": (widths_signed, "v128 a, v128 b"),
"vsrari": (widths_signed, "v128 a, int imm", [0, 7]),
"vsub": (widths_signed, "v128 a, v128 b"),
"vsubwev": (widths_vsubw, "v128 a, v128 b"),
"vsubwod": (widths_vsubw, "v128 a, v128 b"),
Expand Down
9 changes: 9 additions & 0 deletions code/vsran_b_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Wrapper for vsran.b.h: the lane-by-lane computation is kept in
// vsran_b_h.h and textually included here so that it operates on the local
// `a`, `b` and `dst`.
v128 vsran_b_h(v128 a, v128 b) {
v128 dst;
// The included fragment assigns all 16 byte lanes of dst.
#include "vsran_b_h.h"
return dst;
}

// Test entry point: passes the wrapper to FUZZ2.
// NOTE(review): FUZZ2 is not defined in this file (presumably in common.h);
// it appears to check this function against the native intrinsic - confirm.
void test() { FUZZ2(vsran_b_h); }
3 changes: 3 additions & 0 deletions code/vsran_b_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lanes 0..7: take the 16-bit lane of a as s16, shift it right by the low
// four bits of the matching lane of b, and narrow to 8 bits.
// Lanes 8..15: cleared to zero.
for (int i = 0; i < 16; i++) {
  if (i < 8) {
    dst.byte[i] = (s8)((s16)a.half[i] >> (b.half[i] & 15));
  } else {
    dst.byte[i] = 0;
  }
}
3 changes: 3 additions & 0 deletions code/vsran_d_q.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane 0: take the 128-bit lane of a as s128, shift it right by the low
// seven bits of the matching lane of b, and narrow to 64 bits.
// Lane 1: cleared to zero.
for (int i = 0; i < 2; i++) {
  if (i < 1) {
    dst.dword[i] = (s64)((s128)a.qword[i] >> (b.qword[i] & 127));
  } else {
    dst.dword[i] = 0;
  }
}
9 changes: 9 additions & 0 deletions code/vsran_h_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Wrapper for vsran.h.w: the lane-by-lane computation is kept in
// vsran_h_w.h and textually included here so that it operates on the local
// `a`, `b` and `dst`.
v128 vsran_h_w(v128 a, v128 b) {
v128 dst;
// The included fragment assigns all 8 half-word lanes of dst.
#include "vsran_h_w.h"
return dst;
}

// Test entry point: passes the wrapper to FUZZ2.
// NOTE(review): FUZZ2 is not defined in this file (presumably in common.h);
// it appears to check this function against the native intrinsic - confirm.
void test() { FUZZ2(vsran_h_w); }
3 changes: 3 additions & 0 deletions code/vsran_h_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lanes 0..3: take the 32-bit lane of a as s32, shift it right by the low
// five bits of the matching lane of b, and narrow to 16 bits.
// Lanes 4..7: cleared to zero.
for (int i = 0; i < 8; i++) {
  if (i < 4) {
    dst.half[i] = (s16)((s32)a.word[i] >> (b.word[i] & 31));
  } else {
    dst.half[i] = 0;
  }
}
9 changes: 9 additions & 0 deletions code/vsran_w_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Wrapper for vsran.w.d: the lane-by-lane computation is kept in
// vsran_w_d.h and textually included here so that it operates on the local
// `a`, `b` and `dst`.
v128 vsran_w_d(v128 a, v128 b) {
v128 dst;
// The included fragment assigns all 4 word lanes of dst.
#include "vsran_w_d.h"
return dst;
}

// Test entry point: passes the wrapper to FUZZ2.
// NOTE(review): FUZZ2 is not defined in this file (presumably in common.h);
// it appears to check this function against the native intrinsic - confirm.
void test() { FUZZ2(vsran_w_d); }
3 changes: 3 additions & 0 deletions code/vsran_w_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lanes 0..1: take the 64-bit lane of a as s64, shift it right by the low
// six bits of the matching lane of b, and narrow to 32 bits.
// Lanes 2..3: cleared to zero.
for (int i = 0; i < 4; i++) {
  if (i < 2) {
    dst.word[i] = (s32)((s64)a.dword[i] >> (b.dword[i] & 63));
  } else {
    dst.word[i] = 0;
  }
}
12 changes: 12 additions & 0 deletions code/vsrani_b_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#include "common.h"

// Wrapper for vsrani.b.h: the lane-by-lane computation is kept in
// vsrani_b_h.h and textually included here; that fragment reads `a`, `b`
// and uses `imm` as the shift amount.
v128 vsrani_b_h(v128 a, v128 b, int imm) {
v128 dst;
// The included fragment assigns all 16 byte lanes of dst.
#include "vsrani_b_h.h"
return dst;
}

// Test entry point: runs FUZZ2 on the wrapper with imm fixed to 0 and to 7.
// NOTE(review): FUZZ2 is not defined in this file (presumably in common.h).
// Immediates above 7 are not exercised here.
void test() {
FUZZ2(vsrani_b_h, 0);
FUZZ2(vsrani_b_h, 7);
}
4 changes: 4 additions & 0 deletions code/vsrani_b_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Lanes 0..7 are produced from b, lanes 8..15 from a. Each 16-bit source
// lane is taken as s16, shifted right by imm, and narrowed to 8 bits.
for (int i = 0; i < 16; i++) {
  if (i < 8) {
    dst.byte[i] = (s8)((s16)b.half[i] >> imm);
  } else {
    dst.byte[i] = (s8)((s16)a.half[i - 8] >> imm);
  }
}
12 changes: 12 additions & 0 deletions code/vsrani_d_q.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#include "common.h"

// Wrapper for vsrani.d.q: the lane-by-lane computation is kept in
// vsrani_d_q.h and textually included here; that fragment reads `a`, `b`
// and uses `imm` as the shift amount.
v128 vsrani_d_q(v128 a, v128 b, int imm) {
v128 dst;
// The included fragment assigns both double-word lanes of dst.
#include "vsrani_d_q.h"
return dst;
}

// Test entry point: runs FUZZ2 on the wrapper with imm fixed to 0 and to 7.
// NOTE(review): FUZZ2 is not defined in this file (presumably in common.h).
// Immediates above 7 are not exercised here.
void test() {
FUZZ2(vsrani_d_q, 0);
FUZZ2(vsrani_d_q, 7);
}
4 changes: 4 additions & 0 deletions code/vsrani_d_q.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Lane 0 is produced from b, lane 1 from a. The 128-bit source lane is
// taken as s128, shifted right by imm, and narrowed to 64 bits.
for (int i = 0; i < 2; i++) {
  if (i < 1) {
    dst.dword[i] = (s64)((s128)b.qword[i] >> imm);
  } else {
    dst.dword[i] = (s64)((s128)a.qword[i - 1] >> imm);
  }
}
12 changes: 12 additions & 0 deletions code/vsrani_h_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#include "common.h"

// Wrapper for vsrani.h.w: the lane-by-lane computation is kept in
// vsrani_h_w.h and textually included here; that fragment reads `a`, `b`
// and uses `imm` as the shift amount.
v128 vsrani_h_w(v128 a, v128 b, int imm) {
v128 dst;
// The included fragment assigns all 8 half-word lanes of dst.
#include "vsrani_h_w.h"
return dst;
}

// Test entry point: runs FUZZ2 on the wrapper with imm fixed to 0 and to 7.
// NOTE(review): FUZZ2 is not defined in this file (presumably in common.h).
// Immediates above 7 are not exercised here.
void test() {
FUZZ2(vsrani_h_w, 0);
FUZZ2(vsrani_h_w, 7);
}
4 changes: 4 additions & 0 deletions code/vsrani_h_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Lanes 0..3 are produced from b, lanes 4..7 from a. Each 32-bit source
// lane is taken as s32, shifted right by imm, and narrowed to 16 bits.
for (int i = 0; i < 8; i++) {
  if (i < 4) {
    dst.half[i] = (s16)((s32)b.word[i] >> imm);
  } else {
    dst.half[i] = (s16)((s32)a.word[i - 4] >> imm);
  }
}
12 changes: 12 additions & 0 deletions code/vsrani_w_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#include "common.h"

// Wrapper for vsrani.w.d: the lane-by-lane computation is kept in
// vsrani_w_d.h and textually included here; that fragment reads `a`, `b`
// and uses `imm` as the shift amount.
v128 vsrani_w_d(v128 a, v128 b, int imm) {
v128 dst;
// The included fragment assigns all 4 word lanes of dst.
#include "vsrani_w_d.h"
return dst;
}

// Test entry point: runs FUZZ2 on the wrapper with imm fixed to 0 and to 7.
// NOTE(review): FUZZ2 is not defined in this file (presumably in common.h).
// Immediates above 7 are not exercised here.
void test() {
FUZZ2(vsrani_w_d, 0);
FUZZ2(vsrani_w_d, 7);
}
4 changes: 4 additions & 0 deletions code/vsrani_w_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Lanes 0..1 are produced from b, lanes 2..3 from a. Each 64-bit source
// lane is taken as s64, shifted right by imm, and narrowed to 32 bits.
for (int i = 0; i < 4; i++) {
  if (i < 2) {
    dst.word[i] = (s32)((s64)b.dword[i] >> imm);
  } else {
    dst.word[i] = (s32)((s64)a.dword[i - 2] >> imm);
  }
}
12 changes: 12 additions & 0 deletions code/vsrlni_b_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#include "common.h"

// Wrapper for vsrlni.b.h: the lane-by-lane computation is kept in
// vsrlni_b_h.h and textually included here; that fragment reads `a`, `b`
// and uses `imm` as the shift amount.
v128 vsrlni_b_h(v128 a, v128 b, int imm) {
v128 dst;
// The included fragment assigns all 16 byte lanes of dst.
#include "vsrlni_b_h.h"
return dst;
}

// Test entry point: runs FUZZ2 on the wrapper with imm fixed to 0 and to 7.
// NOTE(review): FUZZ2 is not defined in this file (presumably in common.h).
// Immediates above 7 are not exercised here.
void test() {
FUZZ2(vsrlni_b_h, 0);
FUZZ2(vsrlni_b_h, 7);
}
4 changes: 4 additions & 0 deletions code/vsrlni_b_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Lanes 0..7 are produced from b, lanes 8..15 from a. Each 16-bit source
// lane is taken as u16, shifted right by imm, and narrowed to 8 bits.
for (int i = 0; i < 16; i++) {
  if (i < 8) {
    dst.byte[i] = (u8)((u16)b.half[i] >> imm);
  } else {
    dst.byte[i] = (u8)((u16)a.half[i - 8] >> imm);
  }
}
12 changes: 12 additions & 0 deletions code/vsrlni_d_q.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#include "common.h"

// Wrapper for vsrlni.d.q: the lane-by-lane computation is kept in
// vsrlni_d_q.h and textually included here; that fragment reads `a`, `b`
// and uses `imm` as the shift amount.
v128 vsrlni_d_q(v128 a, v128 b, int imm) {
v128 dst;
// The included fragment assigns both double-word lanes of dst.
#include "vsrlni_d_q.h"
return dst;
}

// Test entry point: runs FUZZ2 on the wrapper with imm fixed to 0 and to 7.
// NOTE(review): FUZZ2 is not defined in this file (presumably in common.h).
// Immediates above 7 are not exercised here.
void test() {
FUZZ2(vsrlni_d_q, 0);
FUZZ2(vsrlni_d_q, 7);
}
4 changes: 4 additions & 0 deletions code/vsrlni_d_q.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Lane 0 is produced from b, lane 1 from a. The 128-bit source lane is
// taken as u128, shifted right by imm, and narrowed to 64 bits.
for (int i = 0; i < 2; i++) {
  if (i < 1) {
    dst.dword[i] = (u64)((u128)b.qword[i] >> imm);
  } else {
    dst.dword[i] = (u64)((u128)a.qword[i - 1] >> imm);
  }
}
12 changes: 12 additions & 0 deletions code/vsrlni_h_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#include "common.h"

// Wrapper for vsrlni.h.w: the lane-by-lane computation is kept in
// vsrlni_h_w.h and textually included here; that fragment reads `a`, `b`
// and uses `imm` as the shift amount.
v128 vsrlni_h_w(v128 a, v128 b, int imm) {
v128 dst;
// The included fragment assigns all 8 half-word lanes of dst.
#include "vsrlni_h_w.h"
return dst;
}

// Test entry point: runs FUZZ2 on the wrapper with imm fixed to 0 and to 7.
// NOTE(review): FUZZ2 is not defined in this file (presumably in common.h).
// Immediates above 7 are not exercised here.
void test() {
FUZZ2(vsrlni_h_w, 0);
FUZZ2(vsrlni_h_w, 7);
}
4 changes: 4 additions & 0 deletions code/vsrlni_h_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Lanes 0..3 are produced from b, lanes 4..7 from a. Each 32-bit source
// lane is taken as u32, shifted right by imm, and narrowed to 16 bits.
for (int i = 0; i < 8; i++) {
  if (i < 4) {
    dst.half[i] = (u16)((u32)b.word[i] >> imm);
  } else {
    dst.half[i] = (u16)((u32)a.word[i - 4] >> imm);
  }
}
12 changes: 12 additions & 0 deletions code/vsrlni_w_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#include "common.h"

// Wrapper for vsrlni.w.d: the lane-by-lane computation is kept in
// vsrlni_w_d.h and textually included here; that fragment reads `a`, `b`
// and uses `imm` as the shift amount.
v128 vsrlni_w_d(v128 a, v128 b, int imm) {
v128 dst;
// The included fragment assigns all 4 word lanes of dst.
#include "vsrlni_w_d.h"
return dst;
}

// Test entry point: runs FUZZ2 on the wrapper with imm fixed to 0 and to 7.
// NOTE(review): FUZZ2 is not defined in this file (presumably in common.h).
// Immediates above 7 are not exercised here.
void test() {
FUZZ2(vsrlni_w_d, 0);
FUZZ2(vsrlni_w_d, 7);
}
4 changes: 4 additions & 0 deletions code/vsrlni_w_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
// Lanes 0..1 are produced from b, lanes 2..3 from a. Each 64-bit source
// lane is taken as u64, shifted right by imm, and narrowed to 32 bits.
for (int i = 0; i < 4; i++) {
  if (i < 2) {
    dst.word[i] = (u32)((u64)b.dword[i] >> imm);
  } else {
    dst.word[i] = (u32)((u64)a.dword[i - 2] >> imm);
  }
}
54 changes: 35 additions & 19 deletions docs/lsx/shift.md
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,36 @@ Compute 128-bit `a` shifted right by `imm * 8` bits.
{{ vsllwil('d', 'w') }}
{{ vsllwil('du', 'wu') }}

{{ vsra('b') }}
{{ vsra('h') }}
{{ vsra('w') }}
{{ vsra('d') }}

{{ vsrai('b') }}
{{ vsrai('h') }}
{{ vsrai('w') }}
{{ vsrai('d') }}

{{ vsran('b', 'h') }}
{{ vsran('h', 'w') }}
{{ vsran('w', 'd') }}

{{ vsrani('b', 'h') }}
{{ vsrani('h', 'w') }}
{{ vsrani('w', 'd') }}
{{ vsrani('d', 'q') }}

{{ vsrar('b') }}
{{ vsrar('h') }}
{{ vsrar('w') }}
{{ vsrar('d') }}

{{ vsrari('b') }}
{{ vsrari('h') }}
{{ vsrari('w') }}
{{ vsrari('d') }}


{{ vsrl('b') }}
{{ vsrl('h') }}
{{ vsrl('w') }}
Expand All @@ -73,6 +103,11 @@ Compute 128-bit `a` shifted right by `imm * 8` bits.
{{ vsrln('h', 'w') }}
{{ vsrln('w', 'd') }}

{{ vsrlni('b', 'h') }}
{{ vsrlni('h', 'w') }}
{{ vsrlni('w', 'd') }}
{{ vsrlni('d', 'q') }}

{{ vsrlr('b') }}
{{ vsrlr('h') }}
{{ vsrlr('w') }}
Expand All @@ -83,25 +118,6 @@ Compute 128-bit `a` shifted right by `imm * 8` bits.
{{ vsrlri('w') }}
{{ vsrlri('d') }}

{{ vsra('b') }}
{{ vsra('h') }}
{{ vsra('w') }}
{{ vsra('d') }}

{{ vsrai('b') }}
{{ vsrai('h') }}
{{ vsrai('w') }}
{{ vsrai('d') }}

{{ vsrar('b') }}
{{ vsrar('h') }}
{{ vsrar('w') }}
{{ vsrar('d') }}

{{ vsrari('b') }}
{{ vsrari('h') }}
{{ vsrari('w') }}
{{ vsrari('d') }}

{{ vrotr('b') }}
{{ vrotr('h') }}
Expand Down
30 changes: 30 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1017,4 +1017,34 @@ def vsrln(name, name2):
intrinsic=f"__m128i __lsx_vsrln_{name}_{name2} (__m128i a, __m128i b)",
instr=f"vsrln.{name}.{name2} vr, vr, vr",
desc=f"Logical right shift the unsigned {width2}-bit elements in `a` by elements in `b`, truncate to {width}-bit and store the result to `dst`.",
)

@env.macro
def vsran(name, name2):
    """Build the documentation entry for ``vsran.{name}.{name2}``.

    ``name`` is the suffix of the narrow result elements and ``name2`` the
    suffix of the wide source elements. The first character of each is
    looked up in ``widths`` to get the bit counts quoted in the description.
    Returns whatever ``instruction`` produces for the assembled fields.
    """
    narrow_bits = widths[name[0]]
    wide_bits = widths[name2[0]]
    signature = f"__m128i __lsx_vsran_{name}_{name2} (__m128i a, __m128i b)"
    mnemonic = f"vsran.{name}.{name2} vr, vr, vr"
    summary = f"Arithmetic right shift the signed {wide_bits}-bit elements in `a` by elements in `b`, truncate to {narrow_bits}-bit and store the result to `dst`."
    return instruction(intrinsic=signature, instr=mnemonic, desc=summary)

@env.macro
def vsrlni(name, name2):
    """Build the documentation entry for ``vsrlni.{name}.{name2}``.

    ``name`` is the suffix of the narrow result elements and ``name2`` the
    suffix of the wide source elements. The first character of each is
    looked up in ``widths``; the wide bit count also sets the upper bound of
    the immediate shown in the intrinsic signature (``imm0_<wide - 1>``).
    Returns whatever ``instruction`` produces for the assembled fields.
    """
    narrow_bits = widths[name[0]]
    wide_bits = widths[name2[0]]
    max_imm = wide_bits - 1
    signature = f"__m128i __lsx_vsrlni_{name}_{name2} (__m128i a, __m128i b, imm0_{max_imm} imm)"
    mnemonic = f"vsrlni.{name}.{name2} vr, vr, imm"
    summary = f"Logical right shift the unsigned {wide_bits}-bit elements in `a` and `b` by `imm`, truncate to {narrow_bits}-bit and store the result to `dst`."
    return instruction(intrinsic=signature, instr=mnemonic, desc=summary)

@env.macro
def vsrani(name, name2):
    """Build the documentation entry for ``vsrani.{name}.{name2}``.

    ``name`` is the suffix of the narrow result elements and ``name2`` the
    suffix of the wide source elements. The first character of each is
    looked up in ``widths``; the wide bit count also sets the upper bound of
    the immediate shown in the intrinsic signature (``imm0_<wide - 1>``).
    Returns whatever ``instruction`` produces for the assembled fields.
    """
    width = widths[name[0]]
    width2 = widths[name2[0]]
    return instruction(
        intrinsic=f"__m128i __lsx_vsrani_{name}_{name2} (__m128i a, __m128i b, imm0_{width2-1} imm)",
        instr=f"vsrani.{name}.{name2} vr, vr, imm",
        # Spelling fixed ("Arithemtic" -> "Arithmetic") so the rendered docs
        # match the wording used by the sibling vsran entry.
        desc=f"Arithmetic right shift the signed {width2}-bit elements in `a` and `b` by `imm`, truncate to {width}-bit and store the result to `dst`.",
    )

0 comments on commit 7b4d180

Please sign in to comment.