Skip to content

Commit

Permalink
Add vsrlrn/vsrarn
Browse files Browse the repository at this point in the history
  • Loading branch information
jiegec committed Dec 13, 2023
1 parent 7b4d180 commit 476b709
Show file tree
Hide file tree
Showing 20 changed files with 199 additions and 14 deletions.
4 changes: 0 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ Arranged from QEMU implementation and [GCC Intrinsics](https://gcc.gnu.org/onlin

TODO List:

### vsrlrn.b.h/h.w/w.d

### vsrarn.b.h/h.w/w.d

### vssrln.b.h/h.w/w.d

### vssran.b.h/h.w/w.d
Expand Down
40 changes: 39 additions & 1 deletion code/gen_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@
file=f,
)
print(f"}}", file=f)
if "width" != "q":
if width != "d":
with open(f"vsllwil_{double_width}_{width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // double_w};i++) {{", file=f)
print(
Expand All @@ -129,13 +129,51 @@
file=f,
)
print(f"}}", file=f)
with open(f"vsrlrn_{width}_{double_width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
print(f"if (i < {64 // w}) {{", file=f)
print(f"u8 shift = (b.{double_m}[i] & {double_w-1});", file=f)
print(f"if (shift == 0) {{", file=f)
print(
f" dst.{m}[i] = (u{w})(u{double_w})a.{double_m}[i];",
file=f,
)
print(f"}} else {{", file=f)
print(
f" dst.{m}[i] = (u{w})(((u{double_w})a.{double_m}[i] >> shift) + (((u{double_w})a.{double_m}[i] >> (shift - 1)) & 0x1));",
file=f,
)
print(f"}}", file=f)
print(f"}} else {{", file=f)
print(f" dst.{m}[i] = 0;", file=f)
print(f"}}", file=f)
print(f"}}", file=f)
with open(f"vsran_{width}_{double_width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
print(
f" dst.{m}[i] = (i < {64 // w}) ? (s{w})((s{double_w})a.{double_m}[i] >> (b.{double_m}[i] & {double_w-1})) : 0;",
file=f,
)
print(f"}}", file=f)
with open(f"vsrarn_{width}_{double_width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
print(f"if (i < {64 // w}) {{", file=f)
print(f"u8 shift = (b.{double_m}[i] & {double_w-1});", file=f)
print(f"if (shift == 0) {{", file=f)
print(
f" dst.{m}[i] = (s{w})(s{double_w})a.{double_m}[i];",
file=f,
)
print(f"}} else {{", file=f)
print(
f" dst.{m}[i] = (s{w})(((s{double_w})a.{double_m}[i] >> shift) + (((s{double_w})a.{double_m}[i] >> (shift - 1)) & 0x1));",
file=f,
)
print(f"}}", file=f)
print(f"}} else {{", file=f)
print(f" dst.{m}[i] = 0;", file=f)
print(f"}}", file=f)
print(f"}}", file=f)
if sign == "s":
with open(f"vsrlni_{width}_{double_width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
Expand Down
2 changes: 2 additions & 0 deletions code/gen_tb.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,14 @@
"vsrani": (widths_vsrlni, "v128 a, v128 b, int imm", [0, 7]),
"vsrar": (widths_signed, "v128 a, v128 b"),
"vsrari": (widths_signed, "v128 a, int imm", [0, 7]),
"vsrarn": (widths_vsrln, "v128 a, v128 b"),
"vsrl": (widths_signed, "v128 a, v128 b"),
"vsrli": (widths_signed, "v128 a, int imm", [0, 7]),
"vsrln": (widths_vsrln, "v128 a, v128 b"),
"vsrlni": (widths_vsrlni, "v128 a, v128 b, int imm", [0, 7]),
"vsrlr": (widths_signed, "v128 a, v128 b"),
"vsrlri": (widths_signed, "v128 a, int imm", [0, 7]),
"vsrlrn": (widths_vsrln, "v128 a, v128 b"),
"vsub": (widths_signed, "v128 a, v128 b"),
"vsubwev": (widths_vsubw, "v128 a, v128 b"),
"vsubwod": (widths_vsubw, "v128 a, v128 b"),
Expand Down
3 changes: 0 additions & 3 deletions code/vsllwil_q_d.h

This file was deleted.

3 changes: 0 additions & 3 deletions code/vsran_d_q.h

This file was deleted.

9 changes: 9 additions & 0 deletions code/vsrarn_b_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vsrarn.b.h: takes two 128-bit vectors and returns the
// result vector. The per-element logic is not written here; it is textually
// included from vsrarn_b_h.h, which reads `a` and `b` and assigns every
// element of `dst`.
v128 vsrarn_b_h(v128 a, v128 b) {
v128 dst;
// The included fragment is a bare `for` loop that fills dst.byte[0..15].
#include "vsrarn_b_h.h"
return dst;
}

// Test entry point: hands vsrarn_b_h to the FUZZ2 macro. FUZZ2 is defined
// outside this file (presumably in common.h) and appears to exercise
// two-operand functions with generated inputs -- confirm against its definition.
void test() { FUZZ2(vsrarn_b_h); }
13 changes: 13 additions & 0 deletions code/vsrarn_b_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// vsrarn.b.h body: arithmetic right shift with rounding of the eight signed
// 16-bit lanes of `a` by the low 4 bits of the matching lane of `b`, narrowed
// to 8 bits. The remaining eight destination bytes are cleared.
for (int i = 0; i < 16; i++) {
  if (i >= 8) {
    // Only the low 64 bits of dst carry results.
    dst.byte[i] = 0;
    continue;
  }
  u8 shift = b.half[i] & 15;
  s16 value = (s16)a.half[i];
  // The rounding term is the last bit shifted out; with a zero shift nothing
  // is shifted out, so nothing is added.
  s16 round_bit = (shift == 0) ? 0 : ((value >> (shift - 1)) & 0x1);
  dst.byte[i] = (s8)((value >> shift) + round_bit);
}
9 changes: 9 additions & 0 deletions code/vsrarn_h_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vsrarn.h.w: takes two 128-bit vectors and returns the
// result vector. The per-element logic is textually included from
// vsrarn_h_w.h, which reads `a` and `b` and assigns every element of `dst`.
v128 vsrarn_h_w(v128 a, v128 b) {
v128 dst;
// The included fragment is a bare `for` loop that fills dst.half[0..7].
#include "vsrarn_h_w.h"
return dst;
}

// Test entry point: hands vsrarn_h_w to the FUZZ2 macro. FUZZ2 is defined
// outside this file (presumably in common.h) and appears to exercise
// two-operand functions with generated inputs -- confirm against its definition.
void test() { FUZZ2(vsrarn_h_w); }
13 changes: 13 additions & 0 deletions code/vsrarn_h_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// vsrarn.h.w body: arithmetic right shift with rounding of the four signed
// 32-bit lanes of `a` by the low 5 bits of the matching lane of `b`, narrowed
// to 16 bits. The remaining four destination halfwords are cleared.
for (int i = 0; i < 8; i++) {
  if (i >= 4) {
    // Only the low 64 bits of dst carry results.
    dst.half[i] = 0;
    continue;
  }
  u8 shift = b.word[i] & 31;
  s32 value = (s32)a.word[i];
  // The rounding term is the last bit shifted out; with a zero shift nothing
  // is shifted out, so nothing is added.
  s32 round_bit = (shift == 0) ? 0 : ((value >> (shift - 1)) & 0x1);
  dst.half[i] = (s16)((value >> shift) + round_bit);
}
9 changes: 9 additions & 0 deletions code/vsrarn_w_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vsrarn.w.d: takes two 128-bit vectors and returns the
// result vector. The per-element logic is textually included from
// vsrarn_w_d.h, which reads `a` and `b` and assigns every element of `dst`.
v128 vsrarn_w_d(v128 a, v128 b) {
v128 dst;
// The included fragment is a bare `for` loop that fills dst.word[0..3].
#include "vsrarn_w_d.h"
return dst;
}

// Test entry point: hands vsrarn_w_d to the FUZZ2 macro. FUZZ2 is defined
// outside this file (presumably in common.h) and appears to exercise
// two-operand functions with generated inputs -- confirm against its definition.
void test() { FUZZ2(vsrarn_w_d); }
13 changes: 13 additions & 0 deletions code/vsrarn_w_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// vsrarn.w.d body: arithmetic right shift with rounding of the two signed
// 64-bit lanes of `a` by the low 6 bits of the matching lane of `b`, narrowed
// to 32 bits. The remaining two destination words are cleared.
for (int i = 0; i < 4; i++) {
  if (i >= 2) {
    // Only the low 64 bits of dst carry results.
    dst.word[i] = 0;
    continue;
  }
  u8 shift = b.dword[i] & 63;
  s64 value = (s64)a.dword[i];
  // The rounding term is the last bit shifted out; with a zero shift nothing
  // is shifted out, so nothing is added.
  s64 round_bit = (shift == 0) ? 0 : ((value >> (shift - 1)) & 0x1);
  dst.word[i] = (s32)((value >> shift) + round_bit);
}
3 changes: 0 additions & 3 deletions code/vsrln_d_q.h

This file was deleted.

9 changes: 9 additions & 0 deletions code/vsrlrn_b_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vsrlrn.b.h: takes two 128-bit vectors and returns the
// result vector. The per-element logic is textually included from
// vsrlrn_b_h.h, which reads `a` and `b` and assigns every element of `dst`.
v128 vsrlrn_b_h(v128 a, v128 b) {
v128 dst;
// The included fragment is a bare `for` loop that fills dst.byte[0..15].
#include "vsrlrn_b_h.h"
return dst;
}

// Test entry point: hands vsrlrn_b_h to the FUZZ2 macro. FUZZ2 is defined
// outside this file (presumably in common.h) and appears to exercise
// two-operand functions with generated inputs -- confirm against its definition.
void test() { FUZZ2(vsrlrn_b_h); }
13 changes: 13 additions & 0 deletions code/vsrlrn_b_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// vsrlrn.b.h body: logical right shift with rounding of the eight unsigned
// 16-bit lanes of `a` by the low 4 bits of the matching lane of `b`, narrowed
// to 8 bits. The remaining eight destination bytes are cleared.
for (int i = 0; i < 16; i++) {
  if (i >= 8) {
    // Only the low 64 bits of dst carry results.
    dst.byte[i] = 0;
    continue;
  }
  u8 shift = b.half[i] & 15;
  u16 value = (u16)a.half[i];
  // The rounding term is the last bit shifted out; with a zero shift nothing
  // is shifted out, so nothing is added.
  u16 round_bit = (shift == 0) ? 0 : ((value >> (shift - 1)) & 0x1);
  dst.byte[i] = (u8)((value >> shift) + round_bit);
}
9 changes: 9 additions & 0 deletions code/vsrlrn_h_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vsrlrn.h.w: takes two 128-bit vectors and returns the
// result vector. The per-element logic is textually included from
// vsrlrn_h_w.h, which reads `a` and `b` and assigns every element of `dst`.
v128 vsrlrn_h_w(v128 a, v128 b) {
v128 dst;
// The included fragment is a bare `for` loop that fills dst.half[0..7].
#include "vsrlrn_h_w.h"
return dst;
}

// Test entry point: hands vsrlrn_h_w to the FUZZ2 macro. FUZZ2 is defined
// outside this file (presumably in common.h) and appears to exercise
// two-operand functions with generated inputs -- confirm against its definition.
void test() { FUZZ2(vsrlrn_h_w); }
13 changes: 13 additions & 0 deletions code/vsrlrn_h_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// vsrlrn.h.w body: logical right shift with rounding of the four unsigned
// 32-bit lanes of `a` by the low 5 bits of the matching lane of `b`, narrowed
// to 16 bits. The remaining four destination halfwords are cleared.
for (int i = 0; i < 8; i++) {
  if (i >= 4) {
    // Only the low 64 bits of dst carry results.
    dst.half[i] = 0;
    continue;
  }
  u8 shift = b.word[i] & 31;
  u32 value = (u32)a.word[i];
  // The rounding term is the last bit shifted out; with a zero shift nothing
  // is shifted out, so nothing is added.
  u32 round_bit = (shift == 0) ? 0 : ((value >> (shift - 1)) & 0x1);
  dst.half[i] = (u16)((value >> shift) + round_bit);
}
9 changes: 9 additions & 0 deletions code/vsrlrn_w_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vsrlrn.w.d: takes two 128-bit vectors and returns the
// result vector. The per-element logic is textually included from
// vsrlrn_w_d.h, which reads `a` and `b` and assigns every element of `dst`.
v128 vsrlrn_w_d(v128 a, v128 b) {
v128 dst;
// The included fragment is a bare `for` loop that fills dst.word[0..3].
#include "vsrlrn_w_d.h"
return dst;
}

// Test entry point: hands vsrlrn_w_d to the FUZZ2 macro. FUZZ2 is defined
// outside this file (presumably in common.h) and appears to exercise
// two-operand functions with generated inputs -- confirm against its definition.
void test() { FUZZ2(vsrlrn_w_d); }
13 changes: 13 additions & 0 deletions code/vsrlrn_w_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
// vsrlrn.w.d body: logical right shift with rounding of the two unsigned
// 64-bit lanes of `a` by the low 6 bits of the matching lane of `b`, narrowed
// to 32 bits. The remaining two destination words are cleared.
for (int i = 0; i < 4; i++) {
  if (i >= 2) {
    // Only the low 64 bits of dst carry results.
    dst.word[i] = 0;
    continue;
  }
  u8 shift = b.dword[i] & 63;
  u64 value = (u64)a.dword[i];
  // The rounding term is the last bit shifted out; with a zero shift nothing
  // is shifted out, so nothing is added.
  u64 round_bit = (shift == 0) ? 0 : ((value >> (shift - 1)) & 0x1);
  dst.word[i] = (u32)((value >> shift) + round_bit);
}
6 changes: 6 additions & 0 deletions docs/lsx/shift.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,9 @@ Compute 128-bit `a` shifted right by `imm * 8` bits.
{{ vsrari('w') }}
{{ vsrari('d') }}

{{ vsrarn('b', 'h') }}
{{ vsrarn('h', 'w') }}
{{ vsrarn('w', 'd') }}

{{ vsrl('b') }}
{{ vsrl('h') }}
Expand Down Expand Up @@ -118,6 +121,9 @@ Compute 128-bit `a` shifted right by `imm * 8` bits.
{{ vsrlri('w') }}
{{ vsrlri('d') }}

{{ vsrlrn('b', 'h') }}
{{ vsrlrn('h', 'w') }}
{{ vsrlrn('w', 'd') }}

{{ vrotr('b') }}
{{ vrotr('h') }}
Expand Down
20 changes: 20 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -1047,4 +1047,24 @@ def vsrani(name, name2):
intrinsic=f"__m128i __lsx_vsrani_{name}_{name2} (__m128i a, __m128i b, imm0_{width2-1} imm)",
instr=f"vsrani.{name}.{name2} vr, vr, imm",
desc=f"Arithemtic right shift the signed {width2}-bit elements in `a` and `b` by `imm`, truncate to {width}-bit and store the result to `dst`.",
)

@env.macro
def vsrlrn(name, name2):
    """Build the documentation entry for the `vsrlrn.<name>.<name2>` instruction.

    `name` is the suffix of the narrow destination element and `name2` the
    suffix of the wide source element. The first character of each suffix is
    looked up in `widths`, and the results are interpolated into the
    description as bit counts. Returns whatever `instruction(...)` returns.
    """
    narrow_bits, wide_bits = (widths[suffix[0]] for suffix in (name, name2))
    signature = f"__m128i __lsx_vsrlrn_{name}_{name2} (__m128i a, __m128i b)"
    mnemonic = f"vsrlrn.{name}.{name2} vr, vr, vr"
    summary = (
        f"Logical right shift (with rounding) the unsigned {wide_bits}-bit elements in `a` by elements in `b`, "
        f"truncate to {narrow_bits}-bit and store the result to `dst`."
    )
    return instruction(intrinsic=signature, instr=mnemonic, desc=summary)

@env.macro
def vsrarn(name, name2):
    """Build the documentation entry for the `vsrarn.<name>.<name2>` instruction.

    `name` is the suffix of the narrow destination element and `name2` the
    suffix of the wide source element. The first character of each suffix is
    looked up in `widths`, and the results are interpolated into the
    description as bit counts. Returns whatever `instruction(...)` returns.
    """
    narrow_bits, wide_bits = (widths[suffix[0]] for suffix in (name, name2))
    signature = f"__m128i __lsx_vsrarn_{name}_{name2} (__m128i a, __m128i b)"
    mnemonic = f"vsrarn.{name}.{name2} vr, vr, vr"
    summary = (
        f"Arithmetic right shift (with rounding) the signed {wide_bits}-bit elements in `a` by elements in `b`, "
        f"truncate to {narrow_bits}-bit and store the result to `dst`."
    )
    return instruction(intrinsic=signature, instr=mnemonic, desc=summary)

0 comments on commit 476b709

Please sign in to comment.