Skip to content

Commit

Permalink
Add vsrlr/vsrar
Browse files Browse the repository at this point in the history
  • Loading branch information
jiegec committed Dec 12, 2023
1 parent 47dec44 commit 3c1e68b
Show file tree
Hide file tree
Showing 21 changed files with 196 additions and 4 deletions.
4 changes: 0 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ Arranged from QEMU implementation and [GCC Intrinsics](https://gcc.gnu.org/onlin

TODO List:

### vsrlr.b/h/w/d

### vsrar.b/h/w/d

### vsrln.b.h/h.w/w.d

### vsran.b.h/h.w/w.d
Expand Down
28 changes: 28 additions & 0 deletions code/gen_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,34 @@
file=f,
)
print(f"}}", file=f)
with open(f"vsrlr_{width}.h", "w") as f:
    # C loop body for vsrlr: per-lane right shift of `a` by the masked
    # element of `b`, plus the last bit shifted out as the rounding term.
    # `w` is used as the element width in bits and `m` as the v128 member
    # name; both are presumably set by the enclosing loop (not visible here).
    vsrlr_body = [
        f"for (int i = 0;i < {128 // w};i++) {{",
        # A shift amount of zero discards no bits, so the lane is copied as is.
        f" if ((b.{m}[i] & 0x{w-1:x}) == 0) {{",
        f" dst.{m}[i] = a.{m}[i];",
        f" }} else {{",
        f" dst.{m}[i] = (a.{m}[i] >> (b.{m}[i] & 0x{w-1:x})) + ((a.{m}[i] >> ((b.{m}[i] & 0x{w-1:x}) - 1)) & 0x1);",
        f" }}",
        f"}}",
    ]
    f.write("\n".join(vsrlr_body) + "\n")
with open(f"vsrar_{width}.h", "w") as f:
    # C loop body for vsrar: same shape as a rounding right shift, but each
    # lane of `a` is cast to s{w} before shifting. `w` is used as the element
    # width in bits and `m` as the v128 member name; both are presumably set
    # by the enclosing loop (not visible here).
    vsrar_body = [
        f"for (int i = 0;i < {128 // w};i++) {{",
        # A shift amount of zero discards no bits, so the lane is copied as is.
        f" if ((b.{m}[i] & 0x{w-1:x}) == 0) {{",
        f" dst.{m}[i] = a.{m}[i];",
        f" }} else {{",
        f" dst.{m}[i] = ((s{w})a.{m}[i] >> (b.{m}[i] & 0x{w-1:x})) + (((s{w})a.{m}[i] >> ((b.{m}[i] & 0x{w-1:x}) - 1)) & 0x1);",
        f" }}",
        f"}}",
    ]
    f.write("\n".join(vsrar_body) + "\n")

for width in ["s", "d"]:
m = members_fp[width]
Expand Down
2 changes: 2 additions & 0 deletions code/gen_tb.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,9 @@
"vslt": (widths_all, "v128 a, v128 b"),
"vsle": (widths_all, "v128 a, v128 b"),
"vsrl": (widths_signed, "v128 a, v128 b"),
"vsrlr": (widths_signed, "v128 a, v128 b"),
"vsra": (widths_signed, "v128 a, v128 b"),
"vsrar": (widths_signed, "v128 a, v128 b"),
"vsub": (widths_signed, "v128 a, v128 b"),
"vsubwev": (widths_vsubw, "v128 a, v128 b"),
"vsubwod": (widths_vsubw, "v128 a, v128 b"),
Expand Down
9 changes: 9 additions & 0 deletions code/vsrar_b.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// vsrar_b: computes `dst` from `a` and `b` by textually including vsrar_b.h,
// whose loop assigns dst.byte[i] for i in 0..15.
v128 vsrar_b(v128 a, v128 b) {
v128 dst;
// The included fragment refers to `a`, `b` and `dst` by name; do not rename them.
#include "vsrar_b.h"
return dst;
}

// Test entry point. FUZZ2 is defined outside this file (presumably in
// common.h) and presumably fuzz-tests the two-operand function - TODO confirm.
void test() { FUZZ2(vsrar_b); }
8 changes: 8 additions & 0 deletions code/vsrar_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// For each of 16 byte lanes: right shift a.byte[i], cast to s8 (presumably a
// signed 8-bit type), by the low 3 bits of b.byte[i], with rounding.
for (int i = 0; i < 16; i++) {
if ((b.byte[i] & 0x7) == 0) {
// Shift amount 0: no bits are discarded, so the lane is copied unchanged.
dst.byte[i] = a.byte[i];
} else {
// Shifted value plus bit (shift - 1) of the source, i.e. the last bit shifted out.
dst.byte[i] = ((s8)a.byte[i] >> (b.byte[i] & 0x7)) +
(((s8)a.byte[i] >> ((b.byte[i] & 0x7) - 1)) & 0x1);
}
}
9 changes: 9 additions & 0 deletions code/vsrar_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// vsrar_d: computes `dst` from `a` and `b` by textually including vsrar_d.h,
// whose loop assigns dst.dword[i] for i in 0..1.
v128 vsrar_d(v128 a, v128 b) {
v128 dst;
// The included fragment refers to `a`, `b` and `dst` by name; do not rename them.
#include "vsrar_d.h"
return dst;
}

// Test entry point. FUZZ2 is defined outside this file (presumably in
// common.h) and presumably fuzz-tests the two-operand function - TODO confirm.
void test() { FUZZ2(vsrar_d); }
8 changes: 8 additions & 0 deletions code/vsrar_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// For each of 2 dword lanes: right shift a.dword[i], cast to s64 (presumably a
// signed 64-bit type), by the low 6 bits of b.dword[i], with rounding.
for (int i = 0; i < 2; i++) {
if ((b.dword[i] & 0x3f) == 0) {
// Shift amount 0: no bits are discarded, so the lane is copied unchanged.
dst.dword[i] = a.dword[i];
} else {
// Shifted value plus bit (shift - 1) of the source, i.e. the last bit shifted out.
dst.dword[i] = ((s64)a.dword[i] >> (b.dword[i] & 0x3f)) +
(((s64)a.dword[i] >> ((b.dword[i] & 0x3f) - 1)) & 0x1);
}
}
9 changes: 9 additions & 0 deletions code/vsrar_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// vsrar_h: computes `dst` from `a` and `b` by textually including vsrar_h.h,
// whose loop assigns dst.half[i] for i in 0..7.
v128 vsrar_h(v128 a, v128 b) {
v128 dst;
// The included fragment refers to `a`, `b` and `dst` by name; do not rename them.
#include "vsrar_h.h"
return dst;
}

// Test entry point. FUZZ2 is defined outside this file (presumably in
// common.h) and presumably fuzz-tests the two-operand function - TODO confirm.
void test() { FUZZ2(vsrar_h); }
8 changes: 8 additions & 0 deletions code/vsrar_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// For each of 8 half lanes: right shift a.half[i], cast to s16 (presumably a
// signed 16-bit type), by the low 4 bits of b.half[i], with rounding.
for (int i = 0; i < 8; i++) {
if ((b.half[i] & 0xf) == 0) {
// Shift amount 0: no bits are discarded, so the lane is copied unchanged.
dst.half[i] = a.half[i];
} else {
// Shifted value plus bit (shift - 1) of the source, i.e. the last bit shifted out.
dst.half[i] = ((s16)a.half[i] >> (b.half[i] & 0xf)) +
(((s16)a.half[i] >> ((b.half[i] & 0xf) - 1)) & 0x1);
}
}
9 changes: 9 additions & 0 deletions code/vsrar_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// vsrar_w: computes `dst` from `a` and `b` by textually including vsrar_w.h,
// whose loop assigns dst.word[i] for i in 0..3.
v128 vsrar_w(v128 a, v128 b) {
v128 dst;
// The included fragment refers to `a`, `b` and `dst` by name; do not rename them.
#include "vsrar_w.h"
return dst;
}

// Test entry point. FUZZ2 is defined outside this file (presumably in
// common.h) and presumably fuzz-tests the two-operand function - TODO confirm.
void test() { FUZZ2(vsrar_w); }
8 changes: 8 additions & 0 deletions code/vsrar_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// For each of 4 word lanes: right shift a.word[i], cast to s32 (presumably a
// signed 32-bit type), by the low 5 bits of b.word[i], with rounding.
for (int i = 0; i < 4; i++) {
if ((b.word[i] & 0x1f) == 0) {
// Shift amount 0: no bits are discarded, so the lane is copied unchanged.
dst.word[i] = a.word[i];
} else {
// Shifted value plus bit (shift - 1) of the source, i.e. the last bit shifted out.
dst.word[i] = ((s32)a.word[i] >> (b.word[i] & 0x1f)) +
(((s32)a.word[i] >> ((b.word[i] & 0x1f) - 1)) & 0x1);
}
}
9 changes: 9 additions & 0 deletions code/vsrlr_b.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// vsrlr_b: computes `dst` from `a` and `b` by textually including vsrlr_b.h,
// whose loop assigns dst.byte[i] for i in 0..15.
v128 vsrlr_b(v128 a, v128 b) {
v128 dst;
// The included fragment refers to `a`, `b` and `dst` by name; do not rename them.
#include "vsrlr_b.h"
return dst;
}

// Test entry point. FUZZ2 is defined outside this file (presumably in
// common.h) and presumably fuzz-tests the two-operand function - TODO confirm.
void test() { FUZZ2(vsrlr_b); }
8 changes: 8 additions & 0 deletions code/vsrlr_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// For each of 16 byte lanes: right shift a.byte[i] by the low 3 bits of
// b.byte[i], with rounding. No cast is applied, so the shift is logical only
// if the byte member is an unsigned type - presumably it is; confirm in common.h.
for (int i = 0; i < 16; i++) {
if ((b.byte[i] & 0x7) == 0) {
// Shift amount 0: no bits are discarded, so the lane is copied unchanged.
dst.byte[i] = a.byte[i];
} else {
// Shifted value plus bit (shift - 1) of the source, i.e. the last bit shifted out.
dst.byte[i] = (a.byte[i] >> (b.byte[i] & 0x7)) +
((a.byte[i] >> ((b.byte[i] & 0x7) - 1)) & 0x1);
}
}
9 changes: 9 additions & 0 deletions code/vsrlr_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// vsrlr_d: computes `dst` from `a` and `b` by textually including vsrlr_d.h,
// whose loop assigns dst.dword[i] for i in 0..1.
v128 vsrlr_d(v128 a, v128 b) {
v128 dst;
// The included fragment refers to `a`, `b` and `dst` by name; do not rename them.
#include "vsrlr_d.h"
return dst;
}

// Test entry point. FUZZ2 is defined outside this file (presumably in
// common.h) and presumably fuzz-tests the two-operand function - TODO confirm.
void test() { FUZZ2(vsrlr_d); }
8 changes: 8 additions & 0 deletions code/vsrlr_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// For each of 2 dword lanes: right shift a.dword[i] by the low 6 bits of
// b.dword[i], with rounding. No cast is applied, so the shift is logical only
// if the dword member is an unsigned type - presumably it is; confirm in common.h.
for (int i = 0; i < 2; i++) {
if ((b.dword[i] & 0x3f) == 0) {
// Shift amount 0: no bits are discarded, so the lane is copied unchanged.
dst.dword[i] = a.dword[i];
} else {
// Shifted value plus bit (shift - 1) of the source, i.e. the last bit shifted out.
dst.dword[i] = (a.dword[i] >> (b.dword[i] & 0x3f)) +
((a.dword[i] >> ((b.dword[i] & 0x3f) - 1)) & 0x1);
}
}
9 changes: 9 additions & 0 deletions code/vsrlr_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// vsrlr_h: computes `dst` from `a` and `b` by textually including vsrlr_h.h,
// whose loop assigns dst.half[i] for i in 0..7.
v128 vsrlr_h(v128 a, v128 b) {
v128 dst;
// The included fragment refers to `a`, `b` and `dst` by name; do not rename them.
#include "vsrlr_h.h"
return dst;
}

// Test entry point. FUZZ2 is defined outside this file (presumably in
// common.h) and presumably fuzz-tests the two-operand function - TODO confirm.
void test() { FUZZ2(vsrlr_h); }
8 changes: 8 additions & 0 deletions code/vsrlr_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// For each of 8 half lanes: right shift a.half[i] by the low 4 bits of
// b.half[i], with rounding. No cast is applied, so the shift is logical only
// if the half member is an unsigned type - presumably it is; confirm in common.h.
for (int i = 0; i < 8; i++) {
if ((b.half[i] & 0xf) == 0) {
// Shift amount 0: no bits are discarded, so the lane is copied unchanged.
dst.half[i] = a.half[i];
} else {
// Shifted value plus bit (shift - 1) of the source, i.e. the last bit shifted out.
dst.half[i] = (a.half[i] >> (b.half[i] & 0xf)) +
((a.half[i] >> ((b.half[i] & 0xf) - 1)) & 0x1);
}
}
9 changes: 9 additions & 0 deletions code/vsrlr_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// vsrlr_w: computes `dst` from `a` and `b` by textually including vsrlr_w.h,
// whose loop assigns dst.word[i] for i in 0..3.
v128 vsrlr_w(v128 a, v128 b) {
v128 dst;
// The included fragment refers to `a`, `b` and `dst` by name; do not rename them.
#include "vsrlr_w.h"
return dst;
}

// Test entry point. FUZZ2 is defined outside this file (presumably in
// common.h) and presumably fuzz-tests the two-operand function - TODO confirm.
void test() { FUZZ2(vsrlr_w); }
8 changes: 8 additions & 0 deletions code/vsrlr_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// For each of 4 word lanes: right shift a.word[i] by the low 5 bits of
// b.word[i], with rounding. No cast is applied, so the shift is logical only
// if the word member is an unsigned type - presumably it is; confirm in common.h.
for (int i = 0; i < 4; i++) {
if ((b.word[i] & 0x1f) == 0) {
// Shift amount 0: no bits are discarded, so the lane is copied unchanged.
dst.word[i] = a.word[i];
} else {
// Shifted value plus bit (shift - 1) of the source, i.e. the last bit shifted out.
dst.word[i] = (a.word[i] >> (b.word[i] & 0x1f)) +
((a.word[i] >> ((b.word[i] & 0x1f) - 1)) & 0x1);
}
}
10 changes: 10 additions & 0 deletions docs/lsx/shift.md
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,21 @@ Compute 128-bit `a` shifted right by `imm * 8` bits.
{{ vsrl('w') }}
{{ vsrl('d') }}

{{ vsrlr('b') }}
{{ vsrlr('h') }}
{{ vsrlr('w') }}
{{ vsrlr('d') }}

{{ vsra('b') }}
{{ vsra('h') }}
{{ vsra('w') }}
{{ vsra('d') }}

{{ vsrar('b') }}
{{ vsrar('h') }}
{{ vsrar('w') }}
{{ vsrar('d') }}

{{ vrotr('b') }}
{{ vrotr('h') }}
{{ vrotr('w') }}
Expand Down
20 changes: 20 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -677,4 +677,24 @@ def vrotr(name):
intrinsic=f"__m128i __lsx_vrotr_{name} (__m128i a, __m128i b)",
instr=f"vrotr.{name} vr, vr, vr",
desc=f"Rotate right the unsigned {width}-bit elements in `a` by elements in `b`, store the result to `dst`.",
)

@env.macro
def vsrlr(name):
    """Build the documentation entry for the ``vsrlr.{name}`` instruction.

    Args:
        name: Element-size suffix used as the key into ``widths``; the docs
            page invokes this macro with ``'b'``, ``'h'``, ``'w'`` and ``'d'``.

    Returns:
        The value produced by ``instruction(...)`` for this intrinsic.

    Raises:
        KeyError: If ``name`` is not a key of ``widths``.
    """
    # Only the element width appears in the description; the text hard-codes
    # "unsigned", so no signedness lookup is needed here.
    width = widths[name]
    return instruction(
        intrinsic=f"__m128i __lsx_vsrlr_{name} (__m128i a, __m128i b)",
        instr=f"vsrlr.{name} vr, vr, vr",
        desc=f"Logical right shift (with rounding) the unsigned {width}-bit elements in `a` by elements in `b`, store the result to `dst`.",
    )

@env.macro
def vsrar(name):
    """Build the documentation entry for the ``vsrar.{name}`` instruction.

    Args:
        name: Element-size suffix used as the key into ``widths``; the docs
            page invokes this macro with ``'b'``, ``'h'``, ``'w'`` and ``'d'``.

    Returns:
        The value produced by ``instruction(...)`` for this intrinsic.

    Raises:
        KeyError: If ``name`` is not a key of ``widths``.
    """
    # Only the element width appears in the description; the text hard-codes
    # "signed", so no signedness lookup is needed here.
    width = widths[name]
    return instruction(
        intrinsic=f"__m128i __lsx_vsrar_{name} (__m128i a, __m128i b)",
        instr=f"vsrar.{name} vr, vr, vr",
        desc=f"Arithmetic right shift (with rounding) the signed {width}-bit elements in `a` by elements in `b`, store the result to `dst`.",
    )

0 comments on commit 3c1e68b

Please sign in to comment.