Skip to content

Commit

Permalink
Add vmuh
Browse files Browse the repository at this point in the history
  • Loading branch information
jiegec committed Dec 12, 2023
1 parent d8ce748 commit a2f61fa
Show file tree
Hide file tree
Showing 21 changed files with 123 additions and 4 deletions.
4 changes: 0 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,6 @@ Arranged from QEMU implementation and [GCC Intrinsics](https://gcc.gnu.org/onlin

TODO List:

### vmuh.b/h/w/d/bu/hu/wu/du

Vector Multiplication High

### vmod.b/h/w/d

### vmod.bu/hu/wu/du
Expand Down
7 changes: 7 additions & 0 deletions code/gen_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,13 @@
file=f,
)
print(f"}}", file=f)
# Write vmuh_<width>.h: a C loop over 128 // w lanes that multiplies the
# elements of a and b after casting them to the 2*w-bit type and shifts the
# product right by w bits.
with open(f"vmuh_{width}.h", "w") as f:
    header_lines = (
        f"for (int i = 0;i < {128 // w};i++) {{",
        f" dst.{m}[i] = ((({sign}{w * 2})({sign}{w})a.{m}[i] * ({sign}{w * 2})({sign}{w})b.{m}[i])) >> {w};",
        "}",
    )
    for header_line in header_lines:
        print(header_line, file=f)

for width in ["b", "bu", "h", "hu", "w", "wu", "d", "du"]:
double_width = double_widths[width]
Expand Down
1 change: 1 addition & 0 deletions code/gen_tb.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@
"vmin": (widths_all, "v128 a, v128 b"),
"vmini": (widths_all, "v128 a, int imm", [0, 3, 15]),
"vmsub": (widths_signed, "v128 a, v128 b, v128 c"),
"vmuh": (widths_all, "v128 a, v128 b"),
"vmul": (widths_signed, "v128 a, v128 b"),
"vmulwev": (widths_vaddw, "v128 a, v128 b"),
"vmulwod": (widths_vaddw, "v128 a, v128 b"),
Expand Down
9 changes: 9 additions & 0 deletions code/vmuh_b.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Reference implementation of vmuh.b. The per-lane computation is not written
// here; it is pulled in textually from vmuh_b.h, which reads `a` and `b` and
// assigns every byte lane of `dst`.
v128 vmuh_b(v128 a, v128 b) {
v128 dst;
// The included fragment relies on the local names a, b and dst.
#include "vmuh_b.h"
return dst;
}

// Test entry point: passes vmuh_b to FUZZ2 (presumably provided by common.h —
// confirm).
void test() { FUZZ2(vmuh_b); }
3 changes: 3 additions & 0 deletions code/vmuh_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 16; i++) {
dst.byte[i] = (((s16)(s8)a.byte[i] * (s16)(s8)b.byte[i])) >> 8;
}
9 changes: 9 additions & 0 deletions code/vmuh_bu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Reference implementation of vmuh.bu. The per-lane computation is not written
// here; it is pulled in textually from vmuh_bu.h, which reads `a` and `b` and
// assigns every byte lane of `dst`.
v128 vmuh_bu(v128 a, v128 b) {
v128 dst;
// The included fragment relies on the local names a, b and dst.
#include "vmuh_bu.h"
return dst;
}

// Test entry point: passes vmuh_bu to FUZZ2 (presumably provided by common.h —
// confirm).
void test() { FUZZ2(vmuh_bu); }
3 changes: 3 additions & 0 deletions code/vmuh_bu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 16; i++) {
dst.byte[i] = (((u16)(u8)a.byte[i] * (u16)(u8)b.byte[i])) >> 8;
}
9 changes: 9 additions & 0 deletions code/vmuh_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Reference implementation of vmuh.d. The per-lane computation is not written
// here; it is pulled in textually from vmuh_d.h, which reads `a` and `b` and
// assigns both dword lanes of `dst`.
v128 vmuh_d(v128 a, v128 b) {
v128 dst;
// The included fragment relies on the local names a, b and dst.
#include "vmuh_d.h"
return dst;
}

// Test entry point: passes vmuh_d to FUZZ2 (presumably provided by common.h —
// confirm).
void test() { FUZZ2(vmuh_d); }
3 changes: 3 additions & 0 deletions code/vmuh_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 2; i++) {
dst.dword[i] = (((s128)(s64)a.dword[i] * (s128)(s64)b.dword[i])) >> 64;
}
9 changes: 9 additions & 0 deletions code/vmuh_du.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Reference implementation of vmuh.du. The per-lane computation is not written
// here; it is pulled in textually from vmuh_du.h, which reads `a` and `b` and
// assigns both dword lanes of `dst`.
v128 vmuh_du(v128 a, v128 b) {
v128 dst;
// The included fragment relies on the local names a, b and dst.
#include "vmuh_du.h"
return dst;
}

// Test entry point: passes vmuh_du to FUZZ2 (presumably provided by common.h —
// confirm).
void test() { FUZZ2(vmuh_du); }
3 changes: 3 additions & 0 deletions code/vmuh_du.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 2; i++) {
dst.dword[i] = (((u128)(u64)a.dword[i] * (u128)(u64)b.dword[i])) >> 64;
}
9 changes: 9 additions & 0 deletions code/vmuh_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Reference implementation of vmuh.h. The per-lane computation is not written
// here; it is pulled in textually from vmuh_h.h, which reads `a` and `b` and
// assigns every half lane of `dst`.
v128 vmuh_h(v128 a, v128 b) {
v128 dst;
// The included fragment relies on the local names a, b and dst.
#include "vmuh_h.h"
return dst;
}

// Test entry point: passes vmuh_h to FUZZ2 (presumably provided by common.h —
// confirm).
void test() { FUZZ2(vmuh_h); }
3 changes: 3 additions & 0 deletions code/vmuh_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 8; i++) {
dst.half[i] = (((s32)(s16)a.half[i] * (s32)(s16)b.half[i])) >> 16;
}
9 changes: 9 additions & 0 deletions code/vmuh_hu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Reference implementation of vmuh.hu. The per-lane computation is not written
// here; it is pulled in textually from vmuh_hu.h, which reads `a` and `b` and
// assigns every half lane of `dst`.
v128 vmuh_hu(v128 a, v128 b) {
v128 dst;
// The included fragment relies on the local names a, b and dst.
#include "vmuh_hu.h"
return dst;
}

// Test entry point: passes vmuh_hu to FUZZ2 (presumably provided by common.h —
// confirm).
void test() { FUZZ2(vmuh_hu); }
3 changes: 3 additions & 0 deletions code/vmuh_hu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 8; i++) {
dst.half[i] = (((u32)(u16)a.half[i] * (u32)(u16)b.half[i])) >> 16;
}
9 changes: 9 additions & 0 deletions code/vmuh_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Reference implementation of vmuh.w. The per-lane computation is not written
// here; it is pulled in textually from vmuh_w.h, which reads `a` and `b` and
// assigns every word lane of `dst`.
v128 vmuh_w(v128 a, v128 b) {
v128 dst;
// The included fragment relies on the local names a, b and dst.
#include "vmuh_w.h"
return dst;
}

// Test entry point: passes vmuh_w to FUZZ2 (presumably provided by common.h —
// confirm).
void test() { FUZZ2(vmuh_w); }
3 changes: 3 additions & 0 deletions code/vmuh_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 4; i++) {
dst.word[i] = (((s64)(s32)a.word[i] * (s64)(s32)b.word[i])) >> 32;
}
9 changes: 9 additions & 0 deletions code/vmuh_wu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Reference implementation of vmuh.wu. The per-lane computation is not written
// here; it is pulled in textually from vmuh_wu.h, which reads `a` and `b` and
// assigns every word lane of `dst`.
v128 vmuh_wu(v128 a, v128 b) {
v128 dst;
// The included fragment relies on the local names a, b and dst.
#include "vmuh_wu.h"
return dst;
}

// Test entry point: passes vmuh_wu to FUZZ2 (presumably provided by common.h —
// confirm).
void test() { FUZZ2(vmuh_wu); }
3 changes: 3 additions & 0 deletions code/vmuh_wu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 4; i++) {
dst.word[i] = (((u64)(u32)a.word[i] * (u64)(u32)b.word[i])) >> 32;
}
9 changes: 9 additions & 0 deletions docs/lsx/integer_computation.md
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,15 @@
{{ vmsub('w') }}
{{ vmsub('d') }}

{{ vmuh('b') }}
{{ vmuh('bu') }}
{{ vmuh('h') }}
{{ vmuh('hu') }}
{{ vmuh('w') }}
{{ vmuh('wu') }}
{{ vmuh('d') }}
{{ vmuh('du') }}

{{ vmul('b') }}
{{ vmul('h') }}
{{ vmul('w') }}
Expand Down
10 changes: 10 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -517,6 +517,16 @@ def vmul(name):
desc=f"Multiply {width}-bit elements in `a` and `b`, save the result in `dst`.",
)

@env.macro
def vmuh(name):
    """Build the documentation entry for the `vmuh.<name>` instruction.

    `name` is the element-type suffix. It is used as the key into `widths`
    (element bit width) and `signednesses` (signedness wording), and is
    spliced into the intrinsic prototype and the assembly mnemonic. Returns
    whatever `instruction(...)` returns for those three fields.
    """
    bits = widths[name]
    sign_word = signednesses[name]
    description = (
        f"Multiply {sign_word} {bits}-bit elements in `a` and `b`, "
        f"save the high {bits}-bit result in `dst`."
    )
    return instruction(
        intrinsic=f"__m128i __lsx_vmuh_{name} (__m128i a, __m128i b)",
        instr=f"vmuh.{name} vr, vr, vr",
        desc=description,
    )

@env.macro
def vmsub(name):
width = widths[name]
Expand Down

0 comments on commit a2f61fa

Please sign in to comment.