Skip to content

Commit

Permalink
Add vhaddw/vhsubw
Browse files Browse the repository at this point in the history
  • Loading branch information
jiegec committed Dec 12, 2023
1 parent b6d8145 commit 0a74f65
Show file tree
Hide file tree
Showing 37 changed files with 250 additions and 17 deletions.
16 changes: 0 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,22 +54,6 @@ Vector Saturated Add Unsigned

Vector Saturated Subtract Unsigned

### vhaddw.h.b/w.h/d.w/q.d

Vector Half Add

### vhsubw.h.b/w.h/d.w/q.d

Vector Half Add

### vhaddw.hu.bu/wu.hu/du.wu/qu.du

Vector Half Add Unsigned

### vhsubw.hu.bu/wu.hu/du.wu/qu.du

Vector Half Sub Unsigned

### vmax.b/h/w/d

Vector Maximum
Expand Down
15 changes: 15 additions & 0 deletions code/gen_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
file=f,
)
print(f"}}", file=f)

double_width = double_widths_same_signedness[width]
double_w = widths[double_width]
double_m = members[double_width]
Expand All @@ -91,6 +92,20 @@
file=f,
)
print(f"}}", file=f)
# Emit the C loop body for vhaddw: one iteration per double-width destination
# lane (128 // double_w lanes). Each lane is the odd-indexed narrow element of
# `a` (index 2 * i + 1) plus the even-indexed narrow element of `b` (index
# 2 * i); each operand is cast to the narrow type and then to the double-width
# type before the addition.
# NOTE(review): `sign`, `w` and `m` are defined outside this hunk — presumably
# the signedness prefix, bit width and union member of the narrow type; confirm.
with open(f"vhaddw_{double_width}_{width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // double_w};i++) {{", file=f)
print(
f" dst.{double_m}[i] = ({sign}{double_w})({sign}{w})a.{m}[2 * i + 1] + ({sign}{double_w})({sign}{w})b.{m}[2 * i];",
file=f,
)
print(f"}}", file=f)
# Same shape as vhaddw above, but the even-indexed element of `b` is
# subtracted from the odd-indexed element of `a`.
with open(f"vhsubw_{double_width}_{width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // double_w};i++) {{", file=f)
print(
f" dst.{double_m}[i] = ({sign}{double_w})({sign}{w})a.{m}[2 * i + 1] - ({sign}{double_w})({sign}{w})b.{m}[2 * i];",
file=f,
)
print(f"}}", file=f)

if width == "d" or width == "du":
with open(f"vextl_{double_width}_{width}.h", "w") as f:
Expand Down
2 changes: 2 additions & 0 deletions code/gen_tb.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
"vexth": (widths_vexth, "v128 a"),
"vextl": (["q_d", "qu_du"], "v128 a"),
"vextrins": (widths_signed, "v128 a, v128 b, int imm", [0, 3, 7, 15, 16, 32, 64, 128, 255]),
"vhaddw": (widths_vexth, "v128 a, v128 b"),
"vhsubw": (widths_vexth, "v128 a, v128 b"),
}

for name in tb:
Expand Down
9 changes: 9 additions & 0 deletions code/vhaddw_d_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhaddw_d_w(v128 a, v128 b) {
v128 dst;
#include "vhaddw_d_w.h"
return dst;
}

void test() { FUZZ2(vhaddw_d_w); }
3 changes: 3 additions & 0 deletions code/vhaddw_d_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)(s32)a.word[2 * i + 1] + (s64)(s32)b.word[2 * i];
}
9 changes: 9 additions & 0 deletions code/vhaddw_du_wu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhaddw_du_wu(v128 a, v128 b) {
v128 dst;
#include "vhaddw_du_wu.h"
return dst;
}

void test() { FUZZ2(vhaddw_du_wu); }
3 changes: 3 additions & 0 deletions code/vhaddw_du_wu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 2; i++) {
dst.dword[i] = (u64)(u32)a.word[2 * i + 1] + (u64)(u32)b.word[2 * i];
}
9 changes: 9 additions & 0 deletions code/vhaddw_h_b.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhaddw_h_b(v128 a, v128 b) {
v128 dst;
#include "vhaddw_h_b.h"
return dst;
}

void test() { FUZZ2(vhaddw_h_b); }
3 changes: 3 additions & 0 deletions code/vhaddw_h_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 8; i++) {
dst.half[i] = (s16)(s8)a.byte[2 * i + 1] + (s16)(s8)b.byte[2 * i];
}
9 changes: 9 additions & 0 deletions code/vhaddw_hu_bu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhaddw_hu_bu(v128 a, v128 b) {
v128 dst;
#include "vhaddw_hu_bu.h"
return dst;
}

void test() { FUZZ2(vhaddw_hu_bu); }
3 changes: 3 additions & 0 deletions code/vhaddw_hu_bu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 8; i++) {
dst.half[i] = (u16)(u8)a.byte[2 * i + 1] + (u16)(u8)b.byte[2 * i];
}
9 changes: 9 additions & 0 deletions code/vhaddw_q_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhaddw_q_d(v128 a, v128 b) {
v128 dst;
#include "vhaddw_q_d.h"
return dst;
}

void test() { FUZZ2(vhaddw_q_d); }
3 changes: 3 additions & 0 deletions code/vhaddw_q_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 1; i++) {
dst.qword[i] = (s128)(s64)a.dword[2 * i + 1] + (s128)(s64)b.dword[2 * i];
}
9 changes: 9 additions & 0 deletions code/vhaddw_qu_du.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhaddw_qu_du(v128 a, v128 b) {
v128 dst;
#include "vhaddw_qu_du.h"
return dst;
}

void test() { FUZZ2(vhaddw_qu_du); }
3 changes: 3 additions & 0 deletions code/vhaddw_qu_du.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 1; i++) {
dst.qword[i] = (u128)(u64)a.dword[2 * i + 1] + (u128)(u64)b.dword[2 * i];
}
9 changes: 9 additions & 0 deletions code/vhaddw_w_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhaddw_w_h(v128 a, v128 b) {
v128 dst;
#include "vhaddw_w_h.h"
return dst;
}

void test() { FUZZ2(vhaddw_w_h); }
3 changes: 3 additions & 0 deletions code/vhaddw_w_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 4; i++) {
dst.word[i] = (s32)(s16)a.half[2 * i + 1] + (s32)(s16)b.half[2 * i];
}
9 changes: 9 additions & 0 deletions code/vhaddw_wu_hu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhaddw_wu_hu(v128 a, v128 b) {
v128 dst;
#include "vhaddw_wu_hu.h"
return dst;
}

void test() { FUZZ2(vhaddw_wu_hu); }
3 changes: 3 additions & 0 deletions code/vhaddw_wu_hu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 4; i++) {
dst.word[i] = (u32)(u16)a.half[2 * i + 1] + (u32)(u16)b.half[2 * i];
}
9 changes: 9 additions & 0 deletions code/vhsubw_d_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhsubw_d_w(v128 a, v128 b) {
v128 dst;
#include "vhsubw_d_w.h"
return dst;
}

void test() { FUZZ2(vhsubw_d_w); }
3 changes: 3 additions & 0 deletions code/vhsubw_d_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 2; i++) {
dst.dword[i] = (s64)(s32)a.word[2 * i + 1] - (s64)(s32)b.word[2 * i];
}
9 changes: 9 additions & 0 deletions code/vhsubw_du_wu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhsubw_du_wu(v128 a, v128 b) {
v128 dst;
#include "vhsubw_du_wu.h"
return dst;
}

void test() { FUZZ2(vhsubw_du_wu); }
3 changes: 3 additions & 0 deletions code/vhsubw_du_wu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 2; i++) {
dst.dword[i] = (u64)(u32)a.word[2 * i + 1] - (u64)(u32)b.word[2 * i];
}
9 changes: 9 additions & 0 deletions code/vhsubw_h_b.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhsubw_h_b(v128 a, v128 b) {
v128 dst;
#include "vhsubw_h_b.h"
return dst;
}

void test() { FUZZ2(vhsubw_h_b); }
3 changes: 3 additions & 0 deletions code/vhsubw_h_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 8; i++) {
dst.half[i] = (s16)(s8)a.byte[2 * i + 1] - (s16)(s8)b.byte[2 * i];
}
9 changes: 9 additions & 0 deletions code/vhsubw_hu_bu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhsubw_hu_bu(v128 a, v128 b) {
v128 dst;
#include "vhsubw_hu_bu.h"
return dst;
}

void test() { FUZZ2(vhsubw_hu_bu); }
3 changes: 3 additions & 0 deletions code/vhsubw_hu_bu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 8; i++) {
dst.half[i] = (u16)(u8)a.byte[2 * i + 1] - (u16)(u8)b.byte[2 * i];
}
9 changes: 9 additions & 0 deletions code/vhsubw_q_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhsubw_q_d(v128 a, v128 b) {
v128 dst;
#include "vhsubw_q_d.h"
return dst;
}

void test() { FUZZ2(vhsubw_q_d); }
3 changes: 3 additions & 0 deletions code/vhsubw_q_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 1; i++) {
dst.qword[i] = (s128)(s64)a.dword[2 * i + 1] - (s128)(s64)b.dword[2 * i];
}
9 changes: 9 additions & 0 deletions code/vhsubw_qu_du.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhsubw_qu_du(v128 a, v128 b) {
v128 dst;
#include "vhsubw_qu_du.h"
return dst;
}

void test() { FUZZ2(vhsubw_qu_du); }
3 changes: 3 additions & 0 deletions code/vhsubw_qu_du.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 1; i++) {
dst.qword[i] = (u128)(u64)a.dword[2 * i + 1] - (u128)(u64)b.dword[2 * i];
}
9 changes: 9 additions & 0 deletions code/vhsubw_w_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhsubw_w_h(v128 a, v128 b) {
v128 dst;
#include "vhsubw_w_h.h"
return dst;
}

void test() { FUZZ2(vhsubw_w_h); }
3 changes: 3 additions & 0 deletions code/vhsubw_w_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 4; i++) {
dst.word[i] = (s32)(s16)a.half[2 * i + 1] - (s32)(s16)b.half[2 * i];
}
9 changes: 9 additions & 0 deletions code/vhsubw_wu_hu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

v128 vhsubw_wu_hu(v128 a, v128 b) {
v128 dst;
#include "vhsubw_wu_hu.h"
return dst;
}

void test() { FUZZ2(vhsubw_wu_hu); }
3 changes: 3 additions & 0 deletions code/vhsubw_wu_hu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 4; i++) {
dst.word[i] = (u32)(u16)a.half[2 * i + 1] - (u32)(u16)b.half[2 * i];
}
20 changes: 19 additions & 1 deletion docs/lsx/computation.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,4 +76,22 @@
{{ vdiv('w') }}
{{ vdiv('wu') }}
{{ vdiv('d') }}
{{ vdiv('du') }}
{{ vdiv('du') }}

{{ vhaddw('h', 'b') }}
{{ vhaddw('hu', 'bu') }}
{{ vhaddw('w', 'h') }}
{{ vhaddw('wu', 'hu') }}
{{ vhaddw('d', 'w') }}
{{ vhaddw('du', 'wu') }}
{{ vhaddw('q', 'd') }}
{{ vhaddw('qu', 'du') }}

{{ vhsubw('h', 'b') }}
{{ vhsubw('hu', 'bu') }}
{{ vhsubw('w', 'h') }}
{{ vhsubw('wu', 'hu') }}
{{ vhsubw('d', 'w') }}
{{ vhsubw('du', 'wu') }}
{{ vhsubw('q', 'd') }}
{{ vhsubw('qu', 'du') }}
22 changes: 22 additions & 0 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,28 @@ def vfdiv(name):
desc=f"Divide {fp_type} precision floating point elements in `a` by elements in `b`.",
)

@env.macro
def vhaddw(name, name2):
    """Build the documentation entry for the `vhaddw.{name}.{name2}` instruction.

    Args:
        name: Destination element suffix, e.g. ``"h"`` or ``"hu"``. Its first
            character is the key into ``widths``; the full suffix is the key
            into ``signednesses``.
        name2: Source element suffix, e.g. ``"b"`` or ``"bu"``. Only its first
            character is used, as the key into ``widths``.

    Returns:
        Whatever ``instruction(...)`` returns for the intrinsic prototype,
        assembly form and description built here.
    """
    width = widths[name[0]]
    width2 = widths[name2[0]]
    signedness = signednesses[name]
    return instruction(
        intrinsic=f"__m128i __lsx_vhaddw_{name}_{name2} (__m128i a, __m128i b)",
        instr=f"vhaddw.{name}.{name2} vr, vr, vr",
        # Both operands are wrapped in backticks so they render as inline code.
        desc=f"Add odd-positioned {signedness} {width2}-bit elements in `a` to even-positioned {signedness} {width2}-bit elements in `b` to get {width}-bit result.",
    )

@env.macro
def vhsubw(name, name2):
    """Build the documentation entry for the `vhsubw.{name}.{name2}` instruction.

    Args:
        name: Destination element suffix, e.g. ``"h"`` or ``"hu"``. Its first
            character is the key into ``widths``; the full suffix is the key
            into ``signednesses``.
        name2: Source element suffix, e.g. ``"b"`` or ``"bu"``. Only its first
            character is used, as the key into ``widths``.

    Returns:
        Whatever ``instruction(...)`` returns for the intrinsic prototype,
        assembly form and description built here.
    """
    width = widths[name[0]]
    width2 = widths[name2[0]]
    signedness = signednesses[name]
    return instruction(
        intrinsic=f"__m128i __lsx_vhsubw_{name}_{name2} (__m128i a, __m128i b)",
        instr=f"vhsubw.{name}.{name2} vr, vr, vr",
        # Operand order is spelled out (`b` is taken away from `a`) and both
        # operands are wrapped in backticks so they render as inline code.
        desc=f"Subtract even-positioned {signedness} {width2}-bit elements in `b` from odd-positioned {signedness} {width2}-bit elements in `a` to get {width}-bit result.",
    )

@env.macro
def vshuf_hwd(name):
width = widths[name]
Expand Down

0 comments on commit 0a74f65

Please sign in to comment.