Skip to content

Commit

Permalink
Add vmax/vmaxi/vmin/vmini
Browse files Browse the repository at this point in the history
  • Loading branch information
jiegec committed Dec 12, 2023
1 parent c56c56b commit 940e7ef
Show file tree
Hide file tree
Showing 70 changed files with 544 additions and 24 deletions.
24 changes: 0 additions & 24 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,22 +54,6 @@ Vector Saturated Add Unsigned

Vector Saturated Subtract Unsigned

### vmax.b/h/w/d

Vector Maximum

### vmin.b/h/w/d

Vector Minimum

### vmax.bu/hu/wu/du

Vector Maximum Unsigned

### vmin.bu/hu/wu/du

Vector Minimum Unsigned

### vmul.b/h/w/d

Vector Multiplication
Expand Down Expand Up @@ -202,14 +186,6 @@ Vector Multiplication High

### vsubi.bu/hu/wu/du

### vmaxi.b/h/w/d

### vmini.b/h/w/d

### vmaxi.bu/hu/wu/du

### vmini.bu/hu/wu/du

### vfrstpi.b/h

### vclo.b/h/w/d
Expand Down
4 changes: 4 additions & 0 deletions code/common.h
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include <algorithm>
#include <assert.h>
#include <lsxintrin.h>
#include <stdint.h>
Expand Down Expand Up @@ -38,6 +39,9 @@ template <typename T> u8 clz(T num) {
return sizeof(T) * 8;
}

using std::max;
using std::min;

union v128 {
__m128i m128i;
__m128 m128;
Expand Down
28 changes: 28 additions & 0 deletions code/gen_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,34 @@
file=f,
)
print(f"}}", file=f)
# Emit vmax_{width}.h: a C++ loop over the 128 // w lanes that stores
# max(a lane, b lane) into dst, with both operands cast to {sign}{w}.
# width, w, m and sign are set earlier, outside this view; for the "b"
# variant the generated file uses 16 lanes, member "byte" and cast "s8".
# "{{" and "}}" are literal braces inside an f-string.
with open(f"vmax_{width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
print(
f" dst.{m}[i] = max(({sign}{w})a.{m}[i], ({sign}{w})b.{m}[i]);",
file=f,
)
print(f"}}", file=f)
# Emit vmaxi_{width}.h: a C++ loop over the 128 // w lanes that stores
# max(a lane, imm) into dst, with both the lane and imm cast to {sign}{w}.
# width, w, m and sign are set earlier, outside this view.
# "{{" and "}}" are literal braces inside an f-string.
with open(f"vmaxi_{width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
print(
f" dst.{m}[i] = max(({sign}{w})a.{m}[i], ({sign}{w})imm);",
file=f,
)
print(f"}}", file=f)
# Emit vmin_{width}.h: a C++ loop over the 128 // w lanes that stores
# min(a lane, b lane) into dst, with both operands cast to {sign}{w}.
# width, w, m and sign are set earlier, outside this view; for the "b"
# variant the generated file uses 16 lanes, member "byte" and cast "s8".
# "{{" and "}}" are literal braces inside an f-string.
with open(f"vmin_{width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
print(
f" dst.{m}[i] = min(({sign}{w})a.{m}[i], ({sign}{w})b.{m}[i]);",
file=f,
)
print(f"}}", file=f)
# Emit vmini_{width}.h: a C++ loop over the 128 // w lanes that stores
# min(a lane, imm) into dst, with both the lane and imm cast to {sign}{w}.
# width, w, m and sign are set earlier, outside this view.
# "{{" and "}}" are literal braces inside an f-string.
with open(f"vmini_{width}.h", "w") as f:
print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
print(
f" dst.{m}[i] = min(({sign}{w})a.{m}[i], ({sign}{w})imm);",
file=f,
)
print(f"}}", file=f)

for width in ["b", "bu", "h", "hu", "w", "wu", "d", "du"]:
double_width = double_widths[width]
Expand Down
4 changes: 4 additions & 0 deletions code/gen_tb.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,10 @@
"vilvh": (widths_signed, "v128 a, v128 b"),
"vilvl": (widths_signed, "v128 a, v128 b"),
"vinsgr2vr": (widths_signed, "v128 a, long int b, int imm", ["0, 0", "1234, 1", "5678, 1"]),
"vmax": (widths_all, "v128 a, v128 b"),
"vmaxi": (widths_all, "v128 a, int imm", [0, 3, 15]),
"vmin": (widths_all, "v128 a, v128 b"),
"vmini": (widths_all, "v128 a, int imm", [0, 3, 15]),
}

for name in tb:
Expand Down
9 changes: 9 additions & 0 deletions code/vmax_b.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vmax.b: per-lane signed maximum of a and b.
// The lane loop is pasted in textually from vmax_b.h; it reads the
// parameters `a` and `b` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmax_b(v128 a, v128 b) {
v128 dst;
#include "vmax_b.h"
return dst;
}

// Test entry point: hands the model to FUZZ2.
// NOTE(review): FUZZ2 is defined outside this file (presumably common.h) and
// presumably checks the model against the real instruction — confirm.
void test() { FUZZ2(vmax_b); }
3 changes: 3 additions & 0 deletions code/vmax_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmax.b, generated by gen_impl.py and pasted into vmax_b()
// via #include, so `a`, `b` and `dst` belong to the including function.
// For each of the 16 byte lanes, both inputs are cast to s8 and the larger
// one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 16; i++) {
dst.byte[i] = max((s8)a.byte[i], (s8)b.byte[i]);
}
9 changes: 9 additions & 0 deletions code/vmax_bu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vmax.bu: per-lane unsigned maximum of a and b.
// The lane loop is pasted in textually from vmax_bu.h; it reads the
// parameters `a` and `b` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmax_bu(v128 a, v128 b) {
v128 dst;
#include "vmax_bu.h"
return dst;
}

// Test entry point: hands the model to FUZZ2.
// NOTE(review): FUZZ2 is defined outside this file (presumably common.h) and
// presumably checks the model against the real instruction — confirm.
void test() { FUZZ2(vmax_bu); }
3 changes: 3 additions & 0 deletions code/vmax_bu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmax.bu, generated by gen_impl.py and pasted into vmax_bu()
// via #include, so `a`, `b` and `dst` belong to the including function.
// For each of the 16 byte lanes, both inputs are cast to u8 and the larger
// one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 16; i++) {
dst.byte[i] = max((u8)a.byte[i], (u8)b.byte[i]);
}
9 changes: 9 additions & 0 deletions code/vmax_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vmax.d: per-lane signed maximum of a and b.
// The lane loop is pasted in textually from vmax_d.h; it reads the
// parameters `a` and `b` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmax_d(v128 a, v128 b) {
v128 dst;
#include "vmax_d.h"
return dst;
}

// Test entry point: hands the model to FUZZ2.
// NOTE(review): FUZZ2 is defined outside this file (presumably common.h) and
// presumably checks the model against the real instruction — confirm.
void test() { FUZZ2(vmax_d); }
3 changes: 3 additions & 0 deletions code/vmax_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmax.d, generated by gen_impl.py and pasted into vmax_d()
// via #include, so `a`, `b` and `dst` belong to the including function.
// For each of the 2 dword lanes, both inputs are cast to s64 and the larger
// one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 2; i++) {
dst.dword[i] = max((s64)a.dword[i], (s64)b.dword[i]);
}
9 changes: 9 additions & 0 deletions code/vmax_du.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vmax.du: per-lane unsigned maximum of a and b.
// The lane loop is pasted in textually from vmax_du.h; it reads the
// parameters `a` and `b` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmax_du(v128 a, v128 b) {
v128 dst;
#include "vmax_du.h"
return dst;
}

// Test entry point: hands the model to FUZZ2.
// NOTE(review): FUZZ2 is defined outside this file (presumably common.h) and
// presumably checks the model against the real instruction — confirm.
void test() { FUZZ2(vmax_du); }
3 changes: 3 additions & 0 deletions code/vmax_du.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmax.du, generated by gen_impl.py and pasted into vmax_du()
// via #include, so `a`, `b` and `dst` belong to the including function.
// For each of the 2 dword lanes, both inputs are cast to u64 and the larger
// one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 2; i++) {
dst.dword[i] = max((u64)a.dword[i], (u64)b.dword[i]);
}
9 changes: 9 additions & 0 deletions code/vmax_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vmax.h: per-lane signed maximum of a and b.
// The lane loop is pasted in textually from vmax_h.h; it reads the
// parameters `a` and `b` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmax_h(v128 a, v128 b) {
v128 dst;
#include "vmax_h.h"
return dst;
}

// Test entry point: hands the model to FUZZ2.
// NOTE(review): FUZZ2 is defined outside this file (presumably common.h) and
// presumably checks the model against the real instruction — confirm.
void test() { FUZZ2(vmax_h); }
3 changes: 3 additions & 0 deletions code/vmax_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmax.h, generated by gen_impl.py and pasted into vmax_h()
// via #include, so `a`, `b` and `dst` belong to the including function.
// For each of the 8 half lanes, both inputs are cast to s16 and the larger
// one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 8; i++) {
dst.half[i] = max((s16)a.half[i], (s16)b.half[i]);
}
9 changes: 9 additions & 0 deletions code/vmax_hu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vmax.hu: per-lane unsigned maximum of a and b.
// The lane loop is pasted in textually from vmax_hu.h; it reads the
// parameters `a` and `b` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmax_hu(v128 a, v128 b) {
v128 dst;
#include "vmax_hu.h"
return dst;
}

// Test entry point: hands the model to FUZZ2.
// NOTE(review): FUZZ2 is defined outside this file (presumably common.h) and
// presumably checks the model against the real instruction — confirm.
void test() { FUZZ2(vmax_hu); }
3 changes: 3 additions & 0 deletions code/vmax_hu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmax.hu, generated by gen_impl.py and pasted into vmax_hu()
// via #include, so `a`, `b` and `dst` belong to the including function.
// For each of the 8 half lanes, both inputs are cast to u16 and the larger
// one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 8; i++) {
dst.half[i] = max((u16)a.half[i], (u16)b.half[i]);
}
9 changes: 9 additions & 0 deletions code/vmax_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vmax.w: per-lane signed maximum of a and b.
// The lane loop is pasted in textually from vmax_w.h; it reads the
// parameters `a` and `b` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmax_w(v128 a, v128 b) {
v128 dst;
#include "vmax_w.h"
return dst;
}

// Test entry point: hands the model to FUZZ2.
// NOTE(review): FUZZ2 is defined outside this file (presumably common.h) and
// presumably checks the model against the real instruction — confirm.
void test() { FUZZ2(vmax_w); }
3 changes: 3 additions & 0 deletions code/vmax_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmax.w, generated by gen_impl.py and pasted into vmax_w()
// via #include, so `a`, `b` and `dst` belong to the including function.
// For each of the 4 word lanes, both inputs are cast to s32 and the larger
// one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 4; i++) {
dst.word[i] = max((s32)a.word[i], (s32)b.word[i]);
}
9 changes: 9 additions & 0 deletions code/vmax_wu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vmax.wu: per-lane unsigned maximum of a and b.
// The lane loop is pasted in textually from vmax_wu.h; it reads the
// parameters `a` and `b` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmax_wu(v128 a, v128 b) {
v128 dst;
#include "vmax_wu.h"
return dst;
}

// Test entry point: hands the model to FUZZ2.
// NOTE(review): FUZZ2 is defined outside this file (presumably common.h) and
// presumably checks the model against the real instruction — confirm.
void test() { FUZZ2(vmax_wu); }
3 changes: 3 additions & 0 deletions code/vmax_wu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmax.wu, generated by gen_impl.py and pasted into vmax_wu()
// via #include, so `a`, `b` and `dst` belong to the including function.
// For each of the 4 word lanes, both inputs are cast to u32 and the larger
// one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 4; i++) {
dst.word[i] = max((u32)a.word[i], (u32)b.word[i]);
}
13 changes: 13 additions & 0 deletions code/vmaxi_b.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "common.h"

// Software model of vmaxi.b: per-lane signed maximum of a and the immediate.
// The lane loop is pasted in textually from vmaxi_b.h; it reads the
// parameters `a` and `imm` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmaxi_b(v128 a, int imm) {
v128 dst;
#include "vmaxi_b.h"
return dst;
}

// Test entry point: runs FUZZ1 once per immediate (0, 3, 15).
// NOTE(review): FUZZ1 is defined outside this file; presumably it checks the
// model against the real instruction — confirm. No negative immediate is
// covered here; add one if the instruction encoding allows it.
void test() {
FUZZ1(vmaxi_b, 0);
FUZZ1(vmaxi_b, 3);
FUZZ1(vmaxi_b, 15);
}
3 changes: 3 additions & 0 deletions code/vmaxi_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmaxi.b, generated by gen_impl.py and pasted into vmaxi_b()
// via #include, so `a`, `imm` and `dst` belong to the including function.
// For each of the 16 byte lanes, the lane and imm are both cast to s8 and
// the larger one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 16; i++) {
dst.byte[i] = max((s8)a.byte[i], (s8)imm);
}
13 changes: 13 additions & 0 deletions code/vmaxi_bu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "common.h"

// Software model of vmaxi.bu: per-lane unsigned maximum of a and the
// immediate. The lane loop is pasted in textually from vmaxi_bu.h; it reads
// the parameters `a` and `imm` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmaxi_bu(v128 a, int imm) {
v128 dst;
#include "vmaxi_bu.h"
return dst;
}

// Test entry point: runs FUZZ1 once per immediate (0, 3, 15).
// NOTE(review): FUZZ1 is defined outside this file; presumably it checks the
// model against the real instruction — confirm. No immediate above 15 is
// covered here; add one if the instruction encoding allows it.
void test() {
FUZZ1(vmaxi_bu, 0);
FUZZ1(vmaxi_bu, 3);
FUZZ1(vmaxi_bu, 15);
}
3 changes: 3 additions & 0 deletions code/vmaxi_bu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmaxi.bu, generated by gen_impl.py and pasted into vmaxi_bu()
// via #include, so `a`, `imm` and `dst` belong to the including function.
// For each of the 16 byte lanes, the lane and imm are both cast to u8 and
// the larger one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 16; i++) {
dst.byte[i] = max((u8)a.byte[i], (u8)imm);
}
13 changes: 13 additions & 0 deletions code/vmaxi_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "common.h"

// Software model of vmaxi.d: per-lane signed maximum of a and the immediate.
// The lane loop is pasted in textually from vmaxi_d.h; it reads the
// parameters `a` and `imm` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmaxi_d(v128 a, int imm) {
v128 dst;
#include "vmaxi_d.h"
return dst;
}

// Test entry point: runs FUZZ1 once per immediate (0, 3, 15).
// NOTE(review): FUZZ1 is defined outside this file; presumably it checks the
// model against the real instruction — confirm. No negative immediate is
// covered here; add one if the instruction encoding allows it.
void test() {
FUZZ1(vmaxi_d, 0);
FUZZ1(vmaxi_d, 3);
FUZZ1(vmaxi_d, 15);
}
3 changes: 3 additions & 0 deletions code/vmaxi_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmaxi.d, generated by gen_impl.py and pasted into vmaxi_d()
// via #include, so `a`, `imm` and `dst` belong to the including function.
// For each of the 2 dword lanes, the lane and imm are both cast to s64 and
// the larger one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 2; i++) {
dst.dword[i] = max((s64)a.dword[i], (s64)imm);
}
13 changes: 13 additions & 0 deletions code/vmaxi_du.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "common.h"

// Software model of vmaxi.du: per-lane unsigned maximum of a and the
// immediate. The lane loop is pasted in textually from vmaxi_du.h; it reads
// the parameters `a` and `imm` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmaxi_du(v128 a, int imm) {
v128 dst;
#include "vmaxi_du.h"
return dst;
}

// Test entry point: runs FUZZ1 once per immediate (0, 3, 15).
// NOTE(review): FUZZ1 is defined outside this file; presumably it checks the
// model against the real instruction — confirm. No immediate above 15 is
// covered here; add one if the instruction encoding allows it.
void test() {
FUZZ1(vmaxi_du, 0);
FUZZ1(vmaxi_du, 3);
FUZZ1(vmaxi_du, 15);
}
3 changes: 3 additions & 0 deletions code/vmaxi_du.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmaxi.du, generated by gen_impl.py and pasted into vmaxi_du()
// via #include, so `a`, `imm` and `dst` belong to the including function.
// For each of the 2 dword lanes, the lane and imm are both cast to u64 and
// the larger one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 2; i++) {
dst.dword[i] = max((u64)a.dword[i], (u64)imm);
}
13 changes: 13 additions & 0 deletions code/vmaxi_h.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "common.h"

// Software model of vmaxi.h: per-lane signed maximum of a and the immediate.
// The lane loop is pasted in textually from vmaxi_h.h; it reads the
// parameters `a` and `imm` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmaxi_h(v128 a, int imm) {
v128 dst;
#include "vmaxi_h.h"
return dst;
}

// Test entry point: runs FUZZ1 once per immediate (0, 3, 15).
// NOTE(review): FUZZ1 is defined outside this file; presumably it checks the
// model against the real instruction — confirm. No negative immediate is
// covered here; add one if the instruction encoding allows it.
void test() {
FUZZ1(vmaxi_h, 0);
FUZZ1(vmaxi_h, 3);
FUZZ1(vmaxi_h, 15);
}
3 changes: 3 additions & 0 deletions code/vmaxi_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmaxi.h, generated by gen_impl.py and pasted into vmaxi_h()
// via #include, so `a`, `imm` and `dst` belong to the including function.
// For each of the 8 half lanes, the lane and imm are both cast to s16 and
// the larger one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 8; i++) {
dst.half[i] = max((s16)a.half[i], (s16)imm);
}
13 changes: 13 additions & 0 deletions code/vmaxi_hu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "common.h"

// Software model of vmaxi.hu: per-lane unsigned maximum of a and the
// immediate. The lane loop is pasted in textually from vmaxi_hu.h; it reads
// the parameters `a` and `imm` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmaxi_hu(v128 a, int imm) {
v128 dst;
#include "vmaxi_hu.h"
return dst;
}

// Test entry point: runs FUZZ1 once per immediate (0, 3, 15).
// NOTE(review): FUZZ1 is defined outside this file; presumably it checks the
// model against the real instruction — confirm. No immediate above 15 is
// covered here; add one if the instruction encoding allows it.
void test() {
FUZZ1(vmaxi_hu, 0);
FUZZ1(vmaxi_hu, 3);
FUZZ1(vmaxi_hu, 15);
}
3 changes: 3 additions & 0 deletions code/vmaxi_hu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmaxi.hu, generated by gen_impl.py and pasted into vmaxi_hu()
// via #include, so `a`, `imm` and `dst` belong to the including function.
// For each of the 8 half lanes, the lane and imm are both cast to u16 and
// the larger one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 8; i++) {
dst.half[i] = max((u16)a.half[i], (u16)imm);
}
13 changes: 13 additions & 0 deletions code/vmaxi_w.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "common.h"

// Software model of vmaxi.w: per-lane signed maximum of a and the immediate.
// The lane loop is pasted in textually from vmaxi_w.h; it reads the
// parameters `a` and `imm` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmaxi_w(v128 a, int imm) {
v128 dst;
#include "vmaxi_w.h"
return dst;
}

// Test entry point: runs FUZZ1 once per immediate (0, 3, 15).
// NOTE(review): FUZZ1 is defined outside this file; presumably it checks the
// model against the real instruction — confirm. No negative immediate is
// covered here; add one if the instruction encoding allows it.
void test() {
FUZZ1(vmaxi_w, 0);
FUZZ1(vmaxi_w, 3);
FUZZ1(vmaxi_w, 15);
}
3 changes: 3 additions & 0 deletions code/vmaxi_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmaxi.w, generated by gen_impl.py and pasted into vmaxi_w()
// via #include, so `a`, `imm` and `dst` belong to the including function.
// For each of the 4 word lanes, the lane and imm are both cast to s32 and
// the larger one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 4; i++) {
dst.word[i] = max((s32)a.word[i], (s32)imm);
}
13 changes: 13 additions & 0 deletions code/vmaxi_wu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#include "common.h"

// Software model of vmaxi.wu: per-lane unsigned maximum of a and the
// immediate. The lane loop is pasted in textually from vmaxi_wu.h; it reads
// the parameters `a` and `imm` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmaxi_wu(v128 a, int imm) {
v128 dst;
#include "vmaxi_wu.h"
return dst;
}

// Test entry point: runs FUZZ1 once per immediate (0, 3, 15).
// NOTE(review): FUZZ1 is defined outside this file; presumably it checks the
// model against the real instruction — confirm. No immediate above 15 is
// covered here; add one if the instruction encoding allows it.
void test() {
FUZZ1(vmaxi_wu, 0);
FUZZ1(vmaxi_wu, 3);
FUZZ1(vmaxi_wu, 15);
}
3 changes: 3 additions & 0 deletions code/vmaxi_wu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmaxi.wu, generated by gen_impl.py and pasted into vmaxi_wu()
// via #include, so `a`, `imm` and `dst` belong to the including function.
// For each of the 4 word lanes, the lane and imm are both cast to u32 and
// the larger one is stored. `max` is std::max (using-declaration in common.h).
for (int i = 0; i < 4; i++) {
dst.word[i] = max((u32)a.word[i], (u32)imm);
}
9 changes: 9 additions & 0 deletions code/vmin_b.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vmin.b: per-lane signed minimum of a and b.
// The lane loop is pasted in textually from vmin_b.h; it reads the
// parameters `a` and `b` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmin_b(v128 a, v128 b) {
v128 dst;
#include "vmin_b.h"
return dst;
}

// Test entry point: hands the model to FUZZ2.
// NOTE(review): FUZZ2 is defined outside this file (presumably common.h) and
// presumably checks the model against the real instruction — confirm.
void test() { FUZZ2(vmin_b); }
3 changes: 3 additions & 0 deletions code/vmin_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmin.b, generated by gen_impl.py and pasted into vmin_b()
// via #include, so `a`, `b` and `dst` belong to the including function.
// For each of the 16 byte lanes, both inputs are cast to s8 and the smaller
// one is stored. `min` is std::min (using-declaration in common.h).
for (int i = 0; i < 16; i++) {
dst.byte[i] = min((s8)a.byte[i], (s8)b.byte[i]);
}
9 changes: 9 additions & 0 deletions code/vmin_bu.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vmin.bu: per-lane unsigned minimum of a and b.
// The lane loop is pasted in textually from vmin_bu.h; it reads the
// parameters `a` and `b` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmin_bu(v128 a, v128 b) {
v128 dst;
#include "vmin_bu.h"
return dst;
}

// Test entry point: hands the model to FUZZ2.
// NOTE(review): FUZZ2 is defined outside this file (presumably common.h) and
// presumably checks the model against the real instruction — confirm.
void test() { FUZZ2(vmin_bu); }
3 changes: 3 additions & 0 deletions code/vmin_bu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmin.bu, generated by gen_impl.py and pasted into vmin_bu()
// via #include, so `a`, `b` and `dst` belong to the including function.
// For each of the 16 byte lanes, both inputs are cast to u8 and the smaller
// one is stored. `min` is std::min (using-declaration in common.h).
for (int i = 0; i < 16; i++) {
dst.byte[i] = min((u8)a.byte[i], (u8)b.byte[i]);
}
9 changes: 9 additions & 0 deletions code/vmin_d.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
#include "common.h"

// Software model of vmin.d: per-lane signed minimum of a and b.
// The lane loop is pasted in textually from vmin_d.h; it reads the
// parameters `a` and `b` and writes the local `dst` by name, so those
// identifiers must stay exactly as they are.
v128 vmin_d(v128 a, v128 b) {
v128 dst;
#include "vmin_d.h"
return dst;
}

// Test entry point: hands the model to FUZZ2.
// NOTE(review): FUZZ2 is defined outside this file (presumably common.h) and
// presumably checks the model against the real instruction — confirm.
void test() { FUZZ2(vmin_d); }
3 changes: 3 additions & 0 deletions code/vmin_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Lane loop for vmin.d, generated by gen_impl.py and pasted into vmin_d()
// via #include, so `a`, `b` and `dst` belong to the including function.
// For each of the 2 dword lanes, both inputs are cast to s64 and the smaller
// one is stored. `min` is std::min (using-declaration in common.h).
for (int i = 0; i < 2; i++) {
dst.dword[i] = min((s64)a.dword[i], (s64)b.dword[i]);
}
Loading

0 comments on commit 940e7ef

Please sign in to comment.