diff --git a/README.md b/README.md index f0295e15..9e783c7e 100644 --- a/README.md +++ b/README.md @@ -98,6 +98,4 @@ TODO List: ### vftintrnel.l.s -### vftintrneh.l.s - -### vsat.b/h/w/d/bu/hu/wu/du \ No newline at end of file +### vftintrneh.l.s \ No newline at end of file diff --git a/code/gen_impl.py b/code/gen_impl.py index 3389fd79..eb0e2b15 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -331,6 +331,7 @@ ) print(f"}}", file=f) + for name, op in [("lt", "<"), ("le", "<=")]: with open(f"vs{name}_{width}.h", "w") as f: print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) @@ -416,6 +417,19 @@ file=f, ) print(f"}}", file=f) + with open(f"vsat_{width}.h", "w") as f: + if sign == "s": + min = "-(1 << imm)" + max = "(1 << imm) - 1" + else: + min = "0" + max = "(1 << (imm+1)) - 1" + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = clamp<{sign}{w}>(a.{m}[i], {min}, {max});", + file=f, + ) + print(f"}}", file=f) for width in ["b", "h", "w", "d"]: w = widths[width] diff --git a/code/gen_tb.py b/code/gen_tb.py index a4421f19..bf6109b0 100644 --- a/code/gen_tb.py +++ b/code/gen_tb.py @@ -97,6 +97,7 @@ "vreplvei": (widths_signed, "v128 a, int idx", [0, 1]), "vreplgr2vr": (widths_signed, "int val", [0, 1, 256]), "vsadd": (widths_all, "v128 a, v128 b"), + "vsat": (widths_all, "v128 a, int imm", [0, 7]), "vseq": (widths_signed, "v128 a, v128 b"), "vseqi": (widths_signed, "v128 a, int imm", [-16, 0, 15]), "vshuf4i": (["b", "h", "w"], "v128 a, int imm", [0, 13, 100, 128, 255]), diff --git a/code/vsat_b.cpp b/code/vsat_b.cpp new file mode 100644 index 00000000..eb8236a4 --- /dev/null +++ b/code/vsat_b.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsat_b(v128 a, int imm) { + v128 dst; +#include "vsat_b.h" + return dst; +} + +void test() { + FUZZ1(vsat_b, 0); + FUZZ1(vsat_b, 7); +} diff --git a/code/vsat_b.h b/code/vsat_b.h new file mode 100644 index 00000000..3be0eb28 --- /dev/null +++ b/code/vsat_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = clamp(a.byte[i], -(1 << imm), (1 << imm) - 1); +} diff --git a/code/vsat_bu.cpp b/code/vsat_bu.cpp new file mode 100644 index 00000000..9af8c9a1 --- /dev/null +++ b/code/vsat_bu.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsat_bu(v128 a, int imm) { + v128 dst; +#include "vsat_bu.h" + return dst; +} + +void test() { + FUZZ1(vsat_bu, 0); + FUZZ1(vsat_bu, 7); +} diff --git a/code/vsat_bu.h b/code/vsat_bu.h new file mode 100644 index 00000000..bf3bc538 --- /dev/null +++ b/code/vsat_bu.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.byte[i] = clamp(a.byte[i], 0, (1 << (imm + 1)) - 1); +} diff --git a/code/vsat_d.cpp b/code/vsat_d.cpp new file mode 100644 index 00000000..42559699 --- /dev/null +++ b/code/vsat_d.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsat_d(v128 a, int imm) { + v128 dst; +#include "vsat_d.h" + return dst; +} + +void test() { + FUZZ1(vsat_d, 0); + FUZZ1(vsat_d, 7); +} diff --git a/code/vsat_d.h b/code/vsat_d.h new file mode 100644 index 00000000..97a08b57 --- /dev/null +++ b/code/vsat_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = clamp(a.dword[i], -(1 << imm), (1 << imm) - 1); +} diff --git a/code/vsat_du.cpp b/code/vsat_du.cpp new file mode 100644 index 00000000..4e24662d --- /dev/null +++ b/code/vsat_du.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsat_du(v128 a, int imm) { + v128 dst; +#include "vsat_du.h" + return dst; +} + +void test() { + FUZZ1(vsat_du, 0); + FUZZ1(vsat_du, 7); +} diff --git a/code/vsat_du.h b/code/vsat_du.h new file mode 100644 index 00000000..65315ad7 --- /dev/null +++ b/code/vsat_du.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.dword[i] = clamp(a.dword[i], 0, (1 << (imm + 1)) - 1); +} diff --git a/code/vsat_h.cpp b/code/vsat_h.cpp new file mode 100644 index 00000000..99297f8b --- /dev/null +++ b/code/vsat_h.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsat_h(v128 a, int imm) { + v128 dst; +#include "vsat_h.h" + return dst; +} + +void test() { + FUZZ1(vsat_h, 0); + FUZZ1(vsat_h, 7); +} diff --git a/code/vsat_h.h b/code/vsat_h.h new file mode 100644 index 00000000..85985e3c --- /dev/null +++ b/code/vsat_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = clamp(a.half[i], -(1 << imm), (1 << imm) - 1); +} diff --git a/code/vsat_hu.cpp b/code/vsat_hu.cpp new file mode 100644 index 00000000..cc955c95 --- /dev/null +++ b/code/vsat_hu.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsat_hu(v128 a, int imm) { + v128 dst; +#include "vsat_hu.h" + return dst; +} + +void test() { + FUZZ1(vsat_hu, 0); + FUZZ1(vsat_hu, 7); +} diff --git a/code/vsat_hu.h b/code/vsat_hu.h new file mode 100644 index 00000000..1af47881 --- /dev/null +++ b/code/vsat_hu.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.half[i] = clamp(a.half[i], 0, (1 << (imm + 1)) - 1); +} diff --git a/code/vsat_w.cpp b/code/vsat_w.cpp new file mode 100644 index 00000000..44955e16 --- /dev/null +++ b/code/vsat_w.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsat_w(v128 a, int imm) { + v128 dst; +#include "vsat_w.h" + return dst; +} + +void test() { + FUZZ1(vsat_w, 0); + FUZZ1(vsat_w, 7); +} diff --git a/code/vsat_w.h b/code/vsat_w.h new file mode 100644 index 00000000..324379c0 --- /dev/null +++ b/code/vsat_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = clamp(a.word[i], -(1 << imm), (1 << imm) - 1); +} diff --git a/code/vsat_wu.cpp b/code/vsat_wu.cpp new file mode 100644 index 00000000..093a0c94 --- /dev/null +++ b/code/vsat_wu.cpp @@ -0,0 +1,12 @@ +#include "common.h" + +v128 vsat_wu(v128 a, int imm) { + v128 dst; +#include "vsat_wu.h" + return dst; +} + +void test() { + FUZZ1(vsat_wu, 0); + FUZZ1(vsat_wu, 7); +} diff --git a/code/vsat_wu.h b/code/vsat_wu.h new file mode 100644 index 00000000..e3e3fc3b --- /dev/null +++ b/code/vsat_wu.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.word[i] = clamp(a.word[i], 0, (1 << (imm + 1)) - 1); +} diff --git a/docs/lsx/misc.md b/docs/lsx/misc.md index f511e480..4097a1bd 100644 --- a/docs/lsx/misc.md +++ b/docs/lsx/misc.md @@ -73,6 +73,15 @@ {{ vreplvei('w') }} {{ vreplvei('d') }} +{{ vsat('b') }} +{{ vsat('bu') }} +{{ vsat('h') }} +{{ vsat('hu') }} +{{ vsat('w') }} +{{ vsat('wu') }} +{{ vsat('d') }} +{{ vsat('du') }} + {{ vsigncov('b') }} {{ vsigncov('h') }} {{ vsigncov('w') }} diff --git a/main.py b/main.py index c535deab..c3c10375 100644 --- a/main.py +++ b/main.py @@ -1175,4 +1175,14 @@ def vssrarni(name, name2): intrinsic=f"__m128i __lsx_vssrarni_{name}_{name2} (__m128i a, __m128i b, imm0_{width2-1} imm)", instr=f"vssrarni.{name}.{name2} vr, vr, imm", desc=f"Arithemtic right shift (with rounding) the signed {width2}-bit elements in `a` and `b` by `imm`, clamp to fit in {signedness} {width}-bit integer and store the result to `dst`.", + ) + + @env.macro + def vsat(name): + width = widths[name] + signedness = signednesses[name] + return instruction( + intrinsic=f"__m128i __lsx_vsat_{name} (__m128i a, imm0_{width - 1} imm)", + instr=f"vsat.{name} vr, vr, imm", + desc=f"Clamp {signedness} {width}-bit elements in `a` to range specified by `imm`.", ) \ No newline at end of file