From 8fa69c1cfa0f0b7e7c2f31175c438887cbd54c95 Mon Sep 17 00:00:00 2001 From: Jiajie Chen Date: Wed, 13 Dec 2023 15:41:31 +0800 Subject: [PATCH] Add vfmin/vfmina/vfmax/vfmaxa --- README.md | 8 ------- code/gen_impl.py | 23 ++++++++++++++++++++ code/vfmax_d.h | 3 +++ code/vfmax_s.h | 3 +++ code/vfmaxa_d.h | 3 +++ code/vfmaxa_s.h | 3 +++ code/vfmin_d.h | 3 +++ code/vfmin_s.h | 3 +++ code/vfmina_d.h | 3 +++ code/vfmina_s.h | 3 +++ docs/lsx/float_computation.md | 12 +++++++++++ main.py | 40 +++++++++++++++++++++++++++++++++++ 12 files changed, 99 insertions(+), 8 deletions(-) create mode 100644 code/vfmax_d.h create mode 100644 code/vfmax_s.h create mode 100644 code/vfmaxa_d.h create mode 100644 code/vfmaxa_s.h create mode 100644 code/vfmin_d.h create mode 100644 code/vfmin_s.h create mode 100644 code/vfmina_d.h create mode 100644 code/vfmina_s.h diff --git a/README.md b/README.md index 2a5aa87a..a23e71df 100644 --- a/README.md +++ b/README.md @@ -8,14 +8,6 @@ TODO List: ### vfrstp.b/h -### vfmax.s/d - -### vfmin.s/d - -### vfmaxa.s/d - -### vfmina.s/d - ### vfcvt.h.s ### vffint.s.l diff --git a/code/gen_impl.py b/code/gen_impl.py index e4c766ad..9d05528f 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -772,4 +772,27 @@ ) print(f"}}", file=f) + for name in ["max", "min"]: + with open(f"vf{name}_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = {op}(a.{m}[i], b.{m}[i]);", + file=f, + ) + print(f"}}", file=f) + with open(f"vfmaxa_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = (abs(a.{m}[i]) > abs(b.{m}[i])) ? a.{m}[i] : b.{m}[i];", + file=f, + ) + print(f"}}", file=f) + with open(f"vfmina_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = (abs(a.{m}[i]) < abs(b.{m}[i])) ? 
a.{m}[i] : b.{m}[i];", + file=f, + ) + print(f"}}", file=f) + os.system("clang-format -i *.cpp *.h") diff --git a/code/vfmax_d.h b/code/vfmax_d.h new file mode 100644 index 00000000..fd54ba52 --- /dev/null +++ b/code/vfmax_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.fp64[i] = +(a.fp64[i], b.fp64[i]); +} diff --git a/code/vfmax_s.h b/code/vfmax_s.h new file mode 100644 index 00000000..85a2b177 --- /dev/null +++ b/code/vfmax_s.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.fp32[i] = +(a.fp32[i], b.fp32[i]); +} diff --git a/code/vfmaxa_d.h b/code/vfmaxa_d.h new file mode 100644 index 00000000..4123dae0 --- /dev/null +++ b/code/vfmaxa_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.fp64[i] = (abs(a.fp64[i]) > abs(b.fp64[i])) ? a.fp64[i] : b.fp64[i]; +} diff --git a/code/vfmaxa_s.h b/code/vfmaxa_s.h new file mode 100644 index 00000000..90c964e9 --- /dev/null +++ b/code/vfmaxa_s.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.fp32[i] = (abs(a.fp32[i]) > abs(b.fp32[i])) ? a.fp32[i] : b.fp32[i]; +} diff --git a/code/vfmin_d.h b/code/vfmin_d.h new file mode 100644 index 00000000..fd54ba52 --- /dev/null +++ b/code/vfmin_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.fp64[i] = +(a.fp64[i], b.fp64[i]); +} diff --git a/code/vfmin_s.h b/code/vfmin_s.h new file mode 100644 index 00000000..85a2b177 --- /dev/null +++ b/code/vfmin_s.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.fp32[i] = +(a.fp32[i], b.fp32[i]); +} diff --git a/code/vfmina_d.h b/code/vfmina_d.h new file mode 100644 index 00000000..938a2bc8 --- /dev/null +++ b/code/vfmina_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.fp64[i] = (abs(a.fp64[i]) < abs(b.fp64[i])) ? a.fp64[i] : b.fp64[i]; +} diff --git a/code/vfmina_s.h b/code/vfmina_s.h new file mode 100644 index 00000000..da77ed57 --- /dev/null +++ b/code/vfmina_s.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.fp32[i] = (abs(a.fp32[i]) < abs(b.fp32[i])) ? 
a.fp32[i] : b.fp32[i]; +} diff --git a/docs/lsx/float_computation.md b/docs/lsx/float_computation.md index b8351200..de93aaa1 100644 --- a/docs/lsx/float_computation.md +++ b/docs/lsx/float_computation.md @@ -6,6 +6,18 @@ {{ vfdiv('s') }} {{ vfdiv('d') }} +{{ vfmax('s') }} +{{ vfmax('d') }} + +{{ vfmaxa('s') }} +{{ vfmaxa('d') }} + +{{ vfmin('s') }} +{{ vfmin('d') }} + +{{ vfmina('s') }} +{{ vfmina('d') }} + {{ vfmul('s') }} {{ vfmul('d') }} diff --git a/main.py b/main.py index a92f9242..10df3b03 100644 --- a/main.py +++ b/main.py @@ -398,6 +398,46 @@ def vfsub(name): desc=f"Subtract {precision} precision floating point elements in `a` by elements in `b`.", ) + @env.macro + def vfmax(name): + precision = precisions[name] + fp_type = fp_types[name] + return instruction( + intrinsic=f"{fp_type} __lsx_vfmax_{name} ({fp_type} a, {fp_type} b)", + instr=f"vfmax.{name} vr, vr, vr", + desc=f"Compute maximum of {precision} precision floating point elements in `a` and `b`.", + ) + + @env.macro + def vfmaxa(name): + precision = precisions[name] + fp_type = fp_types[name] + return instruction( + intrinsic=f"{fp_type} __lsx_vfmaxa_{name} ({fp_type} a, {fp_type} b)", + instr=f"vfmaxa.{name} vr, vr, vr", + desc=f"Compute maximum of {precision} precision floating point elements in `a` and `b` by magnitude.", + ) + + @env.macro + def vfmin(name): + precision = precisions[name] + fp_type = fp_types[name] + return instruction( + intrinsic=f"{fp_type} __lsx_vfmin_{name} ({fp_type} a, {fp_type} b)", + instr=f"vfmin.{name} vr, vr, vr", + desc=f"Compute minimum of {precision} precision floating point elements in `a` and `b`.", + ) + + @env.macro + def vfmina(name): + precision = precisions[name] + fp_type = fp_types[name] + return instruction( + intrinsic=f"{fp_type} __lsx_vfmina_{name} ({fp_type} a, {fp_type} b)", + instr=f"vfmina.{name} vr, vr, vr", + desc=f"Compute minimum of {precision} precision floating point elements in `a` and `b` by magnitude.", + ) + @env.macro def vhaddw(name, 
name2): width = widths[name[0]]