From 8fa69c1cfa0f0b7e7c2f31175c438887cbd54c95 Mon Sep 17 00:00:00 2001
From: Jiajie Chen <c@jia.je>
Date: Wed, 13 Dec 2023 15:41:31 +0800
Subject: [PATCH] Add vfmin/vfmina/vfmax/vfmaxa

---
 README.md                     |  8 -------
 code/gen_impl.py              | 23 ++++++++++++++++++++
 code/vfmax_d.h                |  3 +++
 code/vfmax_s.h                |  3 +++
 code/vfmaxa_d.h               |  3 +++
 code/vfmaxa_s.h               |  3 +++
 code/vfmin_d.h                |  3 +++
 code/vfmin_s.h                |  3 +++
 code/vfmina_d.h               |  3 +++
 code/vfmina_s.h               |  3 +++
 docs/lsx/float_computation.md | 12 +++++++++++
 main.py                       | 40 +++++++++++++++++++++++++++++++++++
 12 files changed, 99 insertions(+), 8 deletions(-)
 create mode 100644 code/vfmax_d.h
 create mode 100644 code/vfmax_s.h
 create mode 100644 code/vfmaxa_d.h
 create mode 100644 code/vfmaxa_s.h
 create mode 100644 code/vfmin_d.h
 create mode 100644 code/vfmin_s.h
 create mode 100644 code/vfmina_d.h
 create mode 100644 code/vfmina_s.h

diff --git a/README.md b/README.md
index 2a5aa87a..a23e71df 100644
--- a/README.md
+++ b/README.md
@@ -8,14 +8,6 @@ TODO List:
 
 ### vfrstp.b/h
 
-### vfmax.s/d
-
-### vfmin.s/d
-
-### vfmaxa.s/d
-
-### vfmina.s/d
-
 ### vfcvt.h.s
 
 ### vffint.s.l
diff --git a/code/gen_impl.py b/code/gen_impl.py
index e4c766ad..9d05528f 100644
--- a/code/gen_impl.py
+++ b/code/gen_impl.py
@@ -772,4 +772,27 @@
             )
             print(f"}}", file=f)
 
+    for name in ["max", "min"]:
+        with open(f"vf{name}_{width}.h", "w") as f:
+            # 128-bit vector: 4 fp32 or 2 fp64 lanes (`128 // w` gave 2 for fp32).
+            lanes = 128 // {"s": 32, "d": 64}[width]
+            print(f"for (int i = 0;i < {lanes};i++) {{", file=f)
+            # Emit C fmax()/fmin(); the previous `{op}` generated `+(a, b)`.
+            print(f"  dst.{m}[i] = f{name}(a.{m}[i], b.{m}[i]);", file=f)
+            print(f"}}", file=f)
+    with open(f"vfmaxa_{width}.h", "w") as f:
+        # 128-bit vector: 4 fp32 or 2 fp64 lanes (`128 // w` gave 2 for fp32).
+        lanes = 128 // {"s": 32, "d": 64}[width]
+        x, y = f"a.{m}[i]", f"b.{m}[i]"
+        print(f"for (int i = 0;i < {lanes};i++) {{", file=f)
+        print(f"  dst.{m}[i] = (abs({x}) > abs({y})) ? {x} : {y};", file=f)
+        print(f"}}", file=f)
+    with open(f"vfmina_{width}.h", "w") as f:
+        # 128-bit vector: 4 fp32 or 2 fp64 lanes (`128 // w` gave 2 for fp32).
+        lanes = 128 // {"s": 32, "d": 64}[width]
+        x, y = f"a.{m}[i]", f"b.{m}[i]"
+        print(f"for (int i = 0;i < {lanes};i++) {{", file=f)
+        print(f"  dst.{m}[i] = (abs({x}) < abs({y})) ? {x} : {y};", file=f)
+        print(f"}}", file=f)
+
 os.system("clang-format -i *.cpp *.h")
diff --git a/code/vfmax_d.h b/code/vfmax_d.h
new file mode 100644
index 00000000..fd54ba52
--- /dev/null
+++ b/code/vfmax_d.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 2; i++) {
+  dst.fp64[i] = fmax(a.fp64[i], b.fp64[i]);
+}
diff --git a/code/vfmax_s.h b/code/vfmax_s.h
new file mode 100644
index 00000000..85a2b177
--- /dev/null
+++ b/code/vfmax_s.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 4; i++) {
+  dst.fp32[i] = fmax(a.fp32[i], b.fp32[i]);
+}
diff --git a/code/vfmaxa_d.h b/code/vfmaxa_d.h
new file mode 100644
index 00000000..4123dae0
--- /dev/null
+++ b/code/vfmaxa_d.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 2; i++) {
+  dst.fp64[i] = (abs(a.fp64[i]) > abs(b.fp64[i])) ? a.fp64[i] : b.fp64[i];
+}
diff --git a/code/vfmaxa_s.h b/code/vfmaxa_s.h
new file mode 100644
index 00000000..90c964e9
--- /dev/null
+++ b/code/vfmaxa_s.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 4; i++) {
+  dst.fp32[i] = (abs(a.fp32[i]) > abs(b.fp32[i])) ? a.fp32[i] : b.fp32[i];
+}
diff --git a/code/vfmin_d.h b/code/vfmin_d.h
new file mode 100644
index 00000000..fd54ba52
--- /dev/null
+++ b/code/vfmin_d.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 2; i++) {
+  dst.fp64[i] = fmin(a.fp64[i], b.fp64[i]);
+}
diff --git a/code/vfmin_s.h b/code/vfmin_s.h
new file mode 100644
index 00000000..85a2b177
--- /dev/null
+++ b/code/vfmin_s.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 4; i++) {
+  dst.fp32[i] = fmin(a.fp32[i], b.fp32[i]);
+}
diff --git a/code/vfmina_d.h b/code/vfmina_d.h
new file mode 100644
index 00000000..938a2bc8
--- /dev/null
+++ b/code/vfmina_d.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 2; i++) {
+  dst.fp64[i] = (abs(a.fp64[i]) < abs(b.fp64[i])) ? a.fp64[i] : b.fp64[i];
+}
diff --git a/code/vfmina_s.h b/code/vfmina_s.h
new file mode 100644
index 00000000..da77ed57
--- /dev/null
+++ b/code/vfmina_s.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 4; i++) {
+  dst.fp32[i] = (abs(a.fp32[i]) < abs(b.fp32[i])) ? a.fp32[i] : b.fp32[i];
+}
diff --git a/docs/lsx/float_computation.md b/docs/lsx/float_computation.md
index b8351200..de93aaa1 100644
--- a/docs/lsx/float_computation.md
+++ b/docs/lsx/float_computation.md
@@ -6,6 +6,18 @@
 {{ vfdiv('s') }}
 {{ vfdiv('d') }}
 
+{{ vfmax('s') }}
+{{ vfmax('d') }}
+
+{{ vfmaxa('s') }}
+{{ vfmaxa('d') }}
+
+{{ vfmin('s') }}
+{{ vfmin('d') }}
+
+{{ vfmina('s') }}
+{{ vfmina('d') }}
+
 {{ vfmul('s') }}
 {{ vfmul('d') }}
 
diff --git a/main.py b/main.py
index a92f9242..10df3b03 100644
--- a/main.py
+++ b/main.py
@@ -398,6 +398,46 @@ def vfsub(name):
             desc=f"Subtract {precision} precision floating point elements in `a` by elements in `b`.",
         )
 
+    @env.macro
+    def vfmax(name):
+        """Build the `instruction` entry for `__lsx_vfmax_<name>`."""
+        prec, vec = precisions[name], fp_types[name]
+        return instruction(
+            intrinsic=f"{vec} __lsx_vfmax_{name} ({vec} a, {vec} b)",
+            instr=f"vfmax.{name} vr, vr, vr",
+            desc=f"Compute maximum of {prec} precision floating point elements in `a` and `b`.",
+        )
+
+    @env.macro
+    def vfmaxa(name):
+        """Build the `instruction` entry for `__lsx_vfmaxa_<name>`."""
+        prec, vec = precisions[name], fp_types[name]
+        return instruction(
+            intrinsic=f"{vec} __lsx_vfmaxa_{name} ({vec} a, {vec} b)",
+            instr=f"vfmaxa.{name} vr, vr, vr",
+            desc=f"Compute maximum of {prec} precision floating point elements in `a` and `b` by magnitude.",
+        )
+
+    @env.macro
+    def vfmin(name):
+        """Build the `instruction` entry for `__lsx_vfmin_<name>`."""
+        prec, vec = precisions[name], fp_types[name]
+        return instruction(
+            intrinsic=f"{vec} __lsx_vfmin_{name} ({vec} a, {vec} b)",
+            instr=f"vfmin.{name} vr, vr, vr",
+            desc=f"Compute minimum of {prec} precision floating point elements in `a` and `b`.",
+        )
+
+    @env.macro
+    def vfmina(name):
+        """Build the `instruction` entry for `__lsx_vfmina_<name>`."""
+        prec, vec = precisions[name], fp_types[name]
+        return instruction(
+            intrinsic=f"{vec} __lsx_vfmina_{name} ({vec} a, {vec} b)",
+            instr=f"vfmina.{name} vr, vr, vr",
+            desc=f"Compute minimum of {prec} precision floating point elements in `a` and `b` by magnitude.",
+        )
+
     @env.macro
     def vhaddw(name, name2):
         width = widths[name[0]]