From afa2ff245565fbaada3cf3a64b51915bb6092417 Mon Sep 17 00:00:00 2001
From: Jiajie Chen <c@jia.je>
Date: Tue, 12 Dec 2023 13:26:00 +0800
Subject: [PATCH] Add vaddi.{bu,hu,wu,du} reference code, tests and docs

---
 code/common.h               |  5 +++++
 code/vaddi_bu.cpp           | 12 ++++++++++++
 code/vaddi_bu.h             |  3 +++
 code/vaddi_du.cpp           | 12 ++++++++++++
 code/vaddi_du.h             |  3 +++
 code/vaddi_hu.cpp           | 12 ++++++++++++
 code/vaddi_hu.h             |  3 +++
 code/vaddi_wu.cpp           | 12 ++++++++++++
 code/vaddi_wu.h             |  3 +++
 docs/lsx_integer/vaddsub.md |  7 ++++++-
 main.py                     | 36 +++++++++++++++++++++++-------------
 11 files changed, 94 insertions(+), 14 deletions(-)
 create mode 100644 code/vaddi_bu.cpp
 create mode 100644 code/vaddi_bu.h
 create mode 100644 code/vaddi_du.cpp
 create mode 100644 code/vaddi_du.h
 create mode 100644 code/vaddi_hu.cpp
 create mode 100644 code/vaddi_hu.h
 create mode 100644 code/vaddi_wu.cpp
 create mode 100644 code/vaddi_wu.h

diff --git a/code/common.h b/code/common.h
index aa6ab7f2..d1d2078d 100644
--- a/code/common.h
+++ b/code/common.h
@@ -23,6 +23,9 @@ union v128 {
   __m128 m128;
   __m128d m128d;
   v4i32 __v4i32;
+  v8i16 __v8i16;
+  v16i8 __v16i8;
+
   u8 byte[16];
   u16 half[8];
   u32 word[4];
@@ -38,6 +41,8 @@ union v128 {
 
   operator __m128i() { return m128i; }
   operator v4i32() { return __v4i32; }
+  operator v8i16() { return __v8i16; }
+  operator v16i8() { return __v16i8; }
   bool operator==(const v128 &other) const {
     return memcmp(byte, other.byte, 16) == 0;
   }
diff --git a/code/vaddi_bu.cpp b/code/vaddi_bu.cpp
new file mode 100644
index 00000000..926ca752
--- /dev/null
+++ b/code/vaddi_bu.cpp
@@ -0,0 +1,12 @@
+#include "common.h"
+
+v128 vaddi_bu(v128 a, int imm) { // returns a with imm added to each of its 16 byte lanes
+  v128 dst; // all 16 bytes are written by the included loop before the return
+#include "vaddi_bu.h" // loop body lives in its own header and is textually included here
+  return dst;
+}
+
+void test() { // runs vaddi_bu through FUZZ1 (defined outside this patch) at two immediates
+  FUZZ1(vaddi_bu, 1);
+  FUZZ1(vaddi_bu, 31); // presumably the largest immediate, going by the imm0_31 operand type in main.py - confirm
+}
diff --git a/code/vaddi_bu.h b/code/vaddi_bu.h
new file mode 100644
index 00000000..2e3c23ec
--- /dev/null
+++ b/code/vaddi_bu.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 16; i++) { // one iteration per byte lane
+  dst.byte[i] = a.byte[i] + imm; // byte[] elements are u8 (union v128 in common.h); the int sum is narrowed to that type on store
+}
diff --git a/code/vaddi_du.cpp b/code/vaddi_du.cpp
new file mode 100644
index 00000000..d47ac105
--- /dev/null
+++ b/code/vaddi_du.cpp
@@ -0,0 +1,12 @@
+#include "common.h"
+
+v128 vaddi_du(v128 a, int imm) { // returns a with imm added to each of its 2 dword lanes
+  v128 dst; // dword[0] and dword[1] are written by the included loop before the return
+#include "vaddi_du.h" // loop body lives in its own header and is textually included here
+  return dst;
+}
+
+void test() { // runs vaddi_du through FUZZ1 (defined outside this patch) at two immediates
+  FUZZ1(vaddi_du, 1);
+  FUZZ1(vaddi_du, 31); // presumably the largest immediate, going by the imm0_31 operand type in main.py - confirm
+}
diff --git a/code/vaddi_du.h b/code/vaddi_du.h
new file mode 100644
index 00000000..433ad2b1
--- /dev/null
+++ b/code/vaddi_du.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 2; i++) { // one iteration per dword lane
+  dst.dword[i] = a.dword[i] + imm; // element type of dword[] is not visible in this patch; presumably a 64-bit unsigned lane - confirm
+}
diff --git a/code/vaddi_hu.cpp b/code/vaddi_hu.cpp
new file mode 100644
index 00000000..c8e22bd3
--- /dev/null
+++ b/code/vaddi_hu.cpp
@@ -0,0 +1,12 @@
+#include "common.h"
+
+v128 vaddi_hu(v128 a, int imm) { // returns a with imm added to each of its 8 half lanes
+  v128 dst; // half[0..7] are written by the included loop before the return
+#include "vaddi_hu.h" // loop body lives in its own header and is textually included here
+  return dst;
+}
+
+void test() { // runs vaddi_hu through FUZZ1 (defined outside this patch) at two immediates
+  FUZZ1(vaddi_hu, 1);
+  FUZZ1(vaddi_hu, 31); // presumably the largest immediate, going by the imm0_31 operand type in main.py - confirm
+}
diff --git a/code/vaddi_hu.h b/code/vaddi_hu.h
new file mode 100644
index 00000000..8b549672
--- /dev/null
+++ b/code/vaddi_hu.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 8; i++) { // one iteration per half lane
+  dst.half[i] = a.half[i] + imm; // half[] elements are u16 (union v128 in common.h); the int sum is narrowed to that type on store
+}
diff --git a/code/vaddi_wu.cpp b/code/vaddi_wu.cpp
new file mode 100644
index 00000000..a48aae12
--- /dev/null
+++ b/code/vaddi_wu.cpp
@@ -0,0 +1,12 @@
+#include "common.h"
+
+v128 vaddi_wu(v128 a, int imm) { // returns a with imm added to each of its 4 word lanes
+  v128 dst; // word[0..3] are written by the included loop before the return
+#include "vaddi_wu.h" // loop body lives in its own header and is textually included here
+  return dst;
+}
+
+void test() { // runs vaddi_wu through FUZZ1 (defined outside this patch) at two immediates
+  FUZZ1(vaddi_wu, 1);
+  FUZZ1(vaddi_wu, 31); // presumably the largest immediate, going by the imm0_31 operand type in main.py - confirm
+}
diff --git a/code/vaddi_wu.h b/code/vaddi_wu.h
new file mode 100644
index 00000000..0f319589
--- /dev/null
+++ b/code/vaddi_wu.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 4; i++) { // one iteration per word lane
+  dst.word[i] = a.word[i] + imm; // word[] elements are u32 (union v128 in common.h); the sum is stored back in that type
+}
diff --git a/docs/lsx_integer/vaddsub.md b/docs/lsx_integer/vaddsub.md
index ad45efb1..f91d42c7 100644
--- a/docs/lsx_integer/vaddsub.md
+++ b/docs/lsx_integer/vaddsub.md
@@ -18,4 +18,9 @@
 {{ vadda('b') }}
 {{ vadda('h') }}
 {{ vadda('w') }}
-{{ vadda('d') }}
\ No newline at end of file
+{{ vadda('d') }}
+
+{{ vaddi('bu') }}
+{{ vaddi('hu') }}
+{{ vaddi('wu') }}
+{{ vaddi('du') }}
\ No newline at end of file
diff --git a/main.py b/main.py
index 4f7b38e5..b7e6338f 100644
--- a/main.py
+++ b/main.py
@@ -61,6 +61,16 @@ def instruction(intrinsic, instr, desc):
 
 """
 
+    @env.macro  # registers vabsd on env as a macro
+    def vabsd(name):  # name: element-type suffix; key into widths/signednesses and part of the generated strings
+        width = widths[name]
+        signedness = signednesses[name]
+        return instruction(  # delegates formatting to instruction(intrinsic, instr, desc)
+            intrinsic=f"__m128i __lsx_vabsd_{name} (__m128i a, __m128i b)",
+            instr=f"vabsd.{name} vr, vr, vr",
+            desc=f"Compute absolute difference of {signedness} {width}-bit elements in `a` and `b`, save the result in `dst`.",
+        )
+
     @env.macro
     def vadd(name):
         width = widths[name]
@@ -71,13 +81,21 @@ def vadd(name):
         )
 
     @env.macro
-    def vabsd(name):
+    def vadda(name):  # name: element-type suffix; key into widths and part of the generated strings
         width = widths[name]
-        signedness = signednesses[name]
         return instruction(
-            intrinsic=f"__m128i __lsx_vabsd_{name} (__m128i a, __m128i b)",
-            instr=f"vabsd.{name} vr, vr, vr",
-            desc=f"Compute absolute difference of {signedness} {width}-bit elements in `a` and `b`, save the result in `dst`.",
+            intrinsic=f"__m128i __lsx_vadda_{name} (__m128i a, __m128i b)",  # prototype text shown for the intrinsic
+            instr=f"vadda.{name} vr, vr, vr",  # assembly operand form: three vector registers
+            desc=f"Add absolute of {width}-bit elements in `a` and `b`, save the result in `dst`.",
+        )
+
+    @env.macro  # registers vaddi on env as a macro; docs/lsx_integer/vaddsub.md calls it as {{ vaddi('bu') }} etc.
+    def vaddi(name):  # name: element-type suffix ('bu', 'hu', 'wu', 'du' in the docs); key into widths
+        width = widths[name]
+        return instruction(  # delegates formatting to instruction(intrinsic, instr, desc)
+            intrinsic=f"__m128i __lsx_vaddi_{name} (__m128i a, imm0_31 imm)",  # second operand is an immediate, not a vector
+            instr=f"vaddi.{name} vr, vr, imm",
+            desc=f"Add {width}-bit elements in `a` and `imm`, save the result in `dst`.",
         )
 
     @env.macro
@@ -100,11 +118,3 @@ def vldrepl(name):
             desc=f"Read {width}-bit data from memory address `addr + (offset << {shift})`, replicate the data to all vector lanes and save into `dst`.",
         )
 
-    @env.macro
-    def vadda(name):
-        width = widths[name]
-        return instruction(
-            intrinsic=f"__m128i __lsx_vadda_{name} (__m128i a, __m128i b)",
-            instr=f"vadda.{name} vr, vr, vr",
-            desc=f"Add absolute of {width}-bit elements in `a` and `b`, save the result in `dst`.",
-        )