From f72a956315985bbffd93ac480db78ea1dd78ea07 Mon Sep 17 00:00:00 2001
From: Jiajie Chen <c@jia.je>
Date: Wed, 13 Dec 2023 14:47:04 +0800
Subject: [PATCH] Add vssrani/vssrlni

---
 README.md             | 16 ----------------
 code/gen_impl.py      | 38 ++++++++++++++++++++++++++++++++++++++
 code/gen_tb.py        |  5 ++++-
 code/vssrani_b_h.cpp  | 13 +++++++++++++
 code/vssrani_b_h.h    |  9 +++++++++
 code/vssrani_bu_h.cpp | 13 +++++++++++++
 code/vssrani_bu_h.h   |  9 +++++++++
 code/vssrani_d_q.cpp  | 13 +++++++++++++
 code/vssrani_d_q.h    |  9 +++++++++
 code/vssrani_du_q.cpp | 13 +++++++++++++
 code/vssrani_du_q.h   |  9 +++++++++
 code/vssrani_h_w.cpp  | 13 +++++++++++++
 code/vssrani_h_w.h    |  9 +++++++++
 code/vssrani_hu_w.cpp | 13 +++++++++++++
 code/vssrani_hu_w.h   |  9 +++++++++
 code/vssrani_w_d.cpp  | 13 +++++++++++++
 code/vssrani_w_d.h    |  9 +++++++++
 code/vssrani_wu_d.cpp | 13 +++++++++++++
 code/vssrani_wu_d.h   |  9 +++++++++
 code/vssrlni_b_h.cpp  | 13 +++++++++++++
 code/vssrlni_b_h.h    |  9 +++++++++
 code/vssrlni_bu_h.cpp | 13 +++++++++++++
 code/vssrlni_bu_h.h   |  9 +++++++++
 code/vssrlni_d_q.cpp  | 13 +++++++++++++
 code/vssrlni_d_q.h    |  9 +++++++++
 code/vssrlni_du_q.cpp | 13 +++++++++++++
 code/vssrlni_du_q.h   |  9 +++++++++
 code/vssrlni_h_w.cpp  | 13 +++++++++++++
 code/vssrlni_h_w.h    |  9 +++++++++
 code/vssrlni_hu_w.cpp | 13 +++++++++++++
 code/vssrlni_hu_w.h   |  9 +++++++++
 code/vssrlni_w_d.cpp  | 13 +++++++++++++
 code/vssrlni_w_d.h    |  9 +++++++++
 code/vssrlni_wu_d.cpp | 13 +++++++++++++
 code/vssrlni_wu_d.h   |  9 +++++++++
 docs/lsx/shift.md     | 18 ++++++++++++++++++
 main.py               | 26 ++++++++++++++++++++++++--
 37 files changed, 436 insertions(+), 19 deletions(-)
 create mode 100644 code/vssrani_b_h.cpp
 create mode 100644 code/vssrani_b_h.h
 create mode 100644 code/vssrani_bu_h.cpp
 create mode 100644 code/vssrani_bu_h.h
 create mode 100644 code/vssrani_d_q.cpp
 create mode 100644 code/vssrani_d_q.h
 create mode 100644 code/vssrani_du_q.cpp
 create mode 100644 code/vssrani_du_q.h
 create mode 100644 code/vssrani_h_w.cpp
 create mode 100644 code/vssrani_h_w.h
 create mode 100644 code/vssrani_hu_w.cpp
 create mode 100644 code/vssrani_hu_w.h
 create mode 100644 code/vssrani_w_d.cpp
 create mode 100644 code/vssrani_w_d.h
 create mode 100644 code/vssrani_wu_d.cpp
 create mode 100644 code/vssrani_wu_d.h
 create mode 100644 code/vssrlni_b_h.cpp
 create mode 100644 code/vssrlni_b_h.h
 create mode 100644 code/vssrlni_bu_h.cpp
 create mode 100644 code/vssrlni_bu_h.h
 create mode 100644 code/vssrlni_d_q.cpp
 create mode 100644 code/vssrlni_d_q.h
 create mode 100644 code/vssrlni_du_q.cpp
 create mode 100644 code/vssrlni_du_q.h
 create mode 100644 code/vssrlni_h_w.cpp
 create mode 100644 code/vssrlni_h_w.h
 create mode 100644 code/vssrlni_hu_w.cpp
 create mode 100644 code/vssrlni_hu_w.h
 create mode 100644 code/vssrlni_w_d.cpp
 create mode 100644 code/vssrlni_w_d.h
 create mode 100644 code/vssrlni_wu_d.cpp
 create mode 100644 code/vssrlni_wu_d.h

diff --git a/README.md b/README.md
index 2ec388c1..8d834509 100644
--- a/README.md
+++ b/README.md
@@ -6,18 +6,6 @@ Arranged from QEMU implementation and [GCC Intrinsics](https://gcc.gnu.org/onlin
 
 TODO List:
 
-### vssrln.b.h/h.w/w.d
-
-### vssran.b.h/h.w/w.d
-
-### vssrlrn.b.h/h.w/w.d
-
-### vssrarn.b.h/h.w/w.d
-
-### vssrln.bu.h/hu.w/wu.d
-
-### vssran.bu.h/hu.w/wu.d
-
 ### vssrlrn.bu.h/hu.w/wu.d
 
 ### vssrarn.bu.h/hu.w/wu.d
@@ -118,12 +106,8 @@ TODO List:
 
 ### vsat.b/h/w/d/bu/hu/wu/du
 
-### vssrlni.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q
-
 ### vssrlrni.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q
 
-### vssrani.b.h/h.w/w.d/d.q/bu.h/hu.w/wu.d/du.q
-
 ### vssrarni.b.h/h.w/w.d/d.q
 
 ### vssrarni.bu.h/hu.w/wu.d/du.q
diff --git a/code/gen_impl.py b/code/gen_impl.py
index 3093bfd0..ac4535c3 100644
--- a/code/gen_impl.py
+++ b/code/gen_impl.py
@@ -204,6 +204,44 @@
                 print(f"}}", file=f)
                 print(f"}}", file=f)
 
+    # Saturating narrowing right shift by immediate (vssrlni/vssrani headers).
+    # Only the first character of double_width goes into the file name, giving
+    # names such as vssrlni_bu_h.h.
+    double_width_signed = double_width[:1]
+    for name, shift_sign in [("srl", "u"), ("sra", "s")]:
+        # C type of the wide source element: "u"/"s" prefix + double_w.
+        wide_type = f"{shift_sign}{double_w}"
+        # Clamp bounds of the narrow destination element.  Not named min/max
+        # so the builtins are not rebound in the enclosing scope.
+        if sign == "u":
+            sat_min = 0
+            sat_max = (2**w) - 1
+        else:
+            # A logical shift is computed on an unsigned value and can never
+            # go negative, so only the arithmetic form has a negative bound.
+            sat_min = 0 if shift_sign == "u" else -(2 ** (w - 1))
+            sat_max = (2 ** (w - 1)) - 1
+        clamp_expr = f"clamp<{wide_type}>(temp, {sat_min}, {sat_max})"
+        # The header is a bare loop body that the matching .cpp #includes.
+        with open(f"vs{name}ni_{width}_{double_width_signed}.h", "w") as f:
+            print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
+            # Low half of dst is narrowed from b ...
+            print(f"if (i < {64 // w}) {{", file=f)
+            print(
+                f"  {wide_type} temp = ({wide_type})b.{double_m}[i] >> imm;",
+                file=f,
+            )
+            print(f"  dst.{m}[i] = {clamp_expr};", file=f)
+            # ... and the high half from a.
+            print("} else {", file=f)
+            print(
+                f"  {wide_type} temp = ({wide_type})a.{double_m}[i - {64 // w}] >> imm;",
+                file=f,
+            )
+            print(f"  dst.{m}[i] = {clamp_expr};", file=f)
+            print("}", file=f)
+            print("}", file=f)
+
     if width == "d" or width == "du":
         with open(f"vextl_{double_width}_{width}.h", "w") as f:
             print(f"for (int i = 0;i < {128 // double_w};i++) {{", file=f)
diff --git a/code/gen_tb.py b/code/gen_tb.py
index 50d19307..8c0765d4 100644
--- a/code/gen_tb.py
+++ b/code/gen_tb.py
@@ -9,6 +9,7 @@
 widths_vsrln = ["b_h", "h_w", "w_d"]
 widths_vssrln = ["b_h", "bu_h", "h_w", "hu_w", "w_d", "wu_d"]
 widths_vsrlni = ["b_h", "h_w", "w_d", "d_q"]
+widths_vssrlni = ["b_h", "bu_h", "h_w", "hu_w", "w_d", "wu_d", "d_q", "du_q"]
 widths_vaddw = [
     "h_b",
     "h_bu",
@@ -126,8 +127,10 @@
     "vsrlri": (widths_signed, "v128 a, int imm", [0, 7]),
     "vsrlrn": (widths_vsrln, "v128 a, v128 b"),
     "vsrlrni": (widths_vsrlni, "v128 a, v128 b, int imm", [0, 7, 15]),
-    "vssrln": (widths_vssrln, "v128 a, v128 b"),
     "vssran": (widths_vssrln, "v128 a, v128 b"),
+    "vssrani": (widths_vssrlni, "v128 a, v128 b, int imm", [0, 7, 15]),
+    "vssrln": (widths_vssrln, "v128 a, v128 b"),
+    "vssrlni": (widths_vssrlni, "v128 a, v128 b, int imm", [0, 7, 15]),
     "vsub": (widths_signed, "v128 a, v128 b"),
     "vsubwev": (widths_vsubw, "v128 a, v128 b"),
     "vsubwod": (widths_vsubw, "v128 a, v128 b"),
diff --git a/code/vssrani_b_h.cpp b/code/vssrani_b_h.cpp
new file mode 100644
index 00000000..4dbd2ae1
--- /dev/null
+++ b/code/vssrani_b_h.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrani_b_h(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrani_b_h.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrani_b_h, 0);
+  FUZZ2(vssrani_b_h, 7);
+  FUZZ2(vssrani_b_h, 15);
+}
diff --git a/code/vssrani_b_h.h b/code/vssrani_b_h.h
new file mode 100644
index 00000000..93653db4
--- /dev/null
+++ b/code/vssrani_b_h.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 16; i++) {
+  if (i < 8) {
+    s16 temp = (s16)b.half[i] >> imm;
+    dst.byte[i] = clamp<s16>(temp, -128, 127);
+  } else {
+    s16 temp = (s16)a.half[i - 8] >> imm;
+    dst.byte[i] = clamp<s16>(temp, -128, 127);
+  }
+}
diff --git a/code/vssrani_bu_h.cpp b/code/vssrani_bu_h.cpp
new file mode 100644
index 00000000..c4a8de95
--- /dev/null
+++ b/code/vssrani_bu_h.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrani_bu_h(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrani_bu_h.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrani_bu_h, 0);
+  FUZZ2(vssrani_bu_h, 7);
+  FUZZ2(vssrani_bu_h, 15);
+}
diff --git a/code/vssrani_bu_h.h b/code/vssrani_bu_h.h
new file mode 100644
index 00000000..70bc55a6
--- /dev/null
+++ b/code/vssrani_bu_h.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 16; i++) {
+  if (i < 8) {
+    s16 temp = (s16)b.half[i] >> imm;
+    dst.byte[i] = clamp<s16>(temp, 0, 255);
+  } else {
+    s16 temp = (s16)a.half[i - 8] >> imm;
+    dst.byte[i] = clamp<s16>(temp, 0, 255);
+  }
+}
diff --git a/code/vssrani_d_q.cpp b/code/vssrani_d_q.cpp
new file mode 100644
index 00000000..46370548
--- /dev/null
+++ b/code/vssrani_d_q.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrani_d_q(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrani_d_q.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrani_d_q, 0);
+  FUZZ2(vssrani_d_q, 7);
+  FUZZ2(vssrani_d_q, 15);
+}
diff --git a/code/vssrani_d_q.h b/code/vssrani_d_q.h
new file mode 100644
index 00000000..ffe71fd0
--- /dev/null
+++ b/code/vssrani_d_q.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 2; i++) {
+  if (i < 1) {
+    s128 temp = (s128)b.qword[i] >> imm;
+    dst.dword[i] = clamp<s128>(temp, -9223372036854775808, 9223372036854775807);
+  } else {
+    s128 temp = (s128)a.qword[i - 1] >> imm;
+    dst.dword[i] = clamp<s128>(temp, -9223372036854775808, 9223372036854775807);
+  }
+}
diff --git a/code/vssrani_du_q.cpp b/code/vssrani_du_q.cpp
new file mode 100644
index 00000000..4cdc467f
--- /dev/null
+++ b/code/vssrani_du_q.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrani_du_q(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrani_du_q.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrani_du_q, 0);
+  FUZZ2(vssrani_du_q, 7);
+  FUZZ2(vssrani_du_q, 15);
+}
diff --git a/code/vssrani_du_q.h b/code/vssrani_du_q.h
new file mode 100644
index 00000000..74cba48d
--- /dev/null
+++ b/code/vssrani_du_q.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 2; i++) {
+  if (i < 1) {
+    s128 temp = (s128)b.qword[i] >> imm;
+    dst.dword[i] = clamp<s128>(temp, 0, 18446744073709551615);
+  } else {
+    s128 temp = (s128)a.qword[i - 1] >> imm;
+    dst.dword[i] = clamp<s128>(temp, 0, 18446744073709551615);
+  }
+}
diff --git a/code/vssrani_h_w.cpp b/code/vssrani_h_w.cpp
new file mode 100644
index 00000000..086ae9d0
--- /dev/null
+++ b/code/vssrani_h_w.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrani_h_w(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrani_h_w.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrani_h_w, 0);
+  FUZZ2(vssrani_h_w, 7);
+  FUZZ2(vssrani_h_w, 15);
+}
diff --git a/code/vssrani_h_w.h b/code/vssrani_h_w.h
new file mode 100644
index 00000000..96075130
--- /dev/null
+++ b/code/vssrani_h_w.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 8; i++) {
+  if (i < 4) {
+    s32 temp = (s32)b.word[i] >> imm;
+    dst.half[i] = clamp<s32>(temp, -32768, 32767);
+  } else {
+    s32 temp = (s32)a.word[i - 4] >> imm;
+    dst.half[i] = clamp<s32>(temp, -32768, 32767);
+  }
+}
diff --git a/code/vssrani_hu_w.cpp b/code/vssrani_hu_w.cpp
new file mode 100644
index 00000000..41ec0077
--- /dev/null
+++ b/code/vssrani_hu_w.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrani_hu_w(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrani_hu_w.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrani_hu_w, 0);
+  FUZZ2(vssrani_hu_w, 7);
+  FUZZ2(vssrani_hu_w, 15);
+}
diff --git a/code/vssrani_hu_w.h b/code/vssrani_hu_w.h
new file mode 100644
index 00000000..0c6651f7
--- /dev/null
+++ b/code/vssrani_hu_w.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 8; i++) {
+  if (i < 4) {
+    s32 temp = (s32)b.word[i] >> imm;
+    dst.half[i] = clamp<s32>(temp, 0, 65535);
+  } else {
+    s32 temp = (s32)a.word[i - 4] >> imm;
+    dst.half[i] = clamp<s32>(temp, 0, 65535);
+  }
+}
diff --git a/code/vssrani_w_d.cpp b/code/vssrani_w_d.cpp
new file mode 100644
index 00000000..718a926a
--- /dev/null
+++ b/code/vssrani_w_d.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrani_w_d(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrani_w_d.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrani_w_d, 0);
+  FUZZ2(vssrani_w_d, 7);
+  FUZZ2(vssrani_w_d, 15);
+}
diff --git a/code/vssrani_w_d.h b/code/vssrani_w_d.h
new file mode 100644
index 00000000..fd6ba8e2
--- /dev/null
+++ b/code/vssrani_w_d.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 4; i++) {
+  if (i < 2) {
+    s64 temp = (s64)b.dword[i] >> imm;
+    dst.word[i] = clamp<s64>(temp, -2147483648, 2147483647);
+  } else {
+    s64 temp = (s64)a.dword[i - 2] >> imm;
+    dst.word[i] = clamp<s64>(temp, -2147483648, 2147483647);
+  }
+}
diff --git a/code/vssrani_wu_d.cpp b/code/vssrani_wu_d.cpp
new file mode 100644
index 00000000..e89b0b2d
--- /dev/null
+++ b/code/vssrani_wu_d.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrani_wu_d(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrani_wu_d.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrani_wu_d, 0);
+  FUZZ2(vssrani_wu_d, 7);
+  FUZZ2(vssrani_wu_d, 15);
+}
diff --git a/code/vssrani_wu_d.h b/code/vssrani_wu_d.h
new file mode 100644
index 00000000..98680cee
--- /dev/null
+++ b/code/vssrani_wu_d.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 4; i++) {
+  if (i < 2) {
+    s64 temp = (s64)b.dword[i] >> imm;
+    dst.word[i] = clamp<s64>(temp, 0, 4294967295);
+  } else {
+    s64 temp = (s64)a.dword[i - 2] >> imm;
+    dst.word[i] = clamp<s64>(temp, 0, 4294967295);
+  }
+}
diff --git a/code/vssrlni_b_h.cpp b/code/vssrlni_b_h.cpp
new file mode 100644
index 00000000..cadd2c81
--- /dev/null
+++ b/code/vssrlni_b_h.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrlni_b_h(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrlni_b_h.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrlni_b_h, 0);
+  FUZZ2(vssrlni_b_h, 7);
+  FUZZ2(vssrlni_b_h, 15);
+}
diff --git a/code/vssrlni_b_h.h b/code/vssrlni_b_h.h
new file mode 100644
index 00000000..7caeafcb
--- /dev/null
+++ b/code/vssrlni_b_h.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 16; i++) {
+  if (i < 8) {
+    u16 temp = (u16)b.half[i] >> imm;
+    dst.byte[i] = clamp<u16>(temp, 0, 127);
+  } else {
+    u16 temp = (u16)a.half[i - 8] >> imm;
+    dst.byte[i] = clamp<u16>(temp, 0, 127);
+  }
+}
diff --git a/code/vssrlni_bu_h.cpp b/code/vssrlni_bu_h.cpp
new file mode 100644
index 00000000..d3ff235d
--- /dev/null
+++ b/code/vssrlni_bu_h.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrlni_bu_h(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrlni_bu_h.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrlni_bu_h, 0);
+  FUZZ2(vssrlni_bu_h, 7);
+  FUZZ2(vssrlni_bu_h, 15);
+}
diff --git a/code/vssrlni_bu_h.h b/code/vssrlni_bu_h.h
new file mode 100644
index 00000000..e37aecc3
--- /dev/null
+++ b/code/vssrlni_bu_h.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 16; i++) {
+  if (i < 8) {
+    u16 temp = (u16)b.half[i] >> imm;
+    dst.byte[i] = clamp<u16>(temp, 0, 255);
+  } else {
+    u16 temp = (u16)a.half[i - 8] >> imm;
+    dst.byte[i] = clamp<u16>(temp, 0, 255);
+  }
+}
diff --git a/code/vssrlni_d_q.cpp b/code/vssrlni_d_q.cpp
new file mode 100644
index 00000000..84a98747
--- /dev/null
+++ b/code/vssrlni_d_q.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrlni_d_q(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrlni_d_q.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrlni_d_q, 0);
+  FUZZ2(vssrlni_d_q, 7);
+  FUZZ2(vssrlni_d_q, 15);
+}
diff --git a/code/vssrlni_d_q.h b/code/vssrlni_d_q.h
new file mode 100644
index 00000000..964b4ac7
--- /dev/null
+++ b/code/vssrlni_d_q.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 2; i++) {
+  if (i < 1) {
+    u128 temp = (u128)b.qword[i] >> imm;
+    dst.dword[i] = clamp<u128>(temp, 0, 9223372036854775807);
+  } else {
+    u128 temp = (u128)a.qword[i - 1] >> imm;
+    dst.dword[i] = clamp<u128>(temp, 0, 9223372036854775807);
+  }
+}
diff --git a/code/vssrlni_du_q.cpp b/code/vssrlni_du_q.cpp
new file mode 100644
index 00000000..23d2920e
--- /dev/null
+++ b/code/vssrlni_du_q.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrlni_du_q(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrlni_du_q.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrlni_du_q, 0);
+  FUZZ2(vssrlni_du_q, 7);
+  FUZZ2(vssrlni_du_q, 15);
+}
diff --git a/code/vssrlni_du_q.h b/code/vssrlni_du_q.h
new file mode 100644
index 00000000..2c18c9de
--- /dev/null
+++ b/code/vssrlni_du_q.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 2; i++) {
+  if (i < 1) {
+    u128 temp = (u128)b.qword[i] >> imm;
+    dst.dword[i] = clamp<u128>(temp, 0, 18446744073709551615);
+  } else {
+    u128 temp = (u128)a.qword[i - 1] >> imm;
+    dst.dword[i] = clamp<u128>(temp, 0, 18446744073709551615);
+  }
+}
diff --git a/code/vssrlni_h_w.cpp b/code/vssrlni_h_w.cpp
new file mode 100644
index 00000000..1dd31bf0
--- /dev/null
+++ b/code/vssrlni_h_w.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrlni_h_w(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrlni_h_w.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrlni_h_w, 0);
+  FUZZ2(vssrlni_h_w, 7);
+  FUZZ2(vssrlni_h_w, 15);
+}
diff --git a/code/vssrlni_h_w.h b/code/vssrlni_h_w.h
new file mode 100644
index 00000000..1a54d8f2
--- /dev/null
+++ b/code/vssrlni_h_w.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 8; i++) {
+  if (i < 4) {
+    u32 temp = (u32)b.word[i] >> imm;
+    dst.half[i] = clamp<u32>(temp, 0, 32767);
+  } else {
+    u32 temp = (u32)a.word[i - 4] >> imm;
+    dst.half[i] = clamp<u32>(temp, 0, 32767);
+  }
+}
diff --git a/code/vssrlni_hu_w.cpp b/code/vssrlni_hu_w.cpp
new file mode 100644
index 00000000..63ab6660
--- /dev/null
+++ b/code/vssrlni_hu_w.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrlni_hu_w(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrlni_hu_w.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrlni_hu_w, 0);
+  FUZZ2(vssrlni_hu_w, 7);
+  FUZZ2(vssrlni_hu_w, 15);
+}
diff --git a/code/vssrlni_hu_w.h b/code/vssrlni_hu_w.h
new file mode 100644
index 00000000..c3705d8a
--- /dev/null
+++ b/code/vssrlni_hu_w.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 8; i++) {
+  if (i < 4) {
+    u32 temp = (u32)b.word[i] >> imm;
+    dst.half[i] = clamp<u32>(temp, 0, 65535);
+  } else {
+    u32 temp = (u32)a.word[i - 4] >> imm;
+    dst.half[i] = clamp<u32>(temp, 0, 65535);
+  }
+}
diff --git a/code/vssrlni_w_d.cpp b/code/vssrlni_w_d.cpp
new file mode 100644
index 00000000..248ce490
--- /dev/null
+++ b/code/vssrlni_w_d.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrlni_w_d(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrlni_w_d.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrlni_w_d, 0);
+  FUZZ2(vssrlni_w_d, 7);
+  FUZZ2(vssrlni_w_d, 15);
+}
diff --git a/code/vssrlni_w_d.h b/code/vssrlni_w_d.h
new file mode 100644
index 00000000..7fe8a39a
--- /dev/null
+++ b/code/vssrlni_w_d.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 4; i++) {
+  if (i < 2) {
+    u64 temp = (u64)b.dword[i] >> imm;
+    dst.word[i] = clamp<u64>(temp, 0, 2147483647);
+  } else {
+    u64 temp = (u64)a.dword[i - 2] >> imm;
+    dst.word[i] = clamp<u64>(temp, 0, 2147483647);
+  }
+}
diff --git a/code/vssrlni_wu_d.cpp b/code/vssrlni_wu_d.cpp
new file mode 100644
index 00000000..6dc85814
--- /dev/null
+++ b/code/vssrlni_wu_d.cpp
@@ -0,0 +1,13 @@
+#include "common.h"
+
+v128 vssrlni_wu_d(v128 a, v128 b, int imm) {
+  v128 dst;
+#include "vssrlni_wu_d.h"
+  return dst;
+}
+
+void test() {
+  FUZZ2(vssrlni_wu_d, 0);
+  FUZZ2(vssrlni_wu_d, 7);
+  FUZZ2(vssrlni_wu_d, 15);
+}
diff --git a/code/vssrlni_wu_d.h b/code/vssrlni_wu_d.h
new file mode 100644
index 00000000..c42012c4
--- /dev/null
+++ b/code/vssrlni_wu_d.h
@@ -0,0 +1,9 @@
+for (int i = 0; i < 4; i++) {
+  if (i < 2) {
+    u64 temp = (u64)b.dword[i] >> imm;
+    dst.word[i] = clamp<u64>(temp, 0, 4294967295);
+  } else {
+    u64 temp = (u64)a.dword[i - 2] >> imm;
+    dst.word[i] = clamp<u64>(temp, 0, 4294967295);
+  }
+}
diff --git a/docs/lsx/shift.md b/docs/lsx/shift.md
index cc40493e..5f44e0c3 100644
--- a/docs/lsx/shift.md
+++ b/docs/lsx/shift.md
@@ -142,6 +142,15 @@ Compute 128-bit `a` shifted right by `imm * 8` bits.
 {{ vssran('w', 'd') }}
 {{ vssran('wu', 'd') }}
 
+{{ vssrani('b', 'h') }}
+{{ vssrani('bu', 'h') }}
+{{ vssrani('h', 'w') }}
+{{ vssrani('hu', 'w') }}
+{{ vssrani('w', 'd') }}
+{{ vssrani('wu', 'd') }}
+{{ vssrani('d', 'q') }}
+{{ vssrani('du', 'q') }}
+
 {{ vssrln('b', 'h') }}
 {{ vssrln('bu', 'h') }}
 {{ vssrln('h', 'w') }}
@@ -149,6 +158,15 @@ Compute 128-bit `a` shifted right by `imm * 8` bits.
 {{ vssrln('w', 'd') }}
 {{ vssrln('wu', 'd') }}
 
+{{ vssrlni('b', 'h') }}
+{{ vssrlni('bu', 'h') }}
+{{ vssrlni('h', 'w') }}
+{{ vssrlni('hu', 'w') }}
+{{ vssrlni('w', 'd') }}
+{{ vssrlni('wu', 'd') }}
+{{ vssrlni('d', 'q') }}
+{{ vssrlni('du', 'q') }}
+
 {{ vrotr('b') }}
 {{ vrotr('h') }}
 {{ vrotr('w') }}
diff --git a/main.py b/main.py
index cb05ff22..9ab75a87 100644
--- a/main.py
+++ b/main.py
@@ -1097,7 +1097,7 @@ def vssrln(name, name2):
         return instruction(
             intrinsic=f"__m128i __lsx_vssrln_{name}_{name2} (__m128i a, __m128i b)",
             instr=f"vssrln.{name}.{name2} vr, vr, vr",
-            desc=f"Logical right shift the unsigned {width2}-bit elements in `a` by elements in `b`, clamp to fit in {signedness} {width}-bit and store the result to `dst`.",
+            desc=f"Logical right shift the unsigned {width2}-bit elements in `a` by elements in `b`, clamp to fit in {signedness} {width}-bit integer and store the result to `dst`.",
         )
 
     @env.macro
@@ -1108,5 +1108,27 @@ def vssran(name, name2):
         return instruction(
             intrinsic=f"__m128i __lsx_vssran_{name}_{name2} (__m128i a, __m128i b)",
             instr=f"vssran.{name}.{name2} vr, vr, vr",
-            desc=f"Arithemtic right shift the signed {width2}-bit elements in `a` by elements in `b`, clamp to fit in {signedness} {width}-bit and store the result to `dst`.",
+            desc=f"Arithmetic right shift the signed {width2}-bit elements in `a` by elements in `b`, clamp to fit in {signedness} {width}-bit integer and store the result to `dst`.",
         )
+
+    @env.macro
+    def vssrlni(name, name2):
+        """Return the `instruction(...)` entry describing `vssrlni.<name>.<name2>`."""
+        dst_bits, dst_sign = widths[name[0]], signednesses[name]
+        src_bits = widths[name2[0]]
+        return instruction(
+            intrinsic=f"__m128i __lsx_vssrlni_{name}_{name2} (__m128i a, __m128i b, imm0_{src_bits-1} imm)",
+            instr=f"vssrlni.{name}.{name2} vr, vr, imm",
+            desc=f"Logical right shift the unsigned {src_bits}-bit elements in `a` and `b` by `imm`, clamp to fit in {dst_sign} {dst_bits}-bit integer and store the result to `dst`.",
+        )
+
+    @env.macro
+    def vssrani(name, name2):
+        """Return the `instruction(...)` entry describing `vssrani.<name>.<name2>`."""
+        width, signedness = widths[name[0]], signednesses[name]
+        width2 = widths[name2[0]]
+        return instruction(
+            intrinsic=f"__m128i __lsx_vssrani_{name}_{name2} (__m128i a, __m128i b, imm0_{width2-1} imm)",
+            instr=f"vssrani.{name}.{name2} vr, vr, imm",
+            desc=f"Arithmetic right shift the signed {width2}-bit elements in `a` and `b` by `imm`, clamp to fit in {signedness} {width}-bit integer and store the result to `dst`.",
+        )
\ No newline at end of file