diff --git a/README.md b/README.md
index 9e783c7e..2a5aa87a 100644
--- a/README.md
+++ b/README.md
@@ -8,10 +8,6 @@ TODO List:
 
 ### vfrstp.b/h
 
-### vfsub.s/d
-
-### vfmul.s/d
-
 ### vfmax.s/d
 
 ### vfmin.s/d
diff --git a/code/gen_impl.py b/code/gen_impl.py
index eb0e2b15..e4c766ad 100644
--- a/code/gen_impl.py
+++ b/code/gen_impl.py
@@ -763,12 +763,13 @@
 
 for width in ["s", "d"]:
     m = members_fp[width]
-    with open(f"vfdiv_{width}.h", "w") as f:
-        print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
-        print(
-            f"  dst.{m}[i] = a.{m}[i] / b.{m}[i];",
-            file=f,
-        )
-        print(f"}}", file=f)
+    for name, op in [("div", "/"), ("mul", "*"), ("sub", "-"), ("add", "+")]:
+        with open(f"vf{name}_{width}.h", "w") as f:
+            print(f"for (int i = 0;i < {128 // w};i++) {{", file=f)
+            print(
+                f"  dst.{m}[i] = a.{m}[i] {op} b.{m}[i];",
+                file=f,
+            )
+            print(f"}}", file=f)
 
 os.system("clang-format -i *.cpp *.h")
diff --git a/code/vfadd_d.h b/code/vfadd_d.h
new file mode 100644
index 00000000..0e5ebc35
--- /dev/null
+++ b/code/vfadd_d.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 2; i++) {
+  dst.fp64[i] = a.fp64[i] + b.fp64[i];
+}
diff --git a/code/vfadd_s.h b/code/vfadd_s.h
new file mode 100644
index 00000000..3f67c7f7
--- /dev/null
+++ b/code/vfadd_s.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 4; i++) {
+  dst.fp32[i] = a.fp32[i] + b.fp32[i];
+}
diff --git a/code/vfmul_d.h b/code/vfmul_d.h
new file mode 100644
index 00000000..becd05f4
--- /dev/null
+++ b/code/vfmul_d.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 2; i++) {
+  dst.fp64[i] = a.fp64[i] * b.fp64[i];
+}
diff --git a/code/vfmul_s.h b/code/vfmul_s.h
new file mode 100644
index 00000000..93fc790b
--- /dev/null
+++ b/code/vfmul_s.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 4; i++) {
+  dst.fp32[i] = a.fp32[i] * b.fp32[i];
+}
diff --git a/code/vfsub_d.h b/code/vfsub_d.h
new file mode 100644
index 00000000..cd1778b7
--- /dev/null
+++ b/code/vfsub_d.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 2; i++) {
+  dst.fp64[i] = a.fp64[i] - b.fp64[i];
+}
diff --git a/code/vfsub_s.h b/code/vfsub_s.h
new file mode 100644
index 00000000..4937b20b
--- /dev/null
+++ b/code/vfsub_s.h
@@ -0,0 +1,3 @@
+for (int i = 0; i < 4; i++) {
+  dst.fp32[i] = a.fp32[i] - b.fp32[i];
+}
diff --git a/docs/lsx/float_computation.md b/docs/lsx/float_computation.md
index 62dd618c..b8351200 100644
--- a/docs/lsx/float_computation.md
+++ b/docs/lsx/float_computation.md
@@ -1,50 +1,13 @@
 # Floating Point Computation
 
-## __m128d __lsx_vfadd_d (__m128d a, __m128d b)
-
-### Synopsis
-
-```c++
-__m128d __lsx_vfadd_d (__m128d a, __m128d b)
-#include <lsxintrin.h>
-Instruction: vfadd.d vr, vr, vr
-CPU Flags: LSX
-```
-
-### Description
-
-Add double precision floating point elements in `a` to `b` and store the result in `dst`.
-
-### Operation
-
-```c++
-for (int i = 0;i < 2;i++) {
-    dst.fp64[i] = a.fp64[i] + b.fp64[i];
-}
-```
-
-## __m128 __lsx_vfadd_s (__m128 a, __m128 b)
-
-### Synopsis
-
-```c++
-__m128d __lsx_vfadd_s (__m128d a, __m128d b)
-#include <lsxintrin.h>
-Instruction: vfadd.s vr, vr, vr
-CPU Flags: LSX
-```
-
-### Description
-
-Add single precision floating point elements in `a` to `b` and store the result in `dst`.
-
-### Operation
-
-```c++
-for (int i = 0;i < 4;i++) {
-    dst.fp32[i] = a.fp32[i] + b.fp32[i];
-}
-```
+{{ vfadd('s') }}
+{{ vfadd('d') }}
 
 {{ vfdiv('s') }}
 {{ vfdiv('d') }}
+
+{{ vfmul('s') }}
+{{ vfmul('d') }}
+
+{{ vfsub('s') }}
+{{ vfsub('d') }}
\ No newline at end of file
diff --git a/main.py b/main.py
index c3c10375..a92f9242 100644
--- a/main.py
+++ b/main.py
@@ -358,6 +358,16 @@ def vfcmp(cond):
             """
         )
 
+    @env.macro
+    def vfmul(name):
+        precision = precisions[name]
+        fp_type = fp_types[name]
+        return instruction(
+            intrinsic=f"{fp_type} __lsx_vfmul_{name} ({fp_type} a, {fp_type} b)",
+            instr=f"vfmul.{name} vr, vr, vr",
+            desc=f"Multiply {precision} precision floating point elements in `a` and elements in `b`.",
+        )
+
     @env.macro
     def vfdiv(name):
         precision = precisions[name]
@@ -365,7 +375,27 @@ def vfdiv(name):
         return instruction(
             intrinsic=f"{fp_type} __lsx_vfdiv_{name} ({fp_type} a, {fp_type} b)",
             instr=f"vfdiv.{name} vr, vr, vr",
-            desc=f"Divide {fp_type} precision floating point elements in `a` by elements in `b`.",
+            desc=f"Divide {precision} precision floating point elements in `a` by elements in `b`.",
+        )
+
+    @env.macro
+    def vfadd(name):
+        precision = precisions[name]
+        fp_type = fp_types[name]
+        return instruction(
+            intrinsic=f"{fp_type} __lsx_vfadd_{name} ({fp_type} a, {fp_type} b)",
+            instr=f"vfadd.{name} vr, vr, vr",
+            desc=f"Add {precision} precision floating point elements in `a` to elements in `b`.",
+        )
+
+    @env.macro
+    def vfsub(name):
+        precision = precisions[name]
+        fp_type = fp_types[name]
+        return instruction(
+            intrinsic=f"{fp_type} __lsx_vfsub_{name} ({fp_type} a, {fp_type} b)",
+            instr=f"vfsub.{name} vr, vr, vr",
+            desc=f"Subtract {precision} precision floating point elements in `a` by elements in `b`.",
         )
 
     @env.macro