diff --git a/README.md b/README.md index 9e783c7e..2a5aa87a 100644 --- a/README.md +++ b/README.md @@ -8,10 +8,6 @@ TODO List: ### vfrstp.b/h -### vfsub.s/d - -### vfmul.s/d - ### vfmax.s/d ### vfmin.s/d diff --git a/code/gen_impl.py b/code/gen_impl.py index eb0e2b15..e4c766ad 100644 --- a/code/gen_impl.py +++ b/code/gen_impl.py @@ -763,12 +763,13 @@ for width in ["s", "d"]: m = members_fp[width] - with open(f"vfdiv_{width}.h", "w") as f: - print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) - print( - f" dst.{m}[i] = a.{m}[i] / b.{m}[i];", - file=f, - ) - print(f"}}", file=f) + for name, op in [("div", "/"), ("mul", "*"), ("sub", "-"), ("add", "+")]: + with open(f"vf{name}_{width}.h", "w") as f: + print(f"for (int i = 0;i < {128 // w};i++) {{", file=f) + print( + f" dst.{m}[i] = a.{m}[i] {op} b.{m}[i];", + file=f, + ) + print(f"}}", file=f) os.system("clang-format -i *.cpp *.h") diff --git a/code/vfadd_d.h b/code/vfadd_d.h new file mode 100644 index 00000000..0e5ebc35 --- /dev/null +++ b/code/vfadd_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.fp64[i] = a.fp64[i] + b.fp64[i]; +} diff --git a/code/vfadd_s.h b/code/vfadd_s.h new file mode 100644 index 00000000..3f67c7f7 --- /dev/null +++ b/code/vfadd_s.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.fp32[i] = a.fp32[i] + b.fp32[i]; +} diff --git a/code/vfmul_d.h b/code/vfmul_d.h new file mode 100644 index 00000000..becd05f4 --- /dev/null +++ b/code/vfmul_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.fp64[i] = a.fp64[i] * b.fp64[i]; +} diff --git a/code/vfmul_s.h b/code/vfmul_s.h new file mode 100644 index 00000000..93fc790b --- /dev/null +++ b/code/vfmul_s.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.fp32[i] = a.fp32[i] * b.fp32[i]; +} diff --git a/code/vfsub_d.h b/code/vfsub_d.h new file mode 100644 index 00000000..cd1778b7 --- /dev/null +++ b/code/vfsub_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.fp64[i] = a.fp64[i] - b.fp64[i]; +} diff --git a/code/vfsub_s.h b/code/vfsub_s.h new file mode 100644 index 00000000..4937b20b --- /dev/null +++ b/code/vfsub_s.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 2; i++) { + dst.fp32[i] = a.fp32[i] - b.fp32[i]; +} diff --git a/docs/lsx/float_computation.md b/docs/lsx/float_computation.md index 62dd618c..b8351200 100644 --- a/docs/lsx/float_computation.md +++ b/docs/lsx/float_computation.md @@ -1,50 +1,13 @@ # Floating Point Computation -## __m128d __lsx_vfadd_d (__m128d a, __m128d b) - -### Synopsis - -```c++ -__m128d __lsx_vfadd_d (__m128d a, __m128d b) -#include -Instruction: vfadd.d vr, vr, vr -CPU Flags: LSX -``` - -### Description - -Add double precision floating point elements in `a` to `b` and store the result in `dst`. - -### Operation - -```c++ -for (int i = 0;i < 2;i++) { - dst.fp64[i] = a.fp64[i] + b.fp64[i]; -} -``` - -## __m128 __lsx_vfadd_s (__m128 a, __m128 b) - -### Synopsis - -```c++ -__m128d __lsx_vfadd_s (__m128d a, __m128d b) -#include -Instruction: vfadd.s vr, vr, vr -CPU Flags: LSX -``` - -### Description - -Add single precision floating point elements in `a` to `b` and store the result in `dst`. - -### Operation - -```c++ -for (int i = 0;i < 4;i++) { - dst.fp32[i] = a.fp32[i] + b.fp32[i]; -} -``` +{{ vfadd('s') }} +{{ vfadd('d') }} {{ vfdiv('s') }} {{ vfdiv('d') }} + +{{ vfmul('s') }} +{{ vfmul('d') }} + +{{ vfsub('s') }} +{{ vfsub('d') }} \ No newline at end of file diff --git a/main.py b/main.py index c3c10375..a92f9242 100644 --- a/main.py +++ b/main.py @@ -358,6 +358,16 @@ def vfcmp(cond): """ ) + @env.macro + def vfmul(name): + precision = precisions[name] + fp_type = fp_types[name] + return instruction( + intrinsic=f"{fp_type} __lsx_vfmul_{name} ({fp_type} a, {fp_type} b)", + instr=f"vfmul.{name} vr, vr, vr", + desc=f"Multiply {precision} precision floating point elements in `a` and elements in `b`.", + ) + @env.macro def vfdiv(name): precision = precisions[name] @@ -365,7 +375,27 @@ def vfdiv(name): return instruction( intrinsic=f"{fp_type} __lsx_vfdiv_{name} ({fp_type} a, {fp_type} b)", instr=f"vfdiv.{name} vr, vr, vr", - desc=f"Divide {fp_type} precision floating point elements in `a` by elements in `b`.", + desc=f"Divide {precision} precision floating point elements in `a` by elements in `b`.", + ) + + @env.macro + def vfadd(name): + precision = precisions[name] + fp_type = fp_types[name] + return instruction( + intrinsic=f"{fp_type} __lsx_vfadd_{name} ({fp_type} a, {fp_type} b)", + instr=f"vfadd.{name} vr, vr, vr", + desc=f"Add {precision} precision floating point elements in `a` to elements in `b`.", + ) + + @env.macro + def vfsub(name): + precision = precisions[name] + fp_type = fp_types[name] + return instruction( + intrinsic=f"{fp_type} __lsx_vfsub_{name} ({fp_type} a, {fp_type} b)", + instr=f"vfsub.{name} vr, vr, vr", + desc=f"Subtract {precision} precision floating point elements in `a` by elements in `b`.", ) @env.macro