From 11729587dd5114d82ae2abf31cb8de6f99e372a7 Mon Sep 17 00:00:00 2001 From: Jiajie Chen Date: Wed, 13 Dec 2023 20:41:25 +0800 Subject: [PATCH] Move all instructions to main.py --- code/vfclass_d.h | 3 + code/vfclass_s.h | 3 + code/vfmadd_d.h | 3 + code/vfmadd_s.h | 3 + code/vfmsub_d.h | 3 + code/vfmsub_s.h | 3 + code/vfnmadd_d.h | 3 + code/vfnmadd_s.h | 3 + code/vfnmsub_d.h | 3 + code/vfnmsub_s.h | 3 + docs/lsx/bitwise_operations.md | 42 +------- docs/lsx/float_conversion.md | 130 ++--------------------- docs/lsx/float_misc.md | 48 +-------- docs/lsx/fma.md | 188 ++------------------------------- main.py | 145 +++++++++++++++++++++++++ 15 files changed, 193 insertions(+), 390 deletions(-) create mode 100644 code/vfclass_d.h create mode 100644 code/vfclass_s.h create mode 100644 code/vfmadd_d.h create mode 100644 code/vfmadd_s.h create mode 100644 code/vfmsub_d.h create mode 100644 code/vfmsub_s.h create mode 100644 code/vfnmadd_d.h create mode 100644 code/vfnmadd_s.h create mode 100644 code/vfnmsub_d.h create mode 100644 code/vfnmsub_s.h diff --git a/code/vfclass_d.h b/code/vfclass_d.h new file mode 100644 index 00000000..3a148e0a --- /dev/null +++ b/code/vfclass_d.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 2;i++) { + dst.dword[i] = fp_classify(a.fp64[i]); +} \ No newline at end of file diff --git a/code/vfclass_s.h b/code/vfclass_s.h new file mode 100644 index 00000000..e11265f4 --- /dev/null +++ b/code/vfclass_s.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 4;i++) { + dst.word[i] = fp_classify(a.fp32[i]); +} \ No newline at end of file diff --git a/code/vfmadd_d.h b/code/vfmadd_d.h new file mode 100644 index 00000000..03b8f030 --- /dev/null +++ b/code/vfmadd_d.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 2;i++) { + dst.fp64[i] = a.fp64[i] * b.fp64[i] + c.fp64[i]; +} \ No newline at end of file diff --git a/code/vfmadd_s.h b/code/vfmadd_s.h new file mode 100644 index 00000000..528eed2f --- /dev/null +++ b/code/vfmadd_s.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 4;i++) { + dst.fp32[i] = 
a.fp32[i] * b.fp32[i] + c.fp32[i]; +} \ No newline at end of file diff --git a/code/vfmsub_d.h b/code/vfmsub_d.h new file mode 100644 index 00000000..cf80e91e --- /dev/null +++ b/code/vfmsub_d.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 2;i++) { + dst.fp64[i] = a.fp64[i] * b.fp64[i] - c.fp64[i]; +} \ No newline at end of file diff --git a/code/vfmsub_s.h b/code/vfmsub_s.h new file mode 100644 index 00000000..f5d2db36 --- /dev/null +++ b/code/vfmsub_s.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 4;i++) { + dst.fp32[i] = a.fp32[i] * b.fp32[i] - c.fp32[i]; +} \ No newline at end of file diff --git a/code/vfnmadd_d.h b/code/vfnmadd_d.h new file mode 100644 index 00000000..6672c0c4 --- /dev/null +++ b/code/vfnmadd_d.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 2;i++) { + dst.fp64[i] = -(a.fp64[i] * b.fp64[i] + c.fp64[i]); +} \ No newline at end of file diff --git a/code/vfnmadd_s.h b/code/vfnmadd_s.h new file mode 100644 index 00000000..553d1eef --- /dev/null +++ b/code/vfnmadd_s.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 4;i++) { + dst.fp32[i] = -(a.fp32[i] * b.fp32[i] + c.fp32[i]); +} \ No newline at end of file diff --git a/code/vfnmsub_d.h b/code/vfnmsub_d.h new file mode 100644 index 00000000..5abbb7a6 --- /dev/null +++ b/code/vfnmsub_d.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 2;i++) { + dst.fp64[i] = -(a.fp64[i] * b.fp64[i] - c.fp64[i]); +} \ No newline at end of file diff --git a/code/vfnmsub_s.h b/code/vfnmsub_s.h new file mode 100644 index 00000000..32b44b70 --- /dev/null +++ b/code/vfnmsub_s.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 4;i++) { + dst.fp32[i] = -(a.fp32[i] * b.fp32[i] - c.fp32[i]); +} \ No newline at end of file diff --git a/docs/lsx/bitwise_operations.md b/docs/lsx/bitwise_operations.md index a40d243e..3753a4db 100644 --- a/docs/lsx/bitwise_operations.md +++ b/docs/lsx/bitwise_operations.md @@ -1,46 +1,8 @@ # Bitwise Operations -## __m128i __lsx_vbitsel_v (__m128i a, __m128i b, __m128i c) +{{ vbitsel_v() }} -### Synopsis - -```c++ -__m128i __lsx_vbitsel_v (__m128i a, __m128i b, 
__m128i c) -#include -Instruction: vbitsel.v vr, vr, vr -CPU Flags: LSX -``` - -### Description - -Compute bitwise selection: for each bit position, if the bit in `c` equals to one, copy the bit from `b` to `dst`, otherwise copy from `a`. - -### Operation - -```c++ -{% include 'vbitsel_v.h' %} -``` - -## __m128i __lsx_vbitseli_b (__m128i a, __m128i b, imm0_255 imm) - -### Synopsis - -```c++ -__m128i __lsx_vbitseli_b (__m128i a, __m128i b, imm0_255 imm) -#include -Instruction: vbitseli.b vr, vr, imm -CPU Flags: LSX -``` - -### Description - -Compute bitwise selection: for each bit position, if the bit in `a` equals to one, copy the bit from `imm` to `dst`, otherwise copy from `b`. - -### Operation - -```c++ -{% include 'vbitseli_b.h' %} -``` +{{ vbitseli_b() }} {{ vbitclr('b') }} {{ vbitclr('h') }} diff --git a/docs/lsx/float_conversion.md b/docs/lsx/float_conversion.md index a127b169..19261699 100644 --- a/docs/lsx/float_conversion.md +++ b/docs/lsx/float_conversion.md @@ -1,132 +1,14 @@ # Floating Point Conversion -## __m128d __lsx_vfcvth_d_s (__m128 a) +{{ vfcvth_d_s() }} +{{ vfcvtl_d_s() }} -### Synopsis +{{ vfcvt_s_d() }} -```c++ -__m128d __lsx_vfcvth_d_s (__m128 a) -#include -Instruction: vfcvth.d.s vr, vr -CPU Flags: LSX -``` +{{ vfcvth_s_h() }} +{{ vfcvtl_s_h() }} -### Description - -Convert single precision floating point elements in higher half of `a` to double precision. - -### Operation - -```c++ -{% include('vfcvth_d_s.h') %} -``` - -## __m128d __lsx_vfcvtl_d_s (__m128 a) - -### Synopsis - -```c++ -__m128d __lsx_vfcvtl_d_s (__m128 a) -#include -Instruction: vfcvtl.d.s vr, vr -CPU Flags: LSX -``` - -### Description - -Convert single precision floating point elements in lower half of `a` to double precision. 
- -### Operation - -```c++ -{% include('vfcvtl_d_s.h') %} -``` - - -## __m128 __lsx_vfcvt_s_d (__m128d a, __m128d b) - -### Synopsis - -```c++ -__m128 __lsx_vfcvt_s_d (__m128a, __m128d b) -#include -Instruction: vfcvt.s.d vr, vr, vr -CPU Flags: LSX -``` - -### Description - -Convert double precision floating point elements in `a` and `b` to double precision. - -### Operation - -```c++ -{% include('vfcvt_s_d.h') %} -``` - -## __m128 __lsx_vfcvth_s_h (__m128i a) - -### Synopsis - -```c++ -__m128 __lsx_vfcvth_s_h (__m128i a) -#include -Instruction: vfcvth.s.h vr, vr -CPU Flags: LSX -``` - -### Description - -Convert half precision floating point elements in higher half of `a` to single precision. - -### Operation - -```c++ -{% include('vfcvth_s_h.h') %} -``` - -## __m128 __lsx_vfcvtl_s_h (__m128i a) - -### Synopsis - -```c++ -__m128 __lsx_vfcvtl_s_h (__m128i a) -#include -Instruction: vfcvtl.s.h vr, vr -CPU Flags: LSX -``` - -### Description - -Convert half precision floating point elements in lower half of `a` to single precision. - -### Operation - -```c++ -{% include('vfcvtl_s_h.h') %} -``` - - -## __m128i __lsx_vfcvt_h_s (__m128 a, __m128 b) - -### Synopsis - -```c++ -__m128i __lsx_vfcvt_h_s (__m128, __m128 b) -#include -Instruction: vfcvt.h.s vr, vr, vr -CPU Flags: LSX -``` - -### Description - -Convert single precision floating point elements in `a` and `b` to half precision. - -### Operation - -```c++ -{% include('vfcvt_h_s.h') %} -``` +{{ vfcvt_h_s() }} {{ vffint_d_w('h') }} {{ vffint_d_w('l') }} diff --git a/docs/lsx/float_misc.md b/docs/lsx/float_misc.md index b6abf3cf..fa99b45a 100644 --- a/docs/lsx/float_misc.md +++ b/docs/lsx/float_misc.md @@ -1,51 +1,7 @@ # Floatint Point Misc -## __m128i __lsx_vfclass_d (__m128d a) - -### Synopsis - -```c++ -__m128i __lsx_vfclass_d (__m128d a) -#include -Instruction: vfclass.d vr, vr -CPU Flags: LSX -``` - -### Description - -Classifiy each double precision floating point elements in `a`. 
- -### Operation - -```c++ -for (int i = 0;i < 2;i++) { - dst.dword[i] = fp_classify(a.fp64[i]); -} -``` - -## __m128i __lsx_vfclass_s (__m128 a) - -### Synopsis - -```c++ -__m128i __lsx_vfclass_s (__m128d a) -#include -Instruction: vfclass.s vr, vr -CPU Flags: LSX -``` - -### Description - -Classifiy each single precision floating point elements in `a`. - -### Operation - -```c++ -for (int i = 0;i < 4;i++) { - dst.word[i] = fp_classify(a.fp32[i]); -} -``` - +{{ vfclass_d() }} +{{ vfclass_s() }} {{ vfrint('', 's') }} {{ vfrint('', 'd') }} diff --git a/docs/lsx/fma.md b/docs/lsx/fma.md index 3fba949a..f1476d92 100644 --- a/docs/lsx/fma.md +++ b/docs/lsx/fma.md @@ -1,185 +1,13 @@ # Fused Multiply-Add -## __m128 __lsx_vfmadd_s (__m128 a, __m128 b, __m128 c) +{{ vfmadd_d() }} +{{ vfmadd_s() }} -### Synopsis +{{ vfmsub_d() }} +{{ vfmsub_s() }} -```c++ -__m128 __lsx_vfmadd_s (__m128 a, __m128 b, __m128 c) -#include -Instruction: vfmadd.s vr, vr, vr -CPU Flags: LSX -``` +{{ vfnmadd_d() }} +{{ vfnmadd_s() }} -### Description - -Compute packed single precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, accumulate to elements in `c` and store the result in `dst`. - -### Operation - -```c++ -for (int i = 0;i < 4;i++) { - dst.fp32[i] = a.fp32[i] * b.fp32[i] + c.fp32[i]; -} -``` - -## __m128d __lsx_vfmadd_d (__m128d a, __m128d b, __m128d c) - -### Synopsis - -```c++ -__m128d __lsx_vfmadd_d (__m128d a, __m128d b, __m128d c) -#include -Instruction: vfmadd.d vr, vr, vr -CPU Flags: LSX -``` - -### Description - -Compute packed double precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, accumulate to elements in `c` and store the result in `dst`. 
- -### Operation - -```c++ -for (int i = 0;i < 2;i++) { - dst.fp64[i] = a.fp64[i] * b.fp64[i] + c.fp64[i]; -} -``` - -## __m128 __lsx_vfmsub_s (__m128 a, __m128 b, __m128 c) - -### Synopsis - -```c++ -__m128 __lsx_vfmsub_s (__m128 a, __m128 b, __m128 c) -#include -Instruction: vfmsub.s vr, vr, vr -CPU Flags: LSX -``` - -### Description - -Compute packed single precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, subtract elements in `c` and store the result in `dst`. - -### Operation - -```c++ -for (int i = 0;i < 4;i++) { - dst.fp32[i] = a.fp32[i] * b.fp32[i] - c.fp32[i]; -} -``` - -## __m128d __lsx_vfmsub_d (__m128d a, __m128d b, __m128d c) - -### Synopsis - -```c++ -__m128d __lsx_vfmsub_d (__m128d a, __m128d b, __m128d c) -#include -Instruction: vfmsub.d vr, vr, vr -CPU Flags: LSX -``` - -### Description - -Compute packed double precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, subtract elements in `c` and store the result in `dst`. - -### Operation - -```c++ -for (int i = 0;i < 2;i++) { - dst.fp64[i] = a.fp64[i] * b.fp64[i] - c.fp64[i]; -} -``` - -## __m128 __lsx_vfnmadd_s (__m128 a, __m128 b, __m128 c) - -### Synopsis - -```c++ -__m128 __lsx_vfnmadd_s (__m128 a, __m128 b, __m128 c) -#include -Instruction: vfnmadd.s vr, vr, vr -CPU Flags: LSX -``` - -### Description - -Compute packed single precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, accumulate to elements in `c` and store the negated result in `dst`. 
- -### Operation - -```c++ -for (int i = 0;i < 4;i++) { - dst.fp32[i] = -(a.fp32[i] * b.fp32[i] + c.fp32[i]); -} -``` - -## __m128d __lsx_vfnmadd_d (__m128d a, __m128d b, __m128d c) - -### Synopsis - -```c++ -__m128d __lsx_vfnmadd_d (__m128d a, __m128d b, __m128d c) -#include -Instruction: vfnmadd.d vr, vr, vr -CPU Flags: LSX -``` - -### Description - -Compute packed double precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, accumulate to elements in `c` and store the negated result in `dst`. - -### Operation - -```c++ -for (int i = 0;i < 2;i++) { - dst.fp64[i] = (a.fp64[i] * b.fp64[i] + c.fp64[i]); -} -``` - -## __m128 __lsx_vfnmsub_s (__m128 a, __m128 b, __m128 c) - -### Synopsis - -```c++ -__m128 __lsx_vfnmsub_s (__m128 a, __m128 b, __m128 c) -#include -Instruction: vfnmsub.s vr, vr, vr -CPU Flags: LSX -``` - -### Description - -Compute packed single precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, subtract elements in `c` and store the negated result in `dst`. - -### Operation - -```c++ -for (int i = 0;i < 4;i++) { - dst.fp32[i] = -(a.fp32[i] * b.fp32[i] - c.fp32[i]); -} -``` - -## __m128d __lsx_vfnmsub_d (__m128d a, __m128d b, __m128d c) - -### Synopsis - -```c++ -__m128d __lsx_vfnmsub_d (__m128d a, __m128d b, __m128d c) -#include -Instruction: vfnmsub.d vr, vr, vr -CPU Flags: LSX -``` - -### Description - -Compute packed double precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, subtract elements in `c` and store the negated result in `dst`. 
- -### Operation - -```c++ -for (int i = 0;i < 2;i++) { - dst.fp64[i] = -(a.fp64[i] * b.fp64[i] - c.fp64[i]); -} -``` +{{ vfnmsub_d() }} +{{ vfnmsub_s() }} diff --git a/main.py b/main.py index 29dceb0c..b336fbec 100644 --- a/main.py +++ b/main.py @@ -1457,4 +1457,149 @@ def vstx(): intrinsic=f"void __lsx_vstx (__m128i data, void * addr, long int offset)", instr=f"vstx vr, r, r", desc=f"Write 128-bit data in `data` to memory address `addr + offset`.", + ) + + @env.macro + def vbitsel_v(): + return instruction( + intrinsic=f"__m128i __lsx_vbitsel_v (__m128i a, __m128i b, __m128i c)", + instr=f"vbitsel.v vr, vr, vr", + desc=f"Compute bitwise selection: for each bit position, if the bit in `c` equals to one, copy the bit from `b` to `dst`, otherwise copy from `a`.", + ) + + @env.macro + def vbitseli_b(): + return instruction( + intrinsic=f"__m128i __lsx_vbitseli_b (__m128i a, __m128i b, imm0_255 imm)", + instr=f"vbitseli.b vr, vr, imm", + desc=f"Compute bitwise selection: for each bit position, if the bit in `a` equals to one, copy the bit from `imm` to `dst`, otherwise copy from `b`.", + ) + + @env.macro + def vfcvth_d_s(): + return instruction( + intrinsic=f"__m128d __lsx_vfcvth_d_s (__m128 a)", + instr=f"vfcvth.d.s vr, vr", + desc=f"Convert single precision floating point elements in higher half of `a` to double precision.", + ) + + @env.macro + def vfcvtl_d_s(): + return instruction( + intrinsic=f"__m128d __lsx_vfcvtl_d_s (__m128 a)", + instr=f"vfcvtl.d.s vr, vr", + desc=f"Convert single precision floating point elements in lower half of `a` to double precision.", + ) + + @env.macro + def vfcvt_s_d(): + return instruction( + intrinsic=f"__m128 __lsx_vfcvt_s_d (__m128d a, __m128d b)", + instr=f"vfcvt.s.d vr, vr, vr", + desc=f"Convert double precision floating point elements in `a` and `b` to single precision.", + ) + + @env.macro + def vfcvth_s_h(): + return instruction( + intrinsic=f"__m128 __lsx_vfcvth_s_h (__m128i a)", + instr=f"vfcvth.s.h vr, vr", + 
desc=f"Convert half precision floating point elements in higher half of `a` to single precision.", + ) + + @env.macro + def vfcvtl_s_h(): + return instruction( + intrinsic=f"__m128 __lsx_vfcvtl_s_h (__m128i a)", + instr=f"vfcvtl.s.h vr, vr", + desc=f"Convert half precision floating point elements in lower half of `a` to single precision.", + ) + + + @env.macro + def vfcvt_h_s(): + return instruction( + intrinsic=f"__m128i __lsx_vfcvt_h_s (__m128 a, __m128 b)", + instr=f"vfcvt.h.s vr, vr, vr", + desc=f"Convert single precision floating point elements in `a` and `b` to half precision.", + ) + + @env.macro + def vfclass_d(): + return instruction( + intrinsic=f"__m128i __lsx_vfclass_d (__m128d a)", + instr=f"vfclass.d vr, vr", + desc=f"Classify each double precision floating point element in `a`.", + ) + + @env.macro + def vfclass_s(): + return instruction( + intrinsic=f"__m128i __lsx_vfclass_s (__m128 a)", + instr=f"vfclass.s vr, vr", + desc=f"Classify each single precision floating point element in `a`.", + ) + + @env.macro + def vfmadd_s(): + return instruction( + intrinsic=f"__m128 __lsx_vfmadd_s (__m128 a, __m128 b, __m128 c)", + instr=f"vfmadd.s vr, vr, vr", + desc=f"Compute packed single precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, accumulate to elements in `c` and store the result in `dst`.", + ) + + @env.macro + def vfmadd_d(): + return instruction( + intrinsic=f"__m128d __lsx_vfmadd_d (__m128d a, __m128d b, __m128d c)", + instr=f"vfmadd.d vr, vr, vr", + desc=f"Compute packed double precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, accumulate to elements in `c` and store the result in `dst`.", + ) + + @env.macro + def vfmsub_s(): + return instruction( + intrinsic=f"__m128 __lsx_vfmsub_s (__m128 a, __m128 b, __m128 c)", + instr=f"vfmsub.s vr, vr, vr", + desc=f"Compute packed single precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, subtract elements in 
`c` and store the result in `dst`.", + ) + + @env.macro + def vfmsub_d(): + return instruction( + intrinsic=f"__m128d __lsx_vfmsub_d (__m128d a, __m128d b, __m128d c)", + instr=f"vfmsub.d vr, vr, vr", + desc=f"Compute packed double precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, subtract elements in `c` and store the result in `dst`.", + ) + + @env.macro + def vfnmadd_s(): + return instruction( + intrinsic=f"__m128 __lsx_vfnmadd_s (__m128 a, __m128 b, __m128 c)", + instr=f"vfnmadd.s vr, vr, vr", + desc=f"Compute packed single precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, accumulate to elements in `c` and store the negated result in `dst`.", + ) + + @env.macro + def vfnmadd_d(): + return instruction( + intrinsic=f"__m128d __lsx_vfnmadd_d (__m128d a, __m128d b, __m128d c)", + instr=f"vfnmadd.d vr, vr, vr", + desc=f"Compute packed double precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, accumulate to elements in `c` and store the negated result in `dst`.", + ) + + @env.macro + def vfnmsub_s(): + return instruction( + intrinsic=f"__m128 __lsx_vfnmsub_s (__m128 a, __m128 b, __m128 c)", + instr=f"vfnmsub.s vr, vr, vr", + desc=f"Compute packed single precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, subtract elements in `c` and store the negated result in `dst`.", + ) + + @env.macro + def vfnmsub_d(): + return instruction( + intrinsic=f"__m128d __lsx_vfnmsub_d (__m128d a, __m128d b, __m128d c)", + instr=f"vfnmsub.d vr, vr, vr", + desc=f"Compute packed double precision floating point FMA(Fused Multiply-Add): multiply elements in `a` and `b`, subtract elements in `c` and store the negated result in `dst`.", ) \ No newline at end of file