From 8fac972fd8553c0d735cdbed9dc525309d38dae8 Mon Sep 17 00:00:00 2001 From: Jiajie Chen Date: Thu, 14 Dec 2023 11:01:36 +0800 Subject: [PATCH] Add lasx instructions --- code/xvabsd_b.h | 4 + code/xvabsd_bu.h | 4 + code/xvabsd_d.h | 5 + code/xvabsd_du.h | 5 + code/xvabsd_h.h | 4 + code/xvabsd_hu.h | 4 + code/xvabsd_w.h | 4 + code/xvabsd_wu.h | 4 + code/xvadd_q.h | 3 + code/xvadda_b.h | 3 + code/xvadda_d.h | 3 + code/xvadda_h.h | 3 + code/xvadda_w.h | 3 + code/xvand_v.h | 3 + code/xvandi_b.h | 3 + code/xvandn_v.h | 3 + code/xvbsll_v.h | 3 + code/xvbsrl_v.h | 3 + code/xvfclass_d.h | 3 + code/xvfclass_s.h | 3 + code/xvfmadd_d.h | 3 + code/xvfmadd_s.h | 3 + code/xvfmsub_d.h | 3 + code/xvfmsub_s.h | 3 + code/xvfnmadd_d.h | 3 + code/xvfnmadd_s.h | 3 + code/xvfnmsub_d.h | 3 + code/xvfnmsub_s.h | 3 + code/xvinsgr2vr_b.h | 3 + code/xvinsgr2vr_h.h | 3 + code/xvld.h | 1 + code/xvldi.h | 93 +++++++++++ code/xvldrepl_b.h | 4 + code/xvldrepl_d.h | 4 + code/xvldrepl_h.h | 4 + code/xvldrepl_w.h | 4 + code/xvldx.h | 1 + code/xvnor_v.h | 3 + code/xvnori_b.h | 3 + code/xvor_v.h | 3 + code/xvori_b.h | 3 + code/xvorn_v.h | 3 + code/xvpermi_d.h | 4 + code/xvreplvei_b.h | 3 + code/xvreplvei_d.h | 3 + code/xvreplvei_h.h | 3 + code/xvreplvei_w.h | 3 + code/xvsetallnez_b.h | 6 + code/xvsetallnez_d.h | 6 + code/xvsetallnez_h.h | 6 + code/xvsetallnez_w.h | 6 + code/xvsetanyeqz_b.h | 6 + code/xvsetanyeqz_d.h | 6 + code/xvsetanyeqz_h.h | 6 + code/xvsetanyeqz_w.h | 6 + code/xvseteqz_v.h | 1 + code/xvsetnez_v.h | 1 + code/xvshuf4i_d.h | 6 + code/xvshuf_b.h | 7 + code/xvshuf_d.h | 7 + code/xvshuf_h.h | 7 + code/xvshuf_w.h | 7 + code/xvst.h | 1 + code/xvstx.h | 1 + code/xvsub_q.h | 3 + code/xvxor_v.h | 3 + code/xvxori_b.h | 3 + docs/lasx/branch.md | 12 ++ docs/lasx/float_comparison.md | 24 +++ docs/lasx/float_computation.md | 43 +++++ docs/lasx/float_misc.md | 15 ++ docs/lasx/fma.md | 13 ++ docs/lasx/integer_comparison.md | 47 ++++++ docs/lasx/integer_computation.md | 270 
+++++++++++++++++++++++++++++++ docs/lasx/logical.md | 12 ++ docs/lasx/memory.md | 17 ++ docs/lasx/misc.md | 96 +++++++++++ docs/lasx/permutation.md | 3 + docs/lasx/shift.md | 171 ++++++++++++++++++++ docs/lasx/shuffling.md | 12 ++ main.py | 28 ++-- 81 files changed, 1091 insertions(+), 9 deletions(-) create mode 100644 code/xvabsd_b.h create mode 100644 code/xvabsd_bu.h create mode 100644 code/xvabsd_d.h create mode 100644 code/xvabsd_du.h create mode 100644 code/xvabsd_h.h create mode 100644 code/xvabsd_hu.h create mode 100644 code/xvabsd_w.h create mode 100644 code/xvabsd_wu.h create mode 100644 code/xvadd_q.h create mode 100644 code/xvadda_b.h create mode 100644 code/xvadda_d.h create mode 100644 code/xvadda_h.h create mode 100644 code/xvadda_w.h create mode 100644 code/xvand_v.h create mode 100644 code/xvandi_b.h create mode 100644 code/xvandn_v.h create mode 100644 code/xvbsll_v.h create mode 100644 code/xvbsrl_v.h create mode 100644 code/xvfclass_d.h create mode 100644 code/xvfclass_s.h create mode 100644 code/xvfmadd_d.h create mode 100644 code/xvfmadd_s.h create mode 100644 code/xvfmsub_d.h create mode 100644 code/xvfmsub_s.h create mode 100644 code/xvfnmadd_d.h create mode 100644 code/xvfnmadd_s.h create mode 100644 code/xvfnmsub_d.h create mode 100644 code/xvfnmsub_s.h create mode 100644 code/xvinsgr2vr_b.h create mode 100644 code/xvinsgr2vr_h.h create mode 100644 code/xvld.h create mode 100644 code/xvldi.h create mode 100644 code/xvldrepl_b.h create mode 100644 code/xvldrepl_d.h create mode 100644 code/xvldrepl_h.h create mode 100644 code/xvldrepl_w.h create mode 100644 code/xvldx.h create mode 100644 code/xvnor_v.h create mode 100644 code/xvnori_b.h create mode 100644 code/xvor_v.h create mode 100644 code/xvori_b.h create mode 100644 code/xvorn_v.h create mode 100644 code/xvpermi_d.h create mode 100644 code/xvreplvei_b.h create mode 100644 code/xvreplvei_d.h create mode 100644 code/xvreplvei_h.h create mode 100644 code/xvreplvei_w.h create mode 100644 
code/xvsetallnez_b.h create mode 100644 code/xvsetallnez_d.h create mode 100644 code/xvsetallnez_h.h create mode 100644 code/xvsetallnez_w.h create mode 100644 code/xvsetanyeqz_b.h create mode 100644 code/xvsetanyeqz_d.h create mode 100644 code/xvsetanyeqz_h.h create mode 100644 code/xvsetanyeqz_w.h create mode 100644 code/xvseteqz_v.h create mode 100644 code/xvsetnez_v.h create mode 100644 code/xvshuf4i_d.h create mode 100644 code/xvshuf_b.h create mode 100644 code/xvshuf_d.h create mode 100644 code/xvshuf_h.h create mode 100644 code/xvshuf_w.h create mode 100644 code/xvst.h create mode 100644 code/xvstx.h create mode 100644 code/xvsub_q.h create mode 100644 code/xvxor_v.h create mode 100644 code/xvxori_b.h create mode 100644 docs/lasx/branch.md create mode 100644 docs/lasx/float_comparison.md create mode 100644 docs/lasx/float_computation.md create mode 100644 docs/lasx/float_misc.md create mode 100644 docs/lasx/fma.md create mode 100644 docs/lasx/integer_comparison.md create mode 100644 docs/lasx/integer_computation.md create mode 100644 docs/lasx/logical.md create mode 100644 docs/lasx/memory.md create mode 100644 docs/lasx/misc.md create mode 100644 docs/lasx/permutation.md create mode 100644 docs/lasx/shift.md create mode 100644 docs/lasx/shuffling.md diff --git a/code/xvabsd_b.h b/code/xvabsd_b.h new file mode 100644 index 00000000..3e7fce1f --- /dev/null +++ b/code/xvabsd_b.h @@ -0,0 +1,4 @@ +for (int i = 0; i < 32; i++) { + dst.byte[i] = ((s8)a.byte[i] > (s8)b.byte[i]) ? (a.byte[i] - b.byte[i]) + : (b.byte[i] - a.byte[i]); +} diff --git a/code/xvabsd_bu.h b/code/xvabsd_bu.h new file mode 100644 index 00000000..15d6ff86 --- /dev/null +++ b/code/xvabsd_bu.h @@ -0,0 +1,4 @@ +for (int i = 0; i < 32; i++) { + dst.byte[i] = ((u8)a.byte[i] > (u8)b.byte[i]) ? 
(a.byte[i] - b.byte[i]) + : (b.byte[i] - a.byte[i]); +} diff --git a/code/xvabsd_d.h b/code/xvabsd_d.h new file mode 100644 index 00000000..e68642df --- /dev/null +++ b/code/xvabsd_d.h @@ -0,0 +1,5 @@ +for (int i = 0; i < 4; i++) { + dst.dword[i] = ((s64)a.dword[i] > (s64)b.dword[i]) + ? (a.dword[i] - b.dword[i]) + : (b.dword[i] - a.dword[i]); +} diff --git a/code/xvabsd_du.h b/code/xvabsd_du.h new file mode 100644 index 00000000..63ca39d0 --- /dev/null +++ b/code/xvabsd_du.h @@ -0,0 +1,5 @@ +for (int i = 0; i < 4; i++) { + dst.dword[i] = ((u64)a.dword[i] > (u64)b.dword[i]) + ? (a.dword[i] - b.dword[i]) + : (b.dword[i] - a.dword[i]); +} diff --git a/code/xvabsd_h.h b/code/xvabsd_h.h new file mode 100644 index 00000000..0f3e6db9 --- /dev/null +++ b/code/xvabsd_h.h @@ -0,0 +1,4 @@ +for (int i = 0; i < 16; i++) { + dst.half[i] = ((s16)a.half[i] > (s16)b.half[i]) ? (a.half[i] - b.half[i]) + : (b.half[i] - a.half[i]); +} diff --git a/code/xvabsd_hu.h b/code/xvabsd_hu.h new file mode 100644 index 00000000..46fb3705 --- /dev/null +++ b/code/xvabsd_hu.h @@ -0,0 +1,4 @@ +for (int i = 0; i < 16; i++) { + dst.half[i] = ((u16)a.half[i] > (u16)b.half[i]) ? (a.half[i] - b.half[i]) + : (b.half[i] - a.half[i]); +} diff --git a/code/xvabsd_w.h b/code/xvabsd_w.h new file mode 100644 index 00000000..46fecba2 --- /dev/null +++ b/code/xvabsd_w.h @@ -0,0 +1,4 @@ +for (int i = 0; i < 8; i++) { + dst.word[i] = ((s32)a.word[i] > (s32)b.word[i]) ? (a.word[i] - b.word[i]) + : (b.word[i] - a.word[i]); +} diff --git a/code/xvabsd_wu.h b/code/xvabsd_wu.h new file mode 100644 index 00000000..f17cb37d --- /dev/null +++ b/code/xvabsd_wu.h @@ -0,0 +1,4 @@ +for (int i = 0; i < 8; i++) { + dst.word[i] = ((u32)a.word[i] > (u32)b.word[i]) ? 
(a.word[i] - b.word[i]) + : (b.word[i] - a.word[i]); +} diff --git a/code/xvadd_q.h b/code/xvadd_q.h new file mode 100644 index 00000000..b406be11 --- /dev/null +++ b/code/xvadd_q.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 2;i++) { + dst.qword[i] = a.qword[i] + b.qword[i]; +} diff --git a/code/xvadda_b.h b/code/xvadda_b.h new file mode 100644 index 00000000..f175d8e4 --- /dev/null +++ b/code/xvadda_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 32; i++) { + dst.byte[i] = abs((s8)a.byte[i]) + abs((s8)b.byte[i]); +} diff --git a/code/xvadda_d.h b/code/xvadda_d.h new file mode 100644 index 00000000..274918e0 --- /dev/null +++ b/code/xvadda_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.dword[i] = abs((s64)a.dword[i]) + abs((s64)b.dword[i]); +} diff --git a/code/xvadda_h.h b/code/xvadda_h.h new file mode 100644 index 00000000..85aa238a --- /dev/null +++ b/code/xvadda_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.half[i] = abs((s16)a.half[i]) + abs((s16)b.half[i]); +} diff --git a/code/xvadda_w.h b/code/xvadda_w.h new file mode 100644 index 00000000..e94ad153 --- /dev/null +++ b/code/xvadda_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.word[i] = abs((s32)a.word[i]) + abs((s32)b.word[i]); +} diff --git a/code/xvand_v.h b/code/xvand_v.h new file mode 100644 index 00000000..d8d65019 --- /dev/null +++ b/code/xvand_v.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.dword[i] = a.dword[i] & b.dword[i]; +} \ No newline at end of file diff --git a/code/xvandi_b.h b/code/xvandi_b.h new file mode 100644 index 00000000..5a8ef7c2 --- /dev/null +++ b/code/xvandi_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 32; i++) { + dst.byte[i] = a.byte[i] & imm; +} \ No newline at end of file diff --git a/code/xvandn_v.h b/code/xvandn_v.h new file mode 100644 index 00000000..b2a9a2ff --- /dev/null +++ b/code/xvandn_v.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.dword[i] = b.dword[i] & (~a.dword[i]); +} \ No newline at end of file diff --git a/code/xvbsll_v.h 
b/code/xvbsll_v.h new file mode 100644 index 00000000..71b4ff2e --- /dev/null +++ b/code/xvbsll_v.h @@ -0,0 +1,3 @@ +int shift = (imm * 8) % 128; +dst.qword[0] = (u128)a.qword[0] << shift; +dst.qword[1] = (u128)a.qword[1] << shift; \ No newline at end of file diff --git a/code/xvbsrl_v.h b/code/xvbsrl_v.h new file mode 100644 index 00000000..3db1a3fa --- /dev/null +++ b/code/xvbsrl_v.h @@ -0,0 +1,3 @@ +int shift = (imm * 8) % 128; +dst.qword[0] = (u128)a.qword[0] >> shift; +dst.qword[1] = (u128)a.qword[1] >> shift; \ No newline at end of file diff --git a/code/xvfclass_d.h b/code/xvfclass_d.h new file mode 100644 index 00000000..60502ac8 --- /dev/null +++ b/code/xvfclass_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.dword[i] = fp_classify(a.fp64[i]); +} \ No newline at end of file diff --git a/code/xvfclass_s.h b/code/xvfclass_s.h new file mode 100644 index 00000000..ecdde8ae --- /dev/null +++ b/code/xvfclass_s.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.word[i] = fp_classify(a.fp32[i]); +} \ No newline at end of file diff --git a/code/xvfmadd_d.h b/code/xvfmadd_d.h new file mode 100644 index 00000000..6b4d858e --- /dev/null +++ b/code/xvfmadd_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.fp64[i] = a.fp64[i] * b.fp64[i] + c.fp64[i]; +} \ No newline at end of file diff --git a/code/xvfmadd_s.h b/code/xvfmadd_s.h new file mode 100644 index 00000000..5a5f245b --- /dev/null +++ b/code/xvfmadd_s.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.fp32[i] = a.fp32[i] * b.fp32[i] + c.fp32[i]; +} \ No newline at end of file diff --git a/code/xvfmsub_d.h b/code/xvfmsub_d.h new file mode 100644 index 00000000..5f77ee1c --- /dev/null +++ b/code/xvfmsub_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.fp64[i] = a.fp64[i] * b.fp64[i] - c.fp64[i]; +} \ No newline at end of file diff --git a/code/xvfmsub_s.h b/code/xvfmsub_s.h new file mode 100644 index 00000000..77379f33 --- /dev/null +++ b/code/xvfmsub_s.h @@ -0,0 +1,3 @@ +for (int i 
= 0; i < 8; i++) { + dst.fp32[i] = a.fp32[i] * b.fp32[i] - c.fp32[i]; +} \ No newline at end of file diff --git a/code/xvfnmadd_d.h b/code/xvfnmadd_d.h new file mode 100644 index 00000000..9022eca2 --- /dev/null +++ b/code/xvfnmadd_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.fp64[i] = -(a.fp64[i] * b.fp64[i] + c.fp64[i]); +} \ No newline at end of file diff --git a/code/xvfnmadd_s.h b/code/xvfnmadd_s.h new file mode 100644 index 00000000..5537629e --- /dev/null +++ b/code/xvfnmadd_s.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.fp32[i] = -(a.fp32[i] * b.fp32[i] + c.fp32[i]); +} \ No newline at end of file diff --git a/code/xvfnmsub_d.h b/code/xvfnmsub_d.h new file mode 100644 index 00000000..9e95fc98 --- /dev/null +++ b/code/xvfnmsub_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.fp64[i] = -(a.fp64[i] * b.fp64[i] - c.fp64[i]); +} \ No newline at end of file diff --git a/code/xvfnmsub_s.h b/code/xvfnmsub_s.h new file mode 100644 index 00000000..cf9fab41 --- /dev/null +++ b/code/xvfnmsub_s.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.fp32[i] = -(a.fp32[i] * b.fp32[i] - c.fp32[i]); +} \ No newline at end of file diff --git a/code/xvinsgr2vr_b.h b/code/xvinsgr2vr_b.h new file mode 100644 index 00000000..62a2e91c --- /dev/null +++ b/code/xvinsgr2vr_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 32; i++) { + dst.byte[i] = (i == imm) ? b : a.byte[i]; +} diff --git a/code/xvinsgr2vr_h.h b/code/xvinsgr2vr_h.h new file mode 100644 index 00000000..2bbe0359 --- /dev/null +++ b/code/xvinsgr2vr_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.half[i] = (i == imm) ? 
b : a.half[i]; +} diff --git a/code/xvld.h b/code/xvld.h new file mode 100644 index 00000000..5b664763 --- /dev/null +++ b/code/xvld.h @@ -0,0 +1 @@ +dst = memory_load(256, addr + offset); \ No newline at end of file diff --git a/code/xvldi.h b/code/xvldi.h new file mode 100644 index 00000000..54764a92 --- /dev/null +++ b/code/xvldi.h @@ -0,0 +1,93 @@ +u64 imm12_10 = (imm >> 10) & 0b111; +u64 imm12_8 = (imm >> 8) & 0b11111; +u64 imm9_0 = imm & 0x3FF; +s64 simm9_0 = ((s64)imm9_0 << 54) >> 54; +u64 imm7_0 = imm & 0xFF; +u64 imm7 = (imm >> 7) & 0x1; +u64 imm6 = (imm >> 6) & 0x1; +u64 imm5 = (imm >> 5) & 0x1; +u64 imm5_0 = imm & 0x3F; +u64 imm4 = (imm >> 4) & 0x1; +u64 imm3 = (imm >> 3) & 0x1; +u64 imm2 = (imm >> 2) & 0x1; +u64 imm1 = (imm >> 1) & 0x1; +u64 imm0 = imm & 0x1; + +u64 broadcast_value; +u64 broadcast_width; +if (imm12_10 == 0b000) { + broadcast_value = imm7_0; + broadcast_width = 8; +} else if (imm12_10 == 0b001) { + broadcast_value = simm9_0; + broadcast_width = 16; +} else if (imm12_10 == 0b010) { + broadcast_value = simm9_0; + broadcast_width = 32; +} else if (imm12_10 == 0b011) { + broadcast_value = simm9_0; + broadcast_width = 64; +} else if (imm12_8 == 0b10000) { + broadcast_value = imm7_0; + broadcast_width = 32; +} else if (imm12_8 == 0b10001) { + broadcast_value = imm7_0 << 8; + broadcast_width = 32; +} else if (imm12_8 == 0b10010) { + broadcast_value = imm7_0 << 16; + broadcast_width = 32; +} else if (imm12_8 == 0b10011) { + broadcast_value = imm7_0 << 24; + broadcast_width = 32; +} else if (imm12_8 == 0b10100) { + broadcast_value = imm7_0; + broadcast_width = 16; +} else if (imm12_8 == 0b10101) { + broadcast_value = imm7_0 << 8; + broadcast_width = 16; +} else if (imm12_8 == 0b10110) { + broadcast_value = (imm7_0 << 8) | 0xFF; + broadcast_width = 32; +} else if (imm12_8 == 0b10111) { + broadcast_value = (imm7_0 << 16) | 0xFFFF; + broadcast_width = 32; +} else if (imm12_8 == 0b11000) { + broadcast_value = imm7_0; + broadcast_width = 8; +} else if 
(imm12_8 == 0b11001) { + broadcast_value = imm0 * 0xFF + imm1 * 0xFF00 + imm2 * 0xFF0000 + + imm3 * 0xFF000000 + imm4 * 0xFF00000000 + + imm5 * 0xFF0000000000 + imm6 * 0xFF000000000000 + + imm7 * 0xFF00000000000000; + broadcast_width = 64; +} else if (imm12_8 == 0b11010) { + broadcast_value = (imm7 << 31) | ((1 - imm6) << 30) | ((imm6 * 0x1F) << 25) | + (imm5_0 << 19); + broadcast_width = 32; +} else if (imm12_8 == 0b11011) { + broadcast_value = (imm7 << 31) | ((1 - imm6) << 30) | ((imm6 * 0x1F) << 25) | + (imm5_0 << 19); + broadcast_width = 64; +} else if (imm12_8 == 0b11100) { + broadcast_value = (imm7 << 63) | ((1 - imm6) << 62) | ((imm6 * 0xFF) << 54) | + (imm5_0 << 48); + broadcast_width = 64; +} + +if (broadcast_width == 8) { + for (int i = 0; i < 32; i++) { + dst.byte[i] = broadcast_value; + } +} else if (broadcast_width == 16) { + for (int i = 0; i < 16; i++) { + dst.half[i] = broadcast_value; + } +} else if (broadcast_width == 32) { + for (int i = 0; i < 8; i++) { + dst.word[i] = broadcast_value; + } +} else if (broadcast_width == 64) { + for (int i = 0; i < 4; i++) { + dst.dword[i] = broadcast_value; + } +} \ No newline at end of file diff --git a/code/xvldrepl_b.h b/code/xvldrepl_b.h new file mode 100644 index 00000000..32a2c8cc --- /dev/null +++ b/code/xvldrepl_b.h @@ -0,0 +1,4 @@ +u8 data = memory_load(8, addr + offset); +for (int i = 0; i < 32; i++) { + dst.byte[i] = data; +} diff --git a/code/xvldrepl_d.h b/code/xvldrepl_d.h new file mode 100644 index 00000000..6ac384a8 --- /dev/null +++ b/code/xvldrepl_d.h @@ -0,0 +1,4 @@ +u64 data = memory_load(64, addr + (offset << 3)); +for (int i = 0; i < 4; i++) { + dst.dword[i] = data; +} diff --git a/code/xvldrepl_h.h b/code/xvldrepl_h.h new file mode 100644 index 00000000..89e813a0 --- /dev/null +++ b/code/xvldrepl_h.h @@ -0,0 +1,4 @@ +u16 data = memory_load(16, addr + (offset << 1)); +for (int i = 0; i < 16; i++) { + dst.half[i] = data; +} diff --git a/code/xvldrepl_w.h b/code/xvldrepl_w.h new file mode 
100644 index 00000000..adc76d7e --- /dev/null +++ b/code/xvldrepl_w.h @@ -0,0 +1,4 @@ +u32 data = memory_load(32, addr + (offset << 2)); +for (int i = 0; i < 8; i++) { + dst.word[i] = data; +} \ No newline at end of file diff --git a/code/xvldx.h b/code/xvldx.h new file mode 100644 index 00000000..5b664763 --- /dev/null +++ b/code/xvldx.h @@ -0,0 +1 @@ +dst = memory_load(256, addr + offset); \ No newline at end of file diff --git a/code/xvnor_v.h b/code/xvnor_v.h new file mode 100644 index 00000000..b98d1426 --- /dev/null +++ b/code/xvnor_v.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.dword[i] = ~(a.dword[i] | b.dword[i]); +} \ No newline at end of file diff --git a/code/xvnori_b.h b/code/xvnori_b.h new file mode 100644 index 00000000..05f957aa --- /dev/null +++ b/code/xvnori_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 32; i++) { + dst.byte[i] = ~(a.byte[i] | imm); +} \ No newline at end of file diff --git a/code/xvor_v.h b/code/xvor_v.h new file mode 100644 index 00000000..f53defce --- /dev/null +++ b/code/xvor_v.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.dword[i] = a.dword[i] | b.dword[i]; +} \ No newline at end of file diff --git a/code/xvori_b.h b/code/xvori_b.h new file mode 100644 index 00000000..7ccd948e --- /dev/null +++ b/code/xvori_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 32; i++) { + dst.byte[i] = a.byte[i] | imm; +} \ No newline at end of file diff --git a/code/xvorn_v.h b/code/xvorn_v.h new file mode 100644 index 00000000..aa7b7139 --- /dev/null +++ b/code/xvorn_v.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.dword[i] = a.dword[i] | (~b.dword[i]); +} \ No newline at end of file diff --git a/code/xvpermi_d.h b/code/xvpermi_d.h new file mode 100644 index 00000000..9f056c56 --- /dev/null +++ b/code/xvpermi_d.h @@ -0,0 +1,4 @@ +dst.dword[0] = a.dword[imm & 0x3]; +dst.dword[1] = a.dword[(imm >> 2) & 0x3]; +dst.dword[2] = a.dword[(imm >> 4) & 0x3]; +dst.dword[3] = a.dword[(imm >> 6) & 0x3]; diff --git a/code/xvreplvei_b.h 
b/code/xvreplvei_b.h new file mode 100644 index 00000000..965b15ce --- /dev/null +++ b/code/xvreplvei_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 32; i++) { + dst.byte[i] = a.byte[idx]; +} diff --git a/code/xvreplvei_d.h b/code/xvreplvei_d.h new file mode 100644 index 00000000..fbb3bc0c --- /dev/null +++ b/code/xvreplvei_d.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.dword[i] = a.dword[idx]; +} diff --git a/code/xvreplvei_h.h b/code/xvreplvei_h.h new file mode 100644 index 00000000..979487d1 --- /dev/null +++ b/code/xvreplvei_h.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 16; i++) { + dst.half[i] = a.half[idx]; +} diff --git a/code/xvreplvei_w.h b/code/xvreplvei_w.h new file mode 100644 index 00000000..110f6304 --- /dev/null +++ b/code/xvreplvei_w.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 8; i++) { + dst.word[i] = a.word[idx]; +} diff --git a/code/xvsetallnez_b.h b/code/xvsetallnez_b.h new file mode 100644 index 00000000..1ea16583 --- /dev/null +++ b/code/xvsetallnez_b.h @@ -0,0 +1,6 @@ +dst = 1; +for (int i = 0; i < 32; i++) { + if (a.byte[i] == 0) { + dst = 0; + } +} \ No newline at end of file diff --git a/code/xvsetallnez_d.h b/code/xvsetallnez_d.h new file mode 100644 index 00000000..0e464078 --- /dev/null +++ b/code/xvsetallnez_d.h @@ -0,0 +1,6 @@ +dst = 1; +for (int i = 0; i < 4; i++) { + if (a.dword[i] == 0) { + dst = 0; + } +} diff --git a/code/xvsetallnez_h.h b/code/xvsetallnez_h.h new file mode 100644 index 00000000..a7b33f5f --- /dev/null +++ b/code/xvsetallnez_h.h @@ -0,0 +1,6 @@ +dst = 1; +for (int i = 0; i < 16; i++) { + if (a.half[i] == 0) { + dst = 0; + } +} \ No newline at end of file diff --git a/code/xvsetallnez_w.h b/code/xvsetallnez_w.h new file mode 100644 index 00000000..f1bfe3a5 --- /dev/null +++ b/code/xvsetallnez_w.h @@ -0,0 +1,6 @@ +dst = 1; +for (int i = 0; i < 8; i++) { + if (a.word[i] == 0) { + dst = 0; + } +} \ No newline at end of file diff --git a/code/xvsetanyeqz_b.h b/code/xvsetanyeqz_b.h new file mode 100644 index 
00000000..cb7d6fe6 --- /dev/null +++ b/code/xvsetanyeqz_b.h @@ -0,0 +1,6 @@ +dst = 0; +for (int i = 0; i < 32; i++) { + if (a.byte[i] == 0) { + dst = 1; + } +} \ No newline at end of file diff --git a/code/xvsetanyeqz_d.h b/code/xvsetanyeqz_d.h new file mode 100644 index 00000000..d82a0fed --- /dev/null +++ b/code/xvsetanyeqz_d.h @@ -0,0 +1,6 @@ +dst = 0; +for (int i = 0; i < 4; i++) { + if (a.dword[i] == 0) { + dst = 1; + } +} \ No newline at end of file diff --git a/code/xvsetanyeqz_h.h b/code/xvsetanyeqz_h.h new file mode 100644 index 00000000..d0b99e3c --- /dev/null +++ b/code/xvsetanyeqz_h.h @@ -0,0 +1,6 @@ +dst = 0; +for (int i = 0; i < 16; i++) { + if (a.half[i] == 0) { + dst = 1; + } +} \ No newline at end of file diff --git a/code/xvsetanyeqz_w.h b/code/xvsetanyeqz_w.h new file mode 100644 index 00000000..138c9a85 --- /dev/null +++ b/code/xvsetanyeqz_w.h @@ -0,0 +1,6 @@ +dst = 0; +for (int i = 0; i < 8; i++) { + if (a.word[i] == 0) { + dst = 1; + } +} \ No newline at end of file diff --git a/code/xvseteqz_v.h b/code/xvseteqz_v.h new file mode 100644 index 00000000..8af4af59 --- /dev/null +++ b/code/xvseteqz_v.h @@ -0,0 +1 @@ +dst = a.qword[0] == 0 && a.qword[1] == 0; \ No newline at end of file diff --git a/code/xvsetnez_v.h b/code/xvsetnez_v.h new file mode 100644 index 00000000..7cb492d6 --- /dev/null +++ b/code/xvsetnez_v.h @@ -0,0 +1 @@ +dst = a.qword[0] != 0 || a.qword[1] != 0; \ No newline at end of file diff --git a/code/xvshuf4i_d.h b/code/xvshuf4i_d.h new file mode 100644 index 00000000..f0460885 --- /dev/null +++ b/code/xvshuf4i_d.h @@ -0,0 +1,6 @@ +dst.dword[0] = (imm & 2) ? b.dword[(imm & 1)] : a.dword[(imm & 1)]; +dst.dword[1] = + (imm & 8) ? b.dword[((imm >> 2) & 1)] : a.dword[((imm >> 2) & 1)]; +dst.dword[2] = (imm & 2) ? b.dword[(imm & 1) + 2] : a.dword[(imm & 1) + 2]; +dst.dword[3] = + (imm & 8) ? 
b.dword[((imm >> 2) & 1) + 2] : a.dword[((imm >> 2) & 1) + 2]; \ No newline at end of file diff --git a/code/xvshuf_b.h b/code/xvshuf_b.h new file mode 100644 index 00000000..ed70425d --- /dev/null +++ b/code/xvshuf_b.h @@ -0,0 +1,7 @@ +for (int i = 0; i < 32; i++) { + if ((c.byte[i] % 64) < 32) { + dst.byte[i] = b.byte[c.byte[i] % 32]; + } else { + dst.byte[i] = a.byte[c.byte[i] % 32]; + } +} \ No newline at end of file diff --git a/code/xvshuf_d.h b/code/xvshuf_d.h new file mode 100644 index 00000000..62067197 --- /dev/null +++ b/code/xvshuf_d.h @@ -0,0 +1,7 @@ +for (int i = 0; i < 4; i++) { + if ((a.dword[i] % 8) < 4) { + dst.dword[i] = c.dword[a.dword[i] % 4]; + } else { + dst.dword[i] = b.dword[a.dword[i] % 4]; + } +} \ No newline at end of file diff --git a/code/xvshuf_h.h b/code/xvshuf_h.h new file mode 100644 index 00000000..0a337092 --- /dev/null +++ b/code/xvshuf_h.h @@ -0,0 +1,7 @@ +for (int i = 0; i < 16; i++) { + if ((a.half[i] % 32) < 16) { + dst.half[i] = c.half[a.half[i] % 16]; + } else { + dst.half[i] = b.half[a.half[i] % 16]; + } +} \ No newline at end of file diff --git a/code/xvshuf_w.h b/code/xvshuf_w.h new file mode 100644 index 00000000..3ba2b866 --- /dev/null +++ b/code/xvshuf_w.h @@ -0,0 +1,7 @@ +for (int i = 0; i < 8; i++) { + if ((a.word[i] % 16) < 8) { + dst.word[i] = c.word[a.word[i] % 8]; + } else { + dst.word[i] = b.word[a.word[i] % 8]; + } +} \ No newline at end of file diff --git a/code/xvst.h b/code/xvst.h new file mode 100644 index 00000000..ffc2bb61 --- /dev/null +++ b/code/xvst.h @@ -0,0 +1 @@ +memory_store(256, data, addr + offset); \ No newline at end of file diff --git a/code/xvstx.h b/code/xvstx.h new file mode 100644 index 00000000..ffc2bb61 --- /dev/null +++ b/code/xvstx.h @@ -0,0 +1 @@ +memory_store(256, data, addr + offset); \ No newline at end of file diff --git a/code/xvsub_q.h b/code/xvsub_q.h new file mode 100644 index 00000000..3c00c47f --- /dev/null +++ b/code/xvsub_q.h @@ -0,0 +1,3 @@ +for (int i = 0;i < 2;i++) { 
+ dst.qword[i] = a.qword[i] - b.qword[i]; +} diff --git a/code/xvxor_v.h b/code/xvxor_v.h new file mode 100644 index 00000000..38e0c5de --- /dev/null +++ b/code/xvxor_v.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 4; i++) { + dst.dword[i] = a.dword[i] ^ b.dword[i]; +} \ No newline at end of file diff --git a/code/xvxori_b.h b/code/xvxori_b.h new file mode 100644 index 00000000..89e3e36b --- /dev/null +++ b/code/xvxori_b.h @@ -0,0 +1,3 @@ +for (int i = 0; i < 32; i++) { + dst.byte[i] = a.byte[i] ^ imm; +} \ No newline at end of file diff --git a/docs/lasx/branch.md b/docs/lasx/branch.md new file mode 100644 index 00000000..77c5186d --- /dev/null +++ b/docs/lasx/branch.md @@ -0,0 +1,12 @@ +# Branch + +{{ xbz_v() }} +{{ xbnz_v() }} +{{ xbz('b') }} +{{ xbz('h') }} +{{ xbz('w') }} +{{ xbz('d') }} +{{ xbnz('b') }} +{{ xbnz('h') }} +{{ xbnz('w') }} +{{ xbnz('d') }} \ No newline at end of file diff --git a/docs/lasx/float_comparison.md b/docs/lasx/float_comparison.md new file mode 100644 index 00000000..447cf1c3 --- /dev/null +++ b/docs/lasx/float_comparison.md @@ -0,0 +1,24 @@ +# Floating Point Comparison + +{{ xvfcmp('caf') }} +{{ xvfcmp('ceq') }} +{{ xvfcmp('cle') }} +{{ xvfcmp('clt') }} +{{ xvfcmp('cne') }} +{{ xvfcmp('cor') }} +{{ xvfcmp('cueq') }} +{{ xvfcmp('cule') }} +{{ xvfcmp('cult') }} +{{ xvfcmp('cun') }} +{{ xvfcmp('cune') }} +{{ xvfcmp('saf') }} +{{ xvfcmp('seq') }} +{{ xvfcmp('sle') }} +{{ xvfcmp('slt') }} +{{ xvfcmp('sne') }} +{{ xvfcmp('sor') }} +{{ xvfcmp('sueq') }} +{{ xvfcmp('sule') }} +{{ xvfcmp('sult') }} +{{ xvfcmp('sun') }} +{{ xvfcmp('sune') }} diff --git a/docs/lasx/float_computation.md b/docs/lasx/float_computation.md new file mode 100644 index 00000000..ba221a91 --- /dev/null +++ b/docs/lasx/float_computation.md @@ -0,0 +1,43 @@ +# Floating Point Computation + +{{ xvfadd('s') }} +{{ xvfadd('d') }} + +{{ xvfdiv('s') }} +{{ xvfdiv('d') }} + +{{ xvfmax('s') }} +{{ xvfmax('d') }} + +{{ xvfmaxa('s') }} +{{ xvfmaxa('d') }} + +{{ xvfmin('s') }} +{{ 
xvfmin('d') }} + +{{ xvfmina('s') }} +{{ xvfmina('d') }} + +{{ xvfmul('s') }} +{{ xvfmul('d') }} + +{{ xvfsub('s') }} +{{ xvfsub('d') }} + +{{ xvfop('2-based logarithm', 'logb', 's') }} +{{ xvfop('2-based logarithm', 'logb', 'd') }} + +{{ xvfop('square root', 'sqrt', 's') }} +{{ xvfop('square root', 'sqrt', 'd') }} + +{{ xvfop('reciprocal of square root', 'rsqrt', 's') }} +{{ xvfop('reciprocal of square root', 'rsqrt', 'd') }} + +{{ xvfop('reciprocal', 'recip', 's') }} +{{ xvfop('reciprocal', 'recip', 'd') }} + +{{ xvfop('estimated reciprocal of square root', 'rsqrte', 's') }} +{{ xvfop('estimated reciprocal of square root', 'rsqrte', 'd') }} + +{{ xvfop('estimated reciprocal', 'recipe', 's') }} +{{ xvfop('estimated reciprocal', 'recipe', 'd') }} \ No newline at end of file diff --git a/docs/lasx/float_misc.md b/docs/lasx/float_misc.md new file mode 100644 index 00000000..94f66679 --- /dev/null +++ b/docs/lasx/float_misc.md @@ -0,0 +1,15 @@ +# Floating Point Misc + +{{ xvfclass_d() }} +{{ xvfclass_s() }} + +{{ xvfrint('', 's') }} +{{ xvfrint('', 'd') }} +{{ xvfrint('rp', 's') }} +{{ xvfrint('rp', 'd') }} +{{ xvfrint('rm', 's') }} +{{ xvfrint('rm', 'd') }} +{{ xvfrint('rz', 's') }} +{{ xvfrint('rz', 'd') }} +{{ xvfrint('rne', 's') }} +{{ xvfrint('rne', 'd') }} diff --git a/docs/lasx/fma.md b/docs/lasx/fma.md new file mode 100644 index 00000000..c6c1a0a0 --- /dev/null +++ b/docs/lasx/fma.md @@ -0,0 +1,13 @@ +# Fused Multiply-Add + +{{ xvfmadd_d() }} +{{ xvfmadd_s() }} + +{{ xvfmsub_d() }} +{{ xvfmsub_s() }} + +{{ xvfnmadd_d() }} +{{ xvfnmadd_s() }} + +{{ xvfnmsub_d() }} +{{ xvfnmsub_s() }} diff --git a/docs/lasx/integer_comparison.md b/docs/lasx/integer_comparison.md new file mode 100644 index 00000000..5ad7ec87 --- /dev/null +++ b/docs/lasx/integer_comparison.md @@ -0,0 +1,47 @@ +# Integer Comparison + +{{ xvseq('b') }} +{{ xvseq('h') }} +{{ xvseq('w') }} +{{ xvseq('d') }} + +{{ xvseqi('b') }} +{{ xvseqi('h') }} +{{ xvseqi('w') }} +{{ xvseqi('d') }} + +{{ xvslt('b') 
}} +{{ xvslt('bu') }} +{{ xvslt('h') }} +{{ xvslt('hu') }} +{{ xvslt('w') }} +{{ xvslt('wu') }} +{{ xvslt('d') }} +{{ xvslt('du') }} + +{{ xvslti('b') }} +{{ xvslti('bu') }} +{{ xvslti('h') }} +{{ xvslti('hu') }} +{{ xvslti('w') }} +{{ xvslti('wu') }} +{{ xvslti('d') }} +{{ xvslti('du') }} + +{{ xvsle('b') }} +{{ xvsle('bu') }} +{{ xvsle('h') }} +{{ xvsle('hu') }} +{{ xvsle('w') }} +{{ xvsle('wu') }} +{{ xvsle('d') }} +{{ xvsle('du') }} + +{{ xvslei('b') }} +{{ xvslei('bu') }} +{{ xvslei('h') }} +{{ xvslei('hu') }} +{{ xvslei('w') }} +{{ xvslei('wu') }} +{{ xvslei('d') }} +{{ xvslei('du') }} \ No newline at end of file diff --git a/docs/lasx/integer_computation.md b/docs/lasx/integer_computation.md new file mode 100644 index 00000000..6e704562 --- /dev/null +++ b/docs/lasx/integer_computation.md @@ -0,0 +1,270 @@ +# Integer Computation + +{{ xvadd('b') }} +{{ xvadd('h') }} +{{ xvadd('w') }} +{{ xvadd('d') }} +{{ xvadd('q') }} + +{{ xvabsd('b') }} +{{ xvabsd('bu') }} +{{ xvabsd('h') }} +{{ xvabsd('hu') }} +{{ xvabsd('w') }} +{{ xvabsd('wu') }} +{{ xvabsd('d') }} +{{ xvabsd('du') }} + +{{ xvadda('b') }} +{{ xvadda('h') }} +{{ xvadda('w') }} +{{ xvadda('d') }} + +{{ xvaddi('bu') }} +{{ xvaddi('hu') }} +{{ xvaddi('wu') }} +{{ xvaddi('du') }} + +{{ xvaddwev('h', 'b') }} +{{ xvaddwev('h', 'bu') }} +{{ xvaddwev('h', 'bu', 'b') }} +{{ xvaddwev('w', 'h') }} +{{ xvaddwev('w', 'hu') }} +{{ xvaddwev('w', 'hu', 'h') }} +{{ xvaddwev('d', 'w') }} +{{ xvaddwev('d', 'wu') }} +{{ xvaddwev('d', 'wu', 'w') }} +{{ xvaddwev('q', 'd') }} +{{ xvaddwev('q', 'du') }} +{{ xvaddwev('q', 'du', 'd') }} + +{{ xvaddwod('h', 'b') }} +{{ xvaddwod('h', 'bu') }} +{{ xvaddwod('h', 'bu', 'b') }} +{{ xvaddwod('w', 'h') }} +{{ xvaddwod('w', 'hu') }} +{{ xvaddwod('w', 'hu', 'h') }} +{{ xvaddwod('d', 'w') }} +{{ xvaddwod('d', 'wu') }} +{{ xvaddwod('d', 'wu', 'w') }} +{{ xvaddwod('q', 'd') }} +{{ xvaddwod('q', 'du') }} +{{ xvaddwod('q', 'du', 'd') }} + +{{ xvavg('b') }} +{{ xvavg('bu') }} +{{ xvavg('h') }} 
+{{ xvavg('hu') }} +{{ xvavg('w') }} +{{ xvavg('wu') }} +{{ xvavg('d') }} +{{ xvavg('du') }} + +{{ xvavgr('b') }} +{{ xvavgr('bu') }} +{{ xvavgr('h') }} +{{ xvavgr('hu') }} +{{ xvavgr('w') }} +{{ xvavgr('wu') }} +{{ xvavgr('d') }} +{{ xvavgr('du') }} + +{{ xvdiv('b') }} +{{ xvdiv('bu') }} +{{ xvdiv('h') }} +{{ xvdiv('hu') }} +{{ xvdiv('w') }} +{{ xvdiv('wu') }} +{{ xvdiv('d') }} +{{ xvdiv('du') }} + +{{ xvhaddw('h', 'b') }} +{{ xvhaddw('hu', 'bu') }} +{{ xvhaddw('w', 'h') }} +{{ xvhaddw('wu', 'hu') }} +{{ xvhaddw('d', 'w') }} +{{ xvhaddw('du', 'wu') }} +{{ xvhaddw('q', 'd') }} +{{ xvhaddw('qu', 'du') }} + +{{ xvhsubw('h', 'b') }} +{{ xvhsubw('hu', 'bu') }} +{{ xvhsubw('w', 'h') }} +{{ xvhsubw('wu', 'hu') }} +{{ xvhsubw('d', 'w') }} +{{ xvhsubw('du', 'wu') }} +{{ xvhsubw('q', 'd') }} +{{ xvhsubw('qu', 'du') }} + +{{ xvmadd('b') }} +{{ xvmadd('h') }} +{{ xvmadd('w') }} +{{ xvmadd('d') }} + +{{ xvmaddwev('h', 'b') }} +{{ xvmaddwev('h', 'bu') }} +{{ xvmaddwev('h', 'bu', 'b') }} +{{ xvmaddwev('w', 'h') }} +{{ xvmaddwev('w', 'hu') }} +{{ xvmaddwev('w', 'hu', 'h') }} +{{ xvmaddwev('d', 'w') }} +{{ xvmaddwev('d', 'wu') }} +{{ xvmaddwev('d', 'wu', 'w') }} +{{ xvmaddwev('q', 'd') }} +{{ xvmaddwev('q', 'du') }} +{{ xvmaddwev('q', 'du', 'd') }} + +{{ xvmaddwod('h', 'b') }} +{{ xvmaddwod('h', 'bu') }} +{{ xvmaddwod('h', 'bu', 'b') }} +{{ xvmaddwod('w', 'h') }} +{{ xvmaddwod('w', 'hu') }} +{{ xvmaddwod('w', 'hu', 'h') }} +{{ xvmaddwod('d', 'w') }} +{{ xvmaddwod('d', 'wu') }} +{{ xvmaddwod('d', 'wu', 'w') }} +{{ xvmaddwod('q', 'd') }} +{{ xvmaddwod('q', 'du') }} +{{ xvmaddwod('q', 'du', 'd') }} + +{{ xvmax('b') }} +{{ xvmax('bu') }} +{{ xvmax('h') }} +{{ xvmax('hu') }} +{{ xvmax('w') }} +{{ xvmax('wu') }} +{{ xvmax('d') }} +{{ xvmax('du') }} + +{{ xvmaxi('b') }} +{{ xvmaxi('bu') }} +{{ xvmaxi('h') }} +{{ xvmaxi('hu') }} +{{ xvmaxi('w') }} +{{ xvmaxi('wu') }} +{{ xvmaxi('d') }} +{{ xvmaxi('du') }} + +{{ xvmin('b') }} +{{ xvmin('bu') }} +{{ xvmin('h') }} +{{ xvmin('hu') }} +{{ 
xvmin('w') }} +{{ xvmin('wu') }} +{{ xvmin('d') }} +{{ xvmin('du') }} + +{{ xvmini('b') }} +{{ xvmini('bu') }} +{{ xvmini('h') }} +{{ xvmini('hu') }} +{{ xvmini('w') }} +{{ xvmini('wu') }} +{{ xvmini('d') }} +{{ xvmini('du') }} + +{{ xvmod('b') }} +{{ xvmod('bu') }} +{{ xvmod('h') }} +{{ xvmod('hu') }} +{{ xvmod('w') }} +{{ xvmod('wu') }} +{{ xvmod('d') }} +{{ xvmod('du') }} + +{{ xvmsub('b') }} +{{ xvmsub('h') }} +{{ xvmsub('w') }} +{{ xvmsub('d') }} + +{{ xvmuh('b') }} +{{ xvmuh('bu') }} +{{ xvmuh('h') }} +{{ xvmuh('hu') }} +{{ xvmuh('w') }} +{{ xvmuh('wu') }} +{{ xvmuh('d') }} +{{ xvmuh('du') }} + +{{ xvmul('b') }} +{{ xvmul('h') }} +{{ xvmul('w') }} +{{ xvmul('d') }} + +{{ xvmulwev('h', 'b') }} +{{ xvmulwev('h', 'bu') }} +{{ xvmulwev('h', 'bu', 'b') }} +{{ xvmulwev('w', 'h') }} +{{ xvmulwev('w', 'hu') }} +{{ xvmulwev('w', 'hu', 'h') }} +{{ xvmulwev('d', 'w') }} +{{ xvmulwev('d', 'wu') }} +{{ xvmulwev('d', 'wu', 'w') }} +{{ xvmulwev('q', 'd') }} +{{ xvmulwev('q', 'du') }} +{{ xvmulwev('q', 'du', 'd') }} + +{{ xvmulwod('h', 'b') }} +{{ xvmulwod('h', 'bu') }} +{{ xvmulwod('h', 'bu', 'b') }} +{{ xvmulwod('w', 'h') }} +{{ xvmulwod('w', 'hu') }} +{{ xvmulwod('w', 'hu', 'h') }} +{{ xvmulwod('d', 'w') }} +{{ xvmulwod('d', 'wu') }} +{{ xvmulwod('d', 'wu', 'w') }} +{{ xvmulwod('q', 'd') }} +{{ xvmulwod('q', 'du') }} +{{ xvmulwod('q', 'du', 'd') }} + +{{ xvneg('b') }} +{{ xvneg('h') }} +{{ xvneg('w') }} +{{ xvneg('d') }} + +{{ xvsadd('b') }} +{{ xvsadd('bu') }} +{{ xvsadd('h') }} +{{ xvsadd('hu') }} +{{ xvsadd('w') }} +{{ xvsadd('wu') }} +{{ xvsadd('d') }} +{{ xvsadd('du') }} + +{{ xvssub('b') }} +{{ xvssub('bu') }} +{{ xvssub('h') }} +{{ xvssub('hu') }} +{{ xvssub('w') }} +{{ xvssub('wu') }} +{{ xvssub('d') }} +{{ xvssub('du') }} + +{{ xvsub('b') }} +{{ xvsub('h') }} +{{ xvsub('w') }} +{{ xvsub('d') }} +{{ xvsub('q') }} + +{{ xvsubi('bu') }} +{{ xvsubi('hu') }} +{{ xvsubi('wu') }} +{{ xvsubi('du') }} + +{{ xvsubwev('h', 'b') }} +{{ xvsubwev('h', 'bu') }} +{{ 
xvsubwev('w', 'h') }} +{{ xvsubwev('w', 'hu') }} +{{ xvsubwev('d', 'w') }} +{{ xvsubwev('d', 'wu') }} +{{ xvsubwev('q', 'd') }} +{{ xvsubwev('q', 'du') }} + +{{ xvsubwod('h', 'b') }} +{{ xvsubwod('h', 'bu') }} +{{ xvsubwod('w', 'h') }} +{{ xvsubwod('w', 'hu') }} +{{ xvsubwod('d', 'w') }} +{{ xvsubwod('d', 'wu') }} +{{ xvsubwod('q', 'd') }} +{{ xvsubwod('q', 'du') }} \ No newline at end of file diff --git a/docs/lasx/logical.md b/docs/lasx/logical.md new file mode 100644 index 00000000..71911475 --- /dev/null +++ b/docs/lasx/logical.md @@ -0,0 +1,12 @@ +# Logical + +{{ xvlogical('and') }} +{{ xvlogicali('and') }} +{{ xvlogical('andn') }} +{{ xvlogical('nor') }} +{{ xvlogicali('nor') }} +{{ xvlogical('or') }} +{{ xvlogicali('or') }} +{{ xvlogical('orn') }} +{{ xvlogical('xor') }} +{{ xvlogicali('xor') }} \ No newline at end of file diff --git a/docs/lasx/memory.md b/docs/lasx/memory.md new file mode 100644 index 00000000..b87280b4 --- /dev/null +++ b/docs/lasx/memory.md @@ -0,0 +1,17 @@ +# Memory Load & Store + +{{ xvld() }} +{{ xvldx() }} + +{{ xvldrepl('b') }} +{{ xvldrepl('h') }} +{{ xvldrepl('w') }} +{{ xvldrepl('d') }} + +{{ xvst() }} +{{ xvstx() }} + +{{ xvstelm('b') }} +{{ xvstelm('h') }} +{{ xvstelm('w') }} +{{ xvstelm('d') }} diff --git a/docs/lasx/misc.md b/docs/lasx/misc.md new file mode 100644 index 00000000..3c2bc8c5 --- /dev/null +++ b/docs/lasx/misc.md @@ -0,0 +1,96 @@ +# Misc + +{{ xvilvh('b') }} +{{ xvilvh('h') }} +{{ xvilvh('w') }} +{{ xvilvh('d') }} + +{{ xvilvl('b') }} +{{ xvilvl('h') }} +{{ xvilvl('w') }} +{{ xvilvl('d') }} + +{{ xvinsgr2vr('b') }} +{{ xvinsgr2vr('h') }} +{{ xvinsgr2vr('w') }} +{{ xvinsgr2vr('d') }} + +{{ xvfrstp('b') }} +{{ xvfrstp('h') }} + +{{ xvfrstpi('b') }} +{{ xvfrstpi('h') }} + +{{ xvmskgez('b') }} + +{{ xvmskltz('b') }} +{{ xvmskltz('h') }} +{{ xvmskltz('w') }} +{{ xvmskltz('d') }} + +{{ xvmsknz('b') }} + +{{ xvpackev('b') }} +{{ xvpackev('h') }} +{{ xvpackev('w') }} +{{ xvpackev('d') }} + +{{ xvpackod('b') }} +{{ 
xvpackod('h') }} +{{ xvpackod('w') }} +{{ xvpackod('d') }} + +{{ xvpickev('b') }} +{{ xvpickev('h') }} +{{ xvpickev('w') }} +{{ xvpickev('d') }} + +{{ xvpickve2gr('b') }} +{{ xvpickve2gr('bu') }} +{{ xvpickve2gr('h') }} +{{ xvpickve2gr('hu') }} +{{ xvpickve2gr('w') }} +{{ xvpickve2gr('wu') }} +{{ xvpickve2gr('d') }} +{{ xvpickve2gr('du') }} + +{{ xvpickod('b') }} +{{ xvpickod('h') }} +{{ xvpickod('w') }} +{{ xvpickod('d') }} + +{{ xvrepli('b') }} +{{ xvrepli('h') }} +{{ xvrepli('w') }} +{{ xvrepli('d') }} + +{{ xvreplgr2vr('b') }} +{{ xvreplgr2vr('h') }} +{{ xvreplgr2vr('w') }} +{{ xvreplgr2vr('d') }} + +{{ xvreplve('b') }} +{{ xvreplve('h') }} +{{ xvreplve('w') }} +{{ xvreplve('d') }} + +{{ xvreplvei('b') }} +{{ xvreplvei('h') }} +{{ xvreplvei('w') }} +{{ xvreplvei('d') }} + +{{ xvsat('b') }} +{{ xvsat('bu') }} +{{ xvsat('h') }} +{{ xvsat('hu') }} +{{ xvsat('w') }} +{{ xvsat('wu') }} +{{ xvsat('d') }} +{{ xvsat('du') }} + +{{ xvsigncov('b') }} +{{ xvsigncov('h') }} +{{ xvsigncov('w') }} +{{ xvsigncov('d') }} + +{{ xvldi() }} \ No newline at end of file diff --git a/docs/lasx/permutation.md b/docs/lasx/permutation.md new file mode 100644 index 00000000..7c45aa5b --- /dev/null +++ b/docs/lasx/permutation.md @@ -0,0 +1,3 @@ +# Permutation + +{{ xvpermi_d() }} \ No newline at end of file diff --git a/docs/lasx/shift.md b/docs/lasx/shift.md new file mode 100644 index 00000000..6e31e501 --- /dev/null +++ b/docs/lasx/shift.md @@ -0,0 +1,171 @@ +# Shift + +{{ xvbsll_srl('sll', 'left') }} +{{ xvbsll_srl('srl', 'right') }} + +{{ xvsll('b') }} +{{ xvsll('h') }} +{{ xvsll('w') }} +{{ xvsll('d') }} + +{{ xvslli('b') }} +{{ xvslli('h') }} +{{ xvslli('w') }} +{{ xvslli('d') }} + +{{ xvsllwil('h', 'b') }} +{{ xvsllwil('hu', 'bu') }} +{{ xvsllwil('w', 'h') }} +{{ xvsllwil('wu', 'hu') }} +{{ xvsllwil('d', 'w') }} +{{ xvsllwil('du', 'wu') }} + +{{ xvsra('b') }} +{{ xvsra('h') }} +{{ xvsra('w') }} +{{ xvsra('d') }} + +{{ xvsrai('b') }} +{{ xvsrai('h') }} +{{ xvsrai('w') }} +{{ 
xvsrai('d') }} + +{{ xvsran('b', 'h') }} +{{ xvsran('h', 'w') }} +{{ xvsran('w', 'd') }} + +{{ xvsrani('b', 'h') }} +{{ xvsrani('h', 'w') }} +{{ xvsrani('w', 'd') }} +{{ xvsrani('d', 'q') }} + +{{ xvsrar('b') }} +{{ xvsrar('h') }} +{{ xvsrar('w') }} +{{ xvsrar('d') }} + +{{ xvsrari('b') }} +{{ xvsrari('h') }} +{{ xvsrari('w') }} +{{ xvsrari('d') }} + +{{ xvsrarn('b', 'h') }} +{{ xvsrarn('h', 'w') }} +{{ xvsrarn('w', 'd') }} + +{{ xvsrarni('b', 'h') }} +{{ xvsrarni('h', 'w') }} +{{ xvsrarni('w', 'd') }} +{{ xvsrarni('d', 'q') }} + +{{ xvsrl('b') }} +{{ xvsrl('h') }} +{{ xvsrl('w') }} +{{ xvsrl('d') }} + +{{ xvsrli('b') }} +{{ xvsrli('h') }} +{{ xvsrli('w') }} +{{ xvsrli('d') }} + +{{ xvsrln('b', 'h') }} +{{ xvsrln('h', 'w') }} +{{ xvsrln('w', 'd') }} + +{{ xvsrlni('b', 'h') }} +{{ xvsrlni('h', 'w') }} +{{ xvsrlni('w', 'd') }} +{{ xvsrlni('d', 'q') }} + +{{ xvsrlr('b') }} +{{ xvsrlr('h') }} +{{ xvsrlr('w') }} +{{ xvsrlr('d') }} + +{{ xvsrlri('b') }} +{{ xvsrlri('h') }} +{{ xvsrlri('w') }} +{{ xvsrlri('d') }} + +{{ xvsrlrn('b', 'h') }} +{{ xvsrlrn('h', 'w') }} +{{ xvsrlrn('w', 'd') }} + +{{ xvsrlrni('b', 'h') }} +{{ xvsrlrni('h', 'w') }} +{{ xvsrlrni('w', 'd') }} +{{ xvsrlrni('d', 'q') }} + +{{ xvssran('b', 'h') }} +{{ xvssran('bu', 'h') }} +{{ xvssran('h', 'w') }} +{{ xvssran('hu', 'w') }} +{{ xvssran('w', 'd') }} +{{ xvssran('wu', 'd') }} + +{{ xvssrani('b', 'h') }} +{{ xvssrani('bu', 'h') }} +{{ xvssrani('h', 'w') }} +{{ xvssrani('hu', 'w') }} +{{ xvssrani('w', 'd') }} +{{ xvssrani('wu', 'd') }} +{{ xvssrani('d', 'q') }} +{{ xvssrani('du', 'q') }} + +{{ xvssrarn('b', 'h') }} +{{ xvssrarn('bu', 'h') }} +{{ xvssrarn('h', 'w') }} +{{ xvssrarn('hu', 'w') }} +{{ xvssrarn('w', 'd') }} +{{ xvssrarn('wu', 'd') }} + +{{ xvssrarni('b', 'h') }} +{{ xvssrarni('bu', 'h') }} +{{ xvssrarni('h', 'w') }} +{{ xvssrarni('hu', 'w') }} +{{ xvssrarni('w', 'd') }} +{{ xvssrarni('wu', 'd') }} +{{ xvssrarni('d', 'q') }} +{{ xvssrarni('du', 'q') }} + +{{ xvssrln('b', 'h') }} +{{ 
xvssrln('bu', 'h') }} +{{ xvssrln('h', 'w') }} +{{ xvssrln('hu', 'w') }} +{{ xvssrln('w', 'd') }} +{{ xvssrln('wu', 'd') }} + +{{ xvssrlni('b', 'h') }} +{{ xvssrlni('bu', 'h') }} +{{ xvssrlni('h', 'w') }} +{{ xvssrlni('hu', 'w') }} +{{ xvssrlni('w', 'd') }} +{{ xvssrlni('wu', 'd') }} +{{ xvssrlni('d', 'q') }} +{{ xvssrlni('du', 'q') }} + +{{ xvssrlrn('b', 'h') }} +{{ xvssrlrn('bu', 'h') }} +{{ xvssrlrn('h', 'w') }} +{{ xvssrlrn('hu', 'w') }} +{{ xvssrlrn('w', 'd') }} +{{ xvssrlrn('wu', 'd') }} + +{{ xvssrlrni('b', 'h') }} +{{ xvssrlrni('bu', 'h') }} +{{ xvssrlrni('h', 'w') }} +{{ xvssrlrni('hu', 'w') }} +{{ xvssrlrni('w', 'd') }} +{{ xvssrlrni('wu', 'd') }} +{{ xvssrlrni('d', 'q') }} +{{ xvssrlrni('du', 'q') }} + +{{ xvrotr('b') }} +{{ xvrotr('h') }} +{{ xvrotr('w') }} +{{ xvrotr('d') }} + +{{ xvrotri('b') }} +{{ xvrotri('h') }} +{{ xvrotri('w') }} +{{ xvrotri('d') }} diff --git a/docs/lasx/shuffling.md b/docs/lasx/shuffling.md new file mode 100644 index 00000000..18736382 --- /dev/null +++ b/docs/lasx/shuffling.md @@ -0,0 +1,12 @@ +# Shuffling + +{{ xvshuf_b() }} + +{{ xvshuf_hwd('h') }} +{{ xvshuf_hwd('w') }} +{{ xvshuf_hwd('d') }} + +{{ xvshuf4i('b') }} +{{ xvshuf4i('h') }} +{{ xvshuf4i('w') }} +{{ xvshuf4i('d') }} diff --git a/main.py b/main.py index 00e6ee97..13434024 100644 --- a/main.py +++ b/main.py @@ -61,10 +61,12 @@ def instruction(intrinsic, instr, desc): for part in intrinsic.split(" "): if part.startswith("__lsx_"): file_name = part[6:] - if cur_simd == "lasx": + elif part.startswith("__lasx_"): + file_name = part[7:] + if cur_simd == "lasx" and file_name[0] != "x": file_name = "x" + file_name instr = "x" + instr - intrinsic = intrinsic.replace("m128", "m256").replace("_lsx_", "_lasx_") + intrinsic = intrinsic.replace("m128", "m256").replace("_lsx_", "_lasx_x") if not os.path.exists(f"code/{file_name}.h"): file_name = instr.split(" ")[0].replace(".", "_") @@ -1055,7 +1057,7 @@ def bz_v(): return instruction( intrinsic=f"int __lsx_bz_v (__m128i a)", 
instr=f"vseteqz.v vr, vr; bcnez", - desc=f"Expected to be used in branches: branch if the 128-bit vector `a` equals to zero.", + desc=f"Expected to be used in branches: branch if the whole vector `a` equals to zero.", ) @my_macro(env) @@ -1063,7 +1065,7 @@ def bnz_v(): return instruction( intrinsic=f"int __lsx_bnz_v (__m128i a)", instr=f"vsetnez.v vr, vr; bcnez", - desc=f"Expected to be used in branches: branch if the 128-bit vector `a` is non-zero.", + desc=f"Expected to be used in branches: branch if the whole vector `a` is non-zero.", ) @my_macro(env) @@ -1423,7 +1425,7 @@ def vbsll_srl(name, dir): return instruction( intrinsic=f"__m128i __lsx_vb{name}_v (__m128i a, imm0_31 imm)", instr=f"vb{name}.v vr, vr, imm", - desc=f"Compute 128-bit `a` shifted {dir} by `imm * 8` bits.", + desc=f"Compute whole vector `a` shifted {dir} by `imm * 8` bits.", ) @my_macro(env) @@ -1434,12 +1436,20 @@ def vpermi_w(): desc=f"Permute words from `a` and `b` with indices recorded in `imm` and store into `dst`.", ) + @env.macro + def xvpermi_d(): + return instruction( + intrinsic=f"__m256i __lasx_xvpermi_d (__m256i a, imm0_255 imm)", + instr=f"xvpermi.d vr, vr, imm", + desc=f"Permute double words from `a` with indices recorded in `imm` and store into `dst`.", + ) + @my_macro(env) def vld(): return instruction( intrinsic=f"__m128i __lsx_vld (void * addr, imm_n2048_2047 offset)", instr=f"vld vr, r, imm", - desc=f"Read 128-bit data from memory address `addr + offset`, save the data into `dst`.", + desc=f"Read whole vector from memory address `addr + offset`, save the data into `dst`.", ) @my_macro(env) @@ -1447,7 +1457,7 @@ def vldx(): return instruction( intrinsic=f"__m128i __lsx_vldx (void * addr, long int offset)", instr=f"vldx vr, r, r", - desc=f"Read 128-bit data from memory address `addr + offset`, save the data into `dst`.", + desc=f"Read whole vector from memory address `addr + offset`, save the data into `dst`.", ) @my_macro(env) @@ -1455,7 +1465,7 @@ def vst(): return
instruction( intrinsic=f"void __lsx_vst (__m128i data, void * addr, imm_n2048_2047 offset)", instr=f"vst vr, r, imm", - desc=f"Write 128-bit data in `data` to memory address `addr + offset`.", + desc=f"Write whole vector data in `data` to memory address `addr + offset`.", ) @my_macro(env) @@ -1463,7 +1473,7 @@ def vstx(): return instruction( intrinsic=f"void __lsx_vstx (__m128i data, void * addr, long int offset)", instr=f"vstx vr, r, r", - desc=f"Write 128-bit data in `data` to memory address `addr + offset`.", + desc=f"Write whole vector data in `data` to memory address `addr + offset`.", ) @my_macro(env)