Skip to content

Commit

Permalink
Add remaining lasx instructions
Browse files Browse the repository at this point in the history
  • Loading branch information
jiegec committed Dec 14, 2023
1 parent 0f3c4e3 commit 6b74959
Show file tree
Hide file tree
Showing 25 changed files with 250 additions and 20 deletions.
122 changes: 105 additions & 17 deletions code/gen_impl.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,10 @@
file=f,
)
print(f"}}", file=f)
print(f"for (int i = {vlen // 2 // double_w};i < {vlen // double_w};i++) {{", file=f)
print(
f"for (int i = {vlen // 2 // double_w};i < {vlen // double_w};i++) {{",
file=f,
)
print(
f" dst.{double_m}[i] = ({sign}{double_w})({sign}{w})a.{m}[i + {vlen // 2 // double_w}] << imm;",
file=f,
Expand All @@ -152,13 +155,18 @@
)
print(f"}}", file=f)
else:
print(f"for (int i = 0;i < {vlen // 2 // w};i++) {{", file=f)
print(
f"for (int i = 0;i < {vlen // 2 // w};i++) {{", file=f
)
print(
f" dst.{m}[i] = (i < {vlen // 4 // w}) ? ({sign}{w})(({sign}{double_w})a.{double_m}[i] >> (b.{double_m}[i] & {double_w-1})) : 0;",
file=f,
)
print(f"}}", file=f)
print(f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{", file=f)
print(
f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{",
file=f,
)
print(
f" dst.{m}[i] = (i < {3 * vlen // 4 // w}) ? ({sign}{w})(({sign}{double_w})a.{double_m}[i - {vlen // 4 // w}] >> (b.{double_m}[i - {vlen // 4 // w}] & {double_w-1})) : 0;",
file=f,
Expand All @@ -168,7 +176,9 @@
if prefix == "v":
print(f"for (int i = 0;i < {vlen // w};i++) {{", file=f)
print(f"if (i < {vlen // 2 // w}) {{", file=f)
print(f"u8 shift = (b.{double_m}[i] & {double_w-1});", file=f)
print(
f"u8 shift = (b.{double_m}[i] & {double_w-1});", file=f
)
print(f"if (shift == 0) {{", file=f)
print(
f" dst.{m}[i] = ({sign}{w})({sign}{double_w})a.{double_m}[i];",
Expand All @@ -185,9 +195,13 @@
print(f"}}", file=f)
print(f"}}", file=f)
else:
print(f"for (int i = 0;i < {vlen // 2 // w};i++) {{", file=f)
print(
f"for (int i = 0;i < {vlen // 2 // w};i++) {{", file=f
)
print(f"if (i < {vlen // 4 // w}) {{", file=f)
print(f"u8 shift = (b.{double_m}[i] & {double_w-1});", file=f)
print(
f"u8 shift = (b.{double_m}[i] & {double_w-1});", file=f
)
print(f"if (shift == 0) {{", file=f)
print(
f" dst.{m}[i] = ({sign}{w})({sign}{double_w})a.{double_m}[i];",
Expand All @@ -203,9 +217,15 @@
print(f" dst.{m}[i] = 0;", file=f)
print(f"}}", file=f)
print(f"}}", file=f)
print(f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{", file=f)
print(
f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{",
file=f,
)
print(f"if (i < {3 * vlen // 4 // w}) {{", file=f)
print(f"u8 shift = (b.{double_m}[i - {vlen // 4 // w}] & {double_w-1});", file=f)
print(
f"u8 shift = (b.{double_m}[i - {vlen // 4 // w}] & {double_w-1});",
file=f,
)
print(f"if (shift == 0) {{", file=f)
print(
f" dst.{m}[i] = ({sign}{w})({sign}{double_w})a.{double_m}[i - {vlen // 4 // w}];",
Expand Down Expand Up @@ -276,7 +296,10 @@
print(f"}}", file=f)
print(f"}}", file=f)

print(f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{", file=f)
print(
f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{",
file=f,
)
print(f"if (i < {3 * vlen // 4 // w}) {{", file=f)
print(
f" {shift_sign}{double_w} temp = ({shift_sign}{double_w})a.{double_m}[i - {vlen // 4 // w}] >> (b.{double_m}[i - {vlen // 4 // w}] & {double_w-1});",
Expand Down Expand Up @@ -349,10 +372,16 @@
print(f"}}", file=f)
print(f"}}", file=f)

print(f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{", file=f)
print(
f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{",
file=f,
)
print(f"if (i < {3 * vlen // 4 // w}) {{", file=f)
print(f"{shift_sign}{double_w} temp;", file=f)
print(f"if ((b.{double_m}[i - {vlen // 4 // w}] & {double_w-1}) == 0) {{", file=f)
print(
f"if ((b.{double_m}[i - {vlen // 4 // w}] & {double_w-1}) == 0) {{",
file=f,
)
print(
f" temp = ({shift_sign}{double_w})a.{double_m}[i - {vlen // 4 // w}];",
file=f,
Expand Down Expand Up @@ -393,7 +422,10 @@
)
print(f"}}", file=f)

print(f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{", file=f)
print(
f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{",
file=f,
)
print(
f" dst.{m}[i] = (i < {3 * vlen // 4 // w}) ? ({sign}{w})(({sign}{double_w})b.{double_m}[i - {vlen // 4 // w}] >> imm) : ({sign}{w})(({sign}{double_w})a.{double_m}[i - {vlen // 2 // w}] >> imm);",
file=f,
Expand Down Expand Up @@ -457,7 +489,10 @@
print(f"}}", file=f)
print(f"}}", file=f)

print(f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{", file=f)
print(
f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{",
file=f,
)
print(f"if (i < {3 * vlen // 4 // w}) {{", file=f)
print(f"if (imm == 0) {{", file=f)
print(
Expand Down Expand Up @@ -546,7 +581,9 @@
print(f"}}", file=f)
print(f"}}", file=f)

print(f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{", file=f)
print(
f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{", file=f
)
print(f"if (i < {3 * vlen // 4 // w}) {{", file=f)
print(
f" {shift_sign}{double_w} temp = ({shift_sign}{double_w})b.{double_m}[i - {vlen // 4 // w}] >> imm;",
Expand Down Expand Up @@ -645,7 +682,9 @@
print(f"}}", file=f)
print(f"}}", file=f)

print(f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{", file=f)
print(
f"for (int i = {vlen // 2 // w};i < {vlen // w};i++) {{", file=f
)
print(f"if (i < {3 * vlen // 4 // w}) {{", file=f)
print(f"{shift_sign}{double_w} temp;", file=f)
print(f"if (imm == 0) {{", file=f)
Expand Down Expand Up @@ -1000,6 +1039,15 @@
file=f,
)
print(f"}}", file=f)
if prefix == "xv" and width in ("d", "w"):
    # xvinsve0_d/w: emit a loop that copies every element of `a`, except
    # that the element whose index equals `imm` is taken from b's element 0.
    with open(f"{prefix}insve0_{width}.h", "w") as f:
        for line in (
            f"for (int i = 0;i < {vlen // w};i++) {{",
            f" dst.{m}[i] = (i == imm) ? b.{m}[0] : a.{m}[i];",
            f"}}",
        ):
            print(line, file=f)
with open(f"{prefix}seq_{width}.h", "w") as f:
print(f"for (int i = 0;i < {vlen // w};i++) {{", file=f)
print(
Expand Down Expand Up @@ -1183,7 +1231,7 @@
)
print(f"}}", file=f)
with open(f"{prefix}replve_{width}.h", "w") as f:
mask = 128 // w # not vlen
mask = 128 // w # not vlen
if prefix == "v":
print(f"for (int i = 0;i < {vlen // w};i++) {{", file=f)
print(
Expand Down Expand Up @@ -1212,6 +1260,14 @@
file=f,
)
print(f"}}", file=f)
else:
with open(f"{prefix}repl128vei_{width}.h", "w") as f:
    # xvrepl128vei replicates element `idx` *within each 128-bit lane*, so
    # the source index must be offset by the start of the lane that element
    # `i` belongs to. 128 // w is the number of elements per 128-bit lane;
    # the C integer division (i / n) * n rounds i down to its lane start.
    print(f"for (int i = 0;i < {vlen // w};i++) {{", file=f)
    print(
        f" dst.{m}[i] = a.{m}[(i / {128 // w}) * {128 // w} + idx];",
        file=f,
    )
    print(f"}}", file=f)
with open(f"{prefix}replgr2vr_{width}.h", "w") as f:
print(f"for (int i = 0;i < {vlen // w};i++) {{", file=f)
print(
Expand Down Expand Up @@ -1524,15 +1580,16 @@
)
print(f"}}", file=f)

# xvpickve
if prefix == "xv":
# xvpickve
for width in ["w", "w_f", "d", "d_f"]:
w = widths[width[0]]
m = members[width[0]]
with open(f"{prefix}pickve_{width}.h", "w") as f:
print(f"for (int i = 0;i < {vlen // w};i++) {{", file=f)
print(f" dst.{m}[i] = (i == 0) ? a.{m}[imm] : 0;", file=f)
print(f"}}", file=f)
# xvreplve0
for width in ["b", "h", "w", "q", "d"]:
w = widths[width]
m = members[width]
Expand All @@ -1543,5 +1600,36 @@
file=f,
)
print(f"}}", file=f)
if prefix == "v":
    # vext2xv: widen the low elements of `a` (sign- or zero-extended) into
    # a full 256-bit result. This branch runs under the "v" prefix, but the
    # destination is a 256-bit LASX register, so the element count must be
    # 256 // w; using vlen // w would fill only the lower half of dst.
    for width, width2 in [
        ("h", "b"),
        ("w", "b"),
        ("d", "b"),
        ("w", "h"),
        ("d", "h"),
        ("d", "w"),
        ("hu", "bu"),
        ("wu", "bu"),
        ("du", "bu"),
        ("wu", "hu"),
        ("du", "hu"),
        ("du", "wu"),
    ]:
        w = widths[width]
        m = members[width]
        w2 = widths[width2]
        m2 = members[width2]
        # Unsigned variants carry a "u" suffix; it selects the C cast types.
        sign = "u" if "u" in width else "s"
        with open(f"{prefix}ext2xv_{width}_{width2}.h", "w") as f:
            print(f"for (int i = 0;i < {256 // w};i++) {{", file=f)
            print(
                f" dst.{m}[i] = ({sign}{w})({sign}{w2})a.{m2}[i];",
                file=f,
            )
            print(f"}}", file=f)

os.system("clang-format -i *.cpp *.h")
3 changes: 3 additions & 0 deletions code/vext2xv_d_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Sign-extend the low four bytes of a into the four 64-bit elements of the
// 256-bit result (256 / 64 = 4 destination elements).
for (int i = 0; i < 4; i++) {
  dst.dword[i] = (s64)(s8)a.byte[i];
}
3 changes: 3 additions & 0 deletions code/vext2xv_d_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Sign-extend the low four halfwords of a into the four 64-bit elements of
// the 256-bit result (256 / 64 = 4 destination elements).
for (int i = 0; i < 4; i++) {
  dst.dword[i] = (s64)(s16)a.half[i];
}
3 changes: 3 additions & 0 deletions code/vext2xv_d_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Sign-extend the low four words of a into the four 64-bit elements of the
// 256-bit result (256 / 64 = 4 destination elements).
for (int i = 0; i < 4; i++) {
  dst.dword[i] = (s64)(s32)a.word[i];
}
3 changes: 3 additions & 0 deletions code/vext2xv_du_bu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Zero-extend the low four bytes of a into the four 64-bit elements of the
// 256-bit result (256 / 64 = 4 destination elements).
for (int i = 0; i < 4; i++) {
  dst.dword[i] = (u64)(u8)a.byte[i];
}
3 changes: 3 additions & 0 deletions code/vext2xv_du_hu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Zero-extend the low four halfwords of a into the four 64-bit elements of
// the 256-bit result (256 / 64 = 4 destination elements).
for (int i = 0; i < 4; i++) {
  dst.dword[i] = (u64)(u16)a.half[i];
}
3 changes: 3 additions & 0 deletions code/vext2xv_du_wu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Zero-extend the low four words of a into the four 64-bit elements of the
// 256-bit result (256 / 64 = 4 destination elements).
for (int i = 0; i < 4; i++) {
  dst.dword[i] = (u64)(u32)a.word[i];
}
3 changes: 3 additions & 0 deletions code/vext2xv_h_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Sign-extend the low sixteen bytes of a into the sixteen 16-bit elements of
// the 256-bit result (256 / 16 = 16 destination elements).
for (int i = 0; i < 16; i++) {
  dst.half[i] = (s16)(s8)a.byte[i];
}
3 changes: 3 additions & 0 deletions code/vext2xv_hu_bu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Zero-extend the low sixteen bytes of a into the sixteen 16-bit elements of
// the 256-bit result (256 / 16 = 16 destination elements).
for (int i = 0; i < 16; i++) {
  dst.half[i] = (u16)(u8)a.byte[i];
}
3 changes: 3 additions & 0 deletions code/vext2xv_w_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Sign-extend the low eight bytes of a into the eight 32-bit elements of the
// 256-bit result (256 / 32 = 8 destination elements).
for (int i = 0; i < 8; i++) {
  dst.word[i] = (s32)(s8)a.byte[i];
}
3 changes: 3 additions & 0 deletions code/vext2xv_w_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Sign-extend the low eight halfwords of a into the eight 32-bit elements of
// the 256-bit result (256 / 32 = 8 destination elements).
for (int i = 0; i < 8; i++) {
  dst.word[i] = (s32)(s16)a.half[i];
}
3 changes: 3 additions & 0 deletions code/vext2xv_wu_bu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Zero-extend the low eight bytes of a into the eight 32-bit elements of the
// 256-bit result (256 / 32 = 8 destination elements).
for (int i = 0; i < 8; i++) {
  dst.word[i] = (u32)(u8)a.byte[i];
}
3 changes: 3 additions & 0 deletions code/vext2xv_wu_hu.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Zero-extend the low eight halfwords of a into the eight 32-bit elements of
// the 256-bit result (256 / 32 = 8 destination elements).
for (int i = 0; i < 8; i++) {
  dst.word[i] = (u32)(u16)a.half[i];
}
3 changes: 3 additions & 0 deletions code/xvinsve0_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Copy a into dst element by element; the element whose index equals imm is
// taken from element 0 of b instead.
for (int i = 0; i < 4; i++) {
  if (i == imm) {
    dst.dword[i] = b.dword[0];
  } else {
    dst.dword[i] = a.dword[i];
  }
}
3 changes: 3 additions & 0 deletions code/xvinsve0_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Copy a into dst element by element; the element whose index equals imm is
// taken from element 0 of b instead.
for (int i = 0; i < 8; i++) {
  if (i == imm) {
    dst.word[i] = b.word[0];
  } else {
    dst.word[i] = a.word[i];
  }
}
3 changes: 3 additions & 0 deletions code/xvperm_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Permute the eight words of a using the per-element indices in b.
// Only the low three bits of each index are used: masking with 0x7 keeps the
// index in 0..7 (the previous 0x8 mask could only yield 0 or the
// out-of-range index 8).
for (int i = 0; i < 8; i++) {
  dst.word[i] = a.word[b.word[i] & 0x7];
}
4 changes: 4 additions & 0 deletions code/xvpermi_q.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
dst.word[0] = b.word[imm & 0x3];
dst.word[1] = b.word[(imm >> 2) & 0x3];
dst.word[2] = a.word[(imm >> 4) & 0x3];
dst.word[3] = a.word[(imm >> 6) & 0x3];
8 changes: 8 additions & 0 deletions code/xvpermi_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
dst.word[0] = b.word[imm & 0x3];
dst.word[1] = b.word[(imm >> 2) & 0x3];
dst.word[2] = a.word[(imm >> 4) & 0x3];
dst.word[3] = a.word[(imm >> 6) & 0x3];
dst.word[4] = b.word[4 + (imm & 0x3)];
dst.word[5] = b.word[4 + ((imm >> 2) & 0x3)];
dst.word[6] = a.word[4 + ((imm >> 4) & 0x3)];
dst.word[7] = a.word[4 + ((imm >> 6) & 0x3)];
3 changes: 3 additions & 0 deletions code/xvrepl128vei_b.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Replicate byte idx within each 128-bit lane: (i / 16) * 16 is the index of
// the first byte of the lane that element i belongs to.
for (int i = 0; i < 32; i++) {
  dst.byte[i] = a.byte[(i / 16) * 16 + idx];
}
3 changes: 3 additions & 0 deletions code/xvrepl128vei_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Replicate dword idx within each 128-bit lane: (i / 2) * 2 is the index of
// the first dword of the lane that element i belongs to.
for (int i = 0; i < 4; i++) {
  dst.dword[i] = a.dword[(i / 2) * 2 + idx];
}
3 changes: 3 additions & 0 deletions code/xvrepl128vei_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Replicate halfword idx within each 128-bit lane: (i / 8) * 8 is the index
// of the first halfword of the lane that element i belongs to.
for (int i = 0; i < 16; i++) {
  dst.half[i] = a.half[(i / 8) * 8 + idx];
}
3 changes: 3 additions & 0 deletions code/xvrepl128vei_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Replicate word idx within each 128-bit lane: (i / 4) * 4 is the index of
// the first word of the lane that element i belongs to.
for (int i = 0; i < 8; i++) {
  dst.word[i] = a.word[(i / 4) * 4 + idx];
}
21 changes: 21 additions & 0 deletions docs/lasx/misc.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
# Misc

{{ vext2xv('h', 'b') }}
{{ vext2xv('hu', 'bu') }}
{{ vext2xv('w', 'b') }}
{{ vext2xv('wu', 'bu') }}
{{ vext2xv('w', 'h') }}
{{ vext2xv('wu', 'hu') }}
{{ vext2xv('d', 'b') }}
{{ vext2xv('du', 'bu') }}
{{ vext2xv('d', 'h') }}
{{ vext2xv('du', 'hu') }}
{{ vext2xv('d', 'w') }}
{{ vext2xv('du', 'wu') }}

{{ xvilvh('b') }}
{{ xvilvh('h') }}
{{ xvilvh('w') }}
Expand All @@ -13,6 +26,9 @@
{{ xvinsgr2vr('w') }}
{{ xvinsgr2vr('d') }}

{{ xvinsve0('w') }}
{{ xvinsve0('d') }}

{{ xvfrstp('b') }}
{{ xvfrstp('h') }}

Expand Down Expand Up @@ -79,6 +95,11 @@
{{ xvreplve0('d') }}
{{ xvreplve0('q') }}

{{ xvrepl128vei('b') }}
{{ xvrepl128vei('h') }}
{{ xvrepl128vei('w') }}
{{ xvrepl128vei('d') }}

{{ xvsat('b') }}
{{ xvsat('bu') }}
{{ xvsat('h') }}
Expand Down
6 changes: 5 additions & 1 deletion docs/lasx/permutation.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
# Permutation

{{ xvpermi_d() }}
{{ xvpermi_w() }}
{{ xvpermi_d() }}
{{ xvpermi_q() }}

{{ xvperm_w() }}
Loading

0 comments on commit 6b74959

Please sign in to comment.