Skip to content

Commit

Permalink
Add missing vmulwod and vdiv
Browse files Browse the repository at this point in the history
  • Loading branch information
jiegec committed Dec 13, 2023
1 parent fcdd483 commit 5c2a854
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 35 deletions.
45 changes: 22 additions & 23 deletions check_lsx.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,26 @@
# Update gh-pages before running:
# git fetch origin gh-pages && git -C ../gh-pages reset origin/gh-pages --hard

def parse_fn(line, skip_last):
    """Tokenise a C function signature into a hashable, comparable tuple.

    The signature is cut into the text before the first ``(`` (return type
    and function name) and the text between the parentheses (the parameter
    list).  The resulting tuple has the form
    ``(<return type tokens>, <name>, "(", <parameter tokens>, ")")``.

    Tokens are split on arbitrary runs of whitespace, so differences in
    spacing (``a,b`` vs ``a, b``, ``name(`` vs ``name (``) never leak into
    the result as empty-string tokens, and an empty parameter list yields
    the same tuple regardless of ``skip_last``.

    Args:
        line: One signature, e.g. ``"__m128i __lsx_vadd_b (__m128i, __m128i);"``.
            Anything after the first ``)`` (such as ``;`` or a newline) is
            ignored.
        skip_last: When true, drop the last token of every parameter.  Use
            this for signatures that carry parameter names
            (``__m128i a``) so they compare equal to prototypes that only
            list the types (``__m128i``).

    Returns:
        A tuple of string tokens describing the signature.

    Raises:
        IndexError: If ``line`` contains no ``(``.
    """
    pieces = line.split("(")
    # split() without a separator collapses whitespace runs and discards
    # leading/trailing blanks, unlike split(" ") which emits "" tokens.
    before_paren = pieces[0].split()
    between_parens = pieces[1].split(")")[0]

    result = before_paren
    result.append("(")
    for arg in between_parens.split(","):
        tokens = arg.split()
        if skip_last:
            # The final token of a documented parameter is its name.
            tokens = tokens[:-1]
        result.extend(tokens)
    result.append(")")

    return tuple(result)

# gcc intrinsics
gcc_intrinsics = set()
for line in open('gcc_lsxintrin.h', 'r'):
gcc_intrinsics.add(tuple(line.strip()[:-1].split(' ')))
gcc_intrinsics.add(parse_fn(line, False))
#print(gcc_intrinsics)

# find documented intrinsics
Expand All @@ -19,28 +35,11 @@
for line in open(f, 'r'):
if "h2" in line:
intrinsics = line.split(">")[1].split("<")[0]

# strip off names that gcc does not have
parts = intrinsics.split(" ")
begin_params = False
i = 0
while i < len(parts):
if "(" in parts[i]:
begin_params = True
i += 1
elif "," in parts[i]:
parts = parts[:i] + parts[i+1:]
elif ")" in parts[i]:
parts = parts[:i] + parts[i+1:]
parts[i-1] += ")"
i += 1
elif begin_params:
parts[i-1] += ","
i += 1
else:
i += 1
documented_intrinsics.add(tuple(parts))
documented_intrinsics.add(parse_fn(intrinsics, True))

undocumented = gcc_intrinsics - documented_intrinsics
for entry in undocumented:
print("Undocumented:", " ".join(entry))
print("Undocumented:", " ".join(entry))
for e in documented_intrinsics:
if e[1] == entry[1]:
print("Matching:", " ".join(e))
4 changes: 2 additions & 2 deletions docs/lsx/float_conversion.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,12 +85,12 @@ Convert half precision floating point elements in higher half of `a` to single p
{% include('vfcvth_s_h.h') %}
```

## __m128d __lsx_vfcvtl_s_h (__m128i a)
## __m128 __lsx_vfcvtl_s_h (__m128i a)

### Synopsis

```c++
__m128d __lsx_vfcvtl_s_h (__m128i a)
__m128 __lsx_vfcvtl_s_h (__m128i a)
#include <lsxintrin.h>
Instruction: vfcvtl.s.h vr, vr
CPU Flags: LSX
Expand Down
13 changes: 13 additions & 0 deletions docs/lsx/integer_computation.md
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,19 @@
{{ vmulwev('q', 'du') }}
{{ vmulwev('q', 'du', 'd') }}

{{ vmulwod('h', 'b') }}
{{ vmulwod('h', 'bu') }}
{{ vmulwod('h', 'bu', 'b') }}
{{ vmulwod('w', 'h') }}
{{ vmulwod('w', 'hu') }}
{{ vmulwod('w', 'hu', 'h') }}
{{ vmulwod('d', 'w') }}
{{ vmulwod('d', 'wu') }}
{{ vmulwod('d', 'wu', 'w') }}
{{ vmulwod('q', 'd') }}
{{ vmulwod('q', 'du') }}
{{ vmulwod('q', 'du', 'd') }}

{{ vneg('b') }}
{{ vneg('h') }}
{{ vneg('w') }}
Expand Down
16 changes: 8 additions & 8 deletions gcc_lsxintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ __m128i __lsx_vsrai_w (__m128i, imm0_31);
__m128i __lsx_vsran_b_h (__m128i, __m128i);
__m128i __lsx_vsran_h_w (__m128i, __m128i);
__m128i __lsx_vsrani_b_h (__m128i, __m128i, imm0_15);
__m128i __lsx_vsrani_d_q (__m128i, __m128i, imm0_127)
__m128i __lsx_vsrani_d_q (__m128i, __m128i, imm0_127);
__m128i __lsx_vsrani_h_w (__m128i, __m128i, imm0_31);
__m128i __lsx_vsrani_w_d (__m128i, __m128i, imm0_63);
__m128i __lsx_vsran_w_d (__m128i, __m128i);
Expand Down Expand Up @@ -601,7 +601,7 @@ __m128i __lsx_vsrli_w (__m128i, imm0_31);
__m128i __lsx_vsrln_b_h (__m128i, __m128i);
__m128i __lsx_vsrln_h_w (__m128i, __m128i);
__m128i __lsx_vsrlni_b_h (__m128i, __m128i, imm0_15);
__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127)
__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127);
__m128i __lsx_vsrlni_h_w (__m128i, __m128i, imm0_31);
__m128i __lsx_vsrlni_w_d (__m128i, __m128i, imm0_63);
__m128i __lsx_vsrln_w_d (__m128i, __m128i);
Expand All @@ -627,7 +627,7 @@ __m128i __lsx_vssran_hu_w (__m128i, __m128i);
__m128i __lsx_vssran_h_w (__m128i, __m128i);
__m128i __lsx_vssrani_b_h (__m128i, __m128i, imm0_15);
__m128i __lsx_vssrani_bu_h (__m128i, __m128i, imm0_15);
__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127)
__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127);
__m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127);
__m128i __lsx_vssrani_hu_w (__m128i, __m128i, imm0_31);
__m128i __lsx_vssrani_h_w (__m128i, __m128i, imm0_31);
Expand All @@ -641,8 +641,8 @@ __m128i __lsx_vssrarn_hu_w (__m128i, __m128i);
__m128i __lsx_vssrarn_h_w (__m128i, __m128i);
__m128i __lsx_vssrarni_b_h (__m128i, __m128i, imm0_15);
__m128i __lsx_vssrarni_bu_h (__m128i, __m128i, imm0_15);
__m128i __lsx_vssrarni_d_q (__m128i, __m128i, imm0_127)
__m128i __lsx_vssrarni_du_q (__m128i, __m128i, imm0_127)
__m128i __lsx_vssrarni_d_q (__m128i, __m128i, imm0_127);
__m128i __lsx_vssrarni_du_q (__m128i, __m128i, imm0_127);
__m128i __lsx_vssrarni_hu_w (__m128i, __m128i, imm0_31);
__m128i __lsx_vssrarni_h_w (__m128i, __m128i, imm0_31);
__m128i __lsx_vssrarni_w_d (__m128i, __m128i, imm0_63);
Expand All @@ -655,7 +655,7 @@ __m128i __lsx_vssrln_hu_w (__m128i, __m128i);
__m128i __lsx_vssrln_h_w (__m128i, __m128i);
__m128i __lsx_vssrlni_b_h (__m128i, __m128i, imm0_15);
__m128i __lsx_vssrlni_bu_h (__m128i, __m128i, imm0_15);
__m128i __lsx_vssrlni_d_q (__m128i, __m128i, imm0_127)
__m128i __lsx_vssrlni_d_q (__m128i, __m128i, imm0_127);
__m128i __lsx_vssrlni_du_q (__m128i, __m128i, imm0_127);
__m128i __lsx_vssrlni_hu_w (__m128i, __m128i, imm0_31);
__m128i __lsx_vssrlni_h_w (__m128i, __m128i, imm0_31);
Expand All @@ -669,8 +669,8 @@ __m128i __lsx_vssrlrn_hu_w (__m128i, __m128i);
__m128i __lsx_vssrlrn_h_w (__m128i, __m128i);
__m128i __lsx_vssrlrni_b_h (__m128i, __m128i, imm0_15);
__m128i __lsx_vssrlrni_bu_h (__m128i, __m128i, imm0_15);
__m128i __lsx_vssrlrni_d_q (__m128i, __m128i, imm0_127)
__m128i __lsx_vssrlrni_du_q (__m128i, __m128i, imm0_127)
__m128i __lsx_vssrlrni_d_q (__m128i, __m128i, imm0_127);
__m128i __lsx_vssrlrni_du_q (__m128i, __m128i, imm0_127);
__m128i __lsx_vssrlrni_hu_w (__m128i, __m128i, imm0_31);
__m128i __lsx_vssrlrni_h_w (__m128i, __m128i, imm0_31);
__m128i __lsx_vssrlrni_w_d (__m128i, __m128i, imm0_63);
Expand Down
4 changes: 2 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ def vadd_mul_sub_w_ev_od(op, desc, even_odd, wide, narrow, narrow2=None):
suffix = "od"
return instruction(
intrinsic=f"__m128i __lsx_v{op}w{suffix}_{wide}_{narrow}{intrinsic_suffix} (__m128i a, __m128i b)",
instr=f"v{op}wev.{wide}.{narrow}{inst_suffix} vr, vr, vr",
instr=f"v{op}w{suffix}.{wide}.{narrow}{inst_suffix} vr, vr, vr",
desc=f"{desc} {even_odd}-positioned {signedness} {narrow_width}-bit elements in `a` and {signedness2} elements in `b`, save the {wide_width}-bit result in `dst`.",
)

Expand Down Expand Up @@ -269,7 +269,7 @@ def vdiv(name):
width = widths[name]
signedness = signednesses[name]
return instruction(
intrinsic=f"__m128i __lsx_div_{name} (__m128i a, __m128i b)",
intrinsic=f"__m128i __lsx_vdiv_{name} (__m128i a, __m128i b)",
instr=f"vdiv.{name} vr, vr, vr",
desc=f"Divide {signedness} {width}-bit elements in `a` by elements in `b`.",
)
Expand Down

0 comments on commit 5c2a854

Please sign in to comment.