diff --git a/check_lsx.py b/check_lsx.py index 29d052e3..3e177a8c 100644 --- a/check_lsx.py +++ b/check_lsx.py @@ -6,10 +6,26 @@ # Update gh-pages before running: # git fetch origin gh-pages && git -C ../gh-pages reset origin/gh-pages --hard +def parse_fn(line, skip_last): + before_paren = line.split("(")[0].split(" ") + between_parens = line.split("(")[1].split(")")[0] + args = between_parens.split(",") + + result = before_paren + result.append("(") + for arg in args: + if skip_last: + result += arg.split(" ")[:-1] + else: + result += arg.split(" ") + result.append(")") + + return tuple(result) + # gcc intrinsics gcc_intrinsics = set() for line in open('gcc_lsxintrin.h', 'r'): - gcc_intrinsics.add(tuple(line.strip()[:-1].split(' '))) + gcc_intrinsics.add(parse_fn(line, False)) #print(gcc_intrinsics) # find documented intrinsics @@ -19,28 +35,11 @@ for line in open(f, 'r'): if "h2" in line: intrinsics = line.split(">")[1].split("<")[0] - - # strip off names that gcc does not have - parts = intrinsics.split(" ") - begin_params = False - i = 0 - while i < len(parts): - if "(" in parts[i]: - begin_params = True - i += 1 - elif "," in parts[i]: - parts = parts[:i] + parts[i+1:] - elif ")" in parts[i]: - parts = parts[:i] + parts[i+1:] - parts[i-1] += ")" - i += 1 - elif begin_params: - parts[i-1] += "," - i += 1 - else: - i += 1 - documented_intrinsics.add(tuple(parts)) + documented_intrinsics.add(parse_fn(intrinsics, True)) undocumented = gcc_intrinsics - documented_intrinsics for entry in undocumented: - print("Undocumented:", " ".join(entry)) \ No newline at end of file + print("Undocumented:", " ".join(entry)) + for e in documented_intrinsics: + if e[1] == entry[1]: + print("Matching:", " ".join(e)) \ No newline at end of file diff --git a/docs/lsx/float_conversion.md b/docs/lsx/float_conversion.md index f7603694..a127b169 100644 --- a/docs/lsx/float_conversion.md +++ b/docs/lsx/float_conversion.md @@ -85,12 +85,12 @@ Convert half precision floating point elements in higher half of `a` to single p {% include('vfcvth_s_h.h') %} ``` -## __m128d __lsx_vfcvtl_s_h (__m128i a) +## __m128 __lsx_vfcvtl_s_h (__m128i a) ### Synopsis ```c++ -__m128d __lsx_vfcvtl_s_h (__m128i a) +__m128 __lsx_vfcvtl_s_h (__m128i a) #include Instruction: vfcvtl.s.h vr, vr CPU Flags: LSX diff --git a/docs/lsx/integer_computation.md b/docs/lsx/integer_computation.md index 4387d690..f5ac0baf 100644 --- a/docs/lsx/integer_computation.md +++ b/docs/lsx/integer_computation.md @@ -204,6 +204,19 @@ {{ vmulwev('q', 'du') }} {{ vmulwev('q', 'du', 'd') }} +{{ vmulwod('h', 'b') }} +{{ vmulwod('h', 'bu') }} +{{ vmulwod('h', 'bu', 'b') }} +{{ vmulwod('w', 'h') }} +{{ vmulwod('w', 'hu') }} +{{ vmulwod('w', 'hu', 'h') }} +{{ vmulwod('d', 'w') }} +{{ vmulwod('d', 'wu') }} +{{ vmulwod('d', 'wu', 'w') }} +{{ vmulwod('q', 'd') }} +{{ vmulwod('q', 'du') }} +{{ vmulwod('q', 'du', 'd') }} + {{ vneg('b') }} {{ vneg('h') }} {{ vneg('w') }} diff --git a/gcc_lsxintrin.h b/gcc_lsxintrin.h index daff3a14..afb67b50 100644 --- a/gcc_lsxintrin.h +++ b/gcc_lsxintrin.h @@ -571,7 +571,7 @@ __m128i __lsx_vsrai_w (__m128i, imm0_31); __m128i __lsx_vsran_b_h (__m128i, __m128i); __m128i __lsx_vsran_h_w (__m128i, __m128i); __m128i __lsx_vsrani_b_h (__m128i, __m128i, imm0_15); -__m128i __lsx_vsrani_d_q (__m128i, __m128i, imm0_127) +__m128i __lsx_vsrani_d_q (__m128i, __m128i, imm0_127); __m128i __lsx_vsrani_h_w (__m128i, __m128i, imm0_31); __m128i __lsx_vsrani_w_d (__m128i, __m128i, imm0_63); __m128i __lsx_vsran_w_d (__m128i, __m128i); @@ -601,7 +601,7 @@ __m128i __lsx_vsrli_w (__m128i, imm0_31); __m128i __lsx_vsrln_b_h (__m128i, __m128i); __m128i __lsx_vsrln_h_w (__m128i, __m128i); __m128i __lsx_vsrlni_b_h (__m128i, __m128i, imm0_15); -__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127) +__m128i __lsx_vsrlni_d_q (__m128i, __m128i, imm0_127); __m128i __lsx_vsrlni_h_w (__m128i, __m128i, imm0_31); __m128i __lsx_vsrlni_w_d (__m128i, __m128i, imm0_63); __m128i __lsx_vsrln_w_d (__m128i, __m128i); @@ -627,7 +627,7 @@ __m128i __lsx_vssran_hu_w (__m128i, __m128i); __m128i __lsx_vssran_h_w (__m128i, __m128i); __m128i __lsx_vssrani_b_h (__m128i, __m128i, imm0_15); __m128i __lsx_vssrani_bu_h (__m128i, __m128i, imm0_15); -__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127) +__m128i __lsx_vssrani_d_q (__m128i, __m128i, imm0_127); __m128i __lsx_vssrani_du_q (__m128i, __m128i, imm0_127) __m128i __lsx_vssrani_hu_w (__m128i, __m128i, imm0_31); __m128i __lsx_vssrani_h_w (__m128i, __m128i, imm0_31); @@ -641,8 +641,8 @@ __m128i __lsx_vssrarn_hu_w (__m128i, __m128i); __m128i __lsx_vssrarn_h_w (__m128i, __m128i); __m128i __lsx_vssrarni_b_h (__m128i, __m128i, imm0_15); __m128i __lsx_vssrarni_bu_h (__m128i, __m128i, imm0_15); -__m128i __lsx_vssrarni_d_q (__m128i, __m128i, imm0_127) -__m128i __lsx_vssrarni_du_q (__m128i, __m128i, imm0_127) +__m128i __lsx_vssrarni_d_q (__m128i, __m128i, imm0_127); +__m128i __lsx_vssrarni_du_q (__m128i, __m128i, imm0_127); __m128i __lsx_vssrarni_hu_w (__m128i, __m128i, imm0_31); __m128i __lsx_vssrarni_h_w (__m128i, __m128i, imm0_31); __m128i __lsx_vssrarni_w_d (__m128i, __m128i, imm0_63); @@ -655,7 +655,7 @@ __m128i __lsx_vssrln_hu_w (__m128i, __m128i); __m128i __lsx_vssrln_h_w (__m128i, __m128i); __m128i __lsx_vssrlni_b_h (__m128i, __m128i, imm0_15); __m128i __lsx_vssrlni_bu_h (__m128i, __m128i, imm0_15); -__m128i __lsx_vssrlni_d_q (__m128i, __m128i, imm0_127) +__m128i __lsx_vssrlni_d_q (__m128i, __m128i, imm0_127); __m128i __lsx_vssrlni_du_q (__m128i, __m128i, imm0_127) __m128i __lsx_vssrlni_hu_w (__m128i, __m128i, imm0_31); __m128i __lsx_vssrlni_h_w (__m128i, __m128i, imm0_31); @@ -669,8 +669,8 @@ __m128i __lsx_vssrlrn_hu_w (__m128i, __m128i); __m128i __lsx_vssrlrn_h_w (__m128i, __m128i); __m128i __lsx_vssrlrni_b_h (__m128i, __m128i, imm0_15); __m128i __lsx_vssrlrni_bu_h (__m128i, __m128i, imm0_15); -__m128i __lsx_vssrlrni_d_q (__m128i, __m128i, imm0_127) -__m128i __lsx_vssrlrni_du_q (__m128i, __m128i, imm0_127) +__m128i __lsx_vssrlrni_d_q (__m128i, __m128i, imm0_127); +__m128i __lsx_vssrlrni_du_q (__m128i, __m128i, imm0_127); __m128i __lsx_vssrlrni_hu_w (__m128i, __m128i, imm0_31); __m128i __lsx_vssrlrni_h_w (__m128i, __m128i, imm0_31); __m128i __lsx_vssrlrni_w_d (__m128i, __m128i, imm0_63); diff --git a/main.py b/main.py index d1007ebd..2748a93c 100644 --- a/main.py +++ b/main.py @@ -140,7 +140,7 @@ def vadd_mul_sub_w_ev_od(op, desc, even_odd, wide, narrow, narrow2=None): suffix = "od" return instruction( intrinsic=f"__m128i __lsx_v{op}w{suffix}_{wide}_{narrow}{intrinsic_suffix} (__m128i a, __m128i b)", - instr=f"v{op}wev.{wide}.{narrow}{inst_suffix} vr, vr, vr", + instr=f"v{op}w{suffix}.{wide}.{narrow}{inst_suffix} vr, vr, vr", desc=f"{desc} {even_odd}-positioned {signedness} {narrow_width}-bit elements in `a` and {signedness2} elements in `b`, save the {wide_width}-bit result in `dst`.", ) @@ -269,7 +269,7 @@ def vdiv(name): width = widths[name] signedness = signednesses[name] return instruction( - intrinsic=f"__m128i __lsx_div_{name} (__m128i a, __m128i b)", + intrinsic=f"__m128i __lsx_vdiv_{name} (__m128i a, __m128i b)", instr=f"vdiv.{name} vr, vr, vr", desc=f"Divide {signedness} {width}-bit elements in `a` by elements in `b`.", )