Skip to content

Commit

Permalink
Add missing float_conversion
Browse files Browse the repository at this point in the history
  • Loading branch information
jiegec committed Dec 14, 2023
1 parent 95e8144 commit 0965215
Show file tree
Hide file tree
Showing 12 changed files with 110 additions and 15 deletions.
7 changes: 7 additions & 0 deletions code/xvfcvt_h_s.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Fill all 16 fp16 elements of dst: elements 0-7 are narrowed from
// b.fp32[0..7], elements 8-15 from a.fp32[0..7].
for (int i = 0; i < 16; i++) {
  dst.fp16[i] = (i < 8) ? b.fp32[i] : a.fp32[i - 8];
}
7 changes: 7 additions & 0 deletions code/xvfcvt_s_d.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Low half of the result: dst.fp32[0..3] are narrowed from b.fp64[0..3].
for (int lo = 0; lo < 4; lo++) {
  dst.fp32[lo] = b.fp64[lo];
}
// High half of the result: dst.fp32[4..7] are narrowed from a.fp64[0..3].
for (int hi = 4; hi < 8; hi++) {
  dst.fp32[hi] = a.fp64[hi - 4];
}
3 changes: 3 additions & 0 deletions code/xvfcvth_d_s.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Widen a.fp32[4..7] into dst.fp64[0..3]; src walks the source elements.
for (int src = 4; src < 8; src++) {
  dst.fp64[src - 4] = a.fp32[src];
}
3 changes: 3 additions & 0 deletions code/xvfcvth_s_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Widen a.fp16[8..15] into dst.fp32[0..7]; src walks the source elements.
for (int src = 8; src < 16; src++) {
  dst.fp32[src - 8] = a.fp16[src];
}
3 changes: 3 additions & 0 deletions code/xvfcvtl_d_s.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 4; i++) {
dst.fp64[i] = a.fp32[i];
}
3 changes: 3 additions & 0 deletions code/xvfcvtl_s_h.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 8; i++) {
dst.fp32[i] = a.fp16[i];
}
5 changes: 5 additions & 0 deletions code/xvffint_s_l.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Convert signed 64-bit integers to fp32 and pack eight results into dst:
// dst.fp32[0..3] come from b.dword[0..3] and dst.fp32[4..7] from a.dword[0..3],
// the same b-low / a-high layout used by the xvfcvt_s_d narrowing conversion.
//
// Each source vector holds only four 64-bit elements, so the index into the
// high-half source must be rebased with `i - 4`, and the element must be read
// as a signed 64-bit value (a 32-bit cast would discard the upper bits before
// the conversion).
//
// NOTE(review): confirm against the ISA manual / an emulator whether the two
// halves are instead interleaved per 128-bit lane.
for (int i = 0; i < 8; i++) {
  dst.fp32[i] =
      (i < 4) ? (f32)(s64)b.dword[i]
              : (f32)(s64)a.dword[i - 4]; // rounding mode is not expressed in C
}
3 changes: 3 additions & 0 deletions code/xvffinth_d_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Convert a.word[4..7], read as signed 32-bit integers, into dst.fp64[0..3].
// src walks the source elements.
for (int src = 4; src < 8; src++) {
  dst.fp64[src - 4] = (f64)(s32)a.word[src]; // rounding mode is not expressed in C
}
3 changes: 3 additions & 0 deletions code/xvffintl_d_w.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
for (int i = 0; i < 4; i++) {
dst.fp64[i] = (f64)(s32)a.word[i]; // rounding mode is not expressed in C
}
54 changes: 54 additions & 0 deletions docs/lasx/float_conversion.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Floating Point Conversion

{{ xvfcvth_d_s() }}
{{ xvfcvtl_d_s() }}

{{ xvfcvt_s_d() }}

{{ xvfcvth_s_h() }}
{{ xvfcvtl_s_h() }}

{{ xvfcvt_h_s() }}

{{ xvffint_d_w('h') }}
{{ xvffint_d_w('l') }}

{{ xvffint('d', 'l') }}
{{ xvffint('d', 'lu') }}
{{ xvffint('s', 'w') }}
{{ xvffint('s', 'wu') }}

{{ xvffint_s_l() }}

{{ xvftint_l_s('', 'l') }}
{{ xvftint_l_s('', 'h') }}
{{ xvftint_l_s('rm', 'l') }}
{{ xvftint_l_s('rm', 'h') }}
{{ xvftint_l_s('rp', 'l') }}
{{ xvftint_l_s('rp', 'h') }}
{{ xvftint_l_s('rz', 'l') }}
{{ xvftint_l_s('rz', 'h') }}
{{ xvftint_l_s('rne', 'l') }}
{{ xvftint_l_s('rne', 'h') }}

{{ xvftint('', 'l', 'd') }}
{{ xvftint('', 'w', 's') }}
{{ xvftint('rm', 'l', 'd') }}
{{ xvftint('rm', 'w', 's') }}
{{ xvftint('rp', 'l', 'd') }}
{{ xvftint('rp', 'w', 's') }}
{{ xvftint('rz', 'l', 'd') }}
{{ xvftint('rz', 'w', 's') }}
{{ xvftint('rne', 'l', 'd') }}
{{ xvftint('rne', 'w', 's') }}

{{ xvftint('', 'lu', 'd') }}
{{ xvftint('', 'wu', 's') }}
{{ xvftint('rz', 'lu', 'd') }}
{{ xvftint('rz', 'wu', 's') }}

{{ xvftint_w_d('') }}
{{ xvftint_w_d('rm') }}
{{ xvftint_w_d('rp') }}
{{ xvftint_w_d('rz') }}
{{ xvftint_w_d('rne') }}
24 changes: 12 additions & 12 deletions gcc_lasxintrin.h
Original file line number Diff line number Diff line change
Expand Up @@ -228,14 +228,14 @@ __m256 __lasx_xvfnmsub_s (__m256, __m256, __m256);
__m256d __lasx_xvfrecip_d (__m256d);
__m256 __lasx_xvfrecip_s (__m256);
__m256d __lasx_xvfrint_d (__m256d);
__m256i __lasx_xvfrintrm_d (__m256d);
__m256i __lasx_xvfrintrm_s (__m256);
__m256i __lasx_xvfrintrne_d (__m256d);
__m256i __lasx_xvfrintrne_s (__m256);
__m256i __lasx_xvfrintrp_d (__m256d);
__m256i __lasx_xvfrintrp_s (__m256);
__m256i __lasx_xvfrintrz_d (__m256d);
__m256i __lasx_xvfrintrz_s (__m256);
__m256d __lasx_xvfrintrm_d (__m256d);
__m256 __lasx_xvfrintrm_s (__m256);
__m256d __lasx_xvfrintrne_d (__m256d);
__m256 __lasx_xvfrintrne_s (__m256);
__m256d __lasx_xvfrintrp_d (__m256d);
__m256 __lasx_xvfrintrp_s (__m256);
__m256d __lasx_xvfrintrz_d (__m256d);
__m256 __lasx_xvfrintrz_s (__m256);
__m256 __lasx_xvfrint_s (__m256);
__m256d __lasx_xvfrsqrt_d (__m256d);
__m256 __lasx_xvfrsqrt_s (__m256);
Expand Down Expand Up @@ -702,10 +702,10 @@ __m256i __lasx_xvssub_hu (__m256i, __m256i);
__m256i __lasx_xvssub_w (__m256i, __m256i);
__m256i __lasx_xvssub_wu (__m256i, __m256i);
void __lasx_xvst (__m256i, void *, imm_n2048_2047);
void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, idx);
void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, idx);
void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, idx);
void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, idx);
void __lasx_xvstelm_b (__m256i, void *, imm_n128_127, imm0_15);
void __lasx_xvstelm_d (__m256i, void *, imm_n128_127, imm0_1);
void __lasx_xvstelm_h (__m256i, void *, imm_n128_127, imm0_7);
void __lasx_xvstelm_w (__m256i, void *, imm_n128_127, imm0_3);
void __lasx_xvstx (__m256i, void *, long int);
__m256i __lasx_xvsub_b (__m256i, __m256i);
__m256i __lasx_xvsub_d (__m256i, __m256i);
Expand Down
10 changes: 7 additions & 3 deletions main.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,21 @@

# dirty way to reduce code
cur_simd = "lsx"
cur_vlen = 128

# depends on implementation of env.macro()
def my_macro(env):
def wrap(fn):
def vfn(*args):
global cur_simd
global cur_simd, cur_vlen
cur_simd = "lsx"
cur_vlen = 128
return fn(*args)
env.macros[f"{fn.__name__}"] = vfn
def xvfn(*args):
global cur_simd
global cur_simd, cur_vlen
cur_simd = "lasx"
cur_vlen = 256
return fn(*args)
env.macros[f"x{fn.__name__}"] = xvfn
return fn
Expand Down Expand Up @@ -1031,8 +1034,9 @@ def vpickve2gr(name):
"d": "long int",
"du": "unsigned long int",
}[name]
global cur_vlen
return instruction(
intrinsic=f"{return_type} __lsx_vpickve2gr_{name} (__m128i a, imm0_{128 // width - 1} idx)",
intrinsic=f"{return_type} __lsx_vpickve2gr_{name} (__m128i a, imm0_{cur_vlen // width - 1} idx)",
instr=f"vpickve2gr.{name} r, vr, imm",
desc=f"Pick the `lane` specified by `idx` from `a` and store into `dst`.",
)
Expand Down

0 comments on commit 0965215

Please sign in to comment.