Skip to content

Commit

Permalink
Rework SIMD zeroing
Browse files Browse the repository at this point in the history
  • Loading branch information
TDecking authored and Amanieu committed Nov 30, 2024
1 parent 44db3a5 commit 1bbe5d7
Show file tree
Hide file tree
Showing 20 changed files with 1,468 additions and 2,906 deletions.
3 changes: 3 additions & 0 deletions crates/core_arch/src/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@ macro_rules! simd_ty {

#[allow(clippy::use_self)]
impl $id {
/// A value of this type where all elements are zeroed out.
pub(crate) const ZERO: Self = unsafe { crate::mem::zeroed() };

#[inline(always)]
pub(crate) const fn new($($param_name: $elem_type),*) -> Self {
$id([$($param_name),*])
Expand Down
14 changes: 7 additions & 7 deletions crates/core_arch/src/wasm32/simd128.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2232,7 +2232,7 @@ pub fn v128_any_true(a: v128) -> bool {
pub fn i8x16_abs(a: v128) -> v128 {
unsafe {
let a = a.as_i8x16();
let zero = simd::i8x16::splat(0);
let zero = simd::i8x16::ZERO;
simd_select::<simd::m8x16, simd::i8x16>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
}
}
Expand Down Expand Up @@ -2524,7 +2524,7 @@ pub use i16x8_extadd_pairwise_u8x16 as u16x8_extadd_pairwise_u8x16;
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn i16x8_abs(a: v128) -> v128 {
let a = a.as_i16x8();
let zero = simd::i16x8::splat(0);
let zero = simd::i16x8::ZERO;
unsafe {
simd_select::<simd::m16x8, simd::i16x8>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
}
Expand Down Expand Up @@ -3012,7 +3012,7 @@ pub use i32x4_extadd_pairwise_u16x8 as u32x4_extadd_pairwise_u16x8;
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn i32x4_abs(a: v128) -> v128 {
let a = a.as_i32x4();
let zero = simd::i32x4::splat(0);
let zero = simd::i32x4::ZERO;
unsafe {
simd_select::<simd::m32x4, simd::i32x4>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
}
Expand Down Expand Up @@ -3394,7 +3394,7 @@ pub use i32x4_extmul_high_u16x8 as u32x4_extmul_high_u16x8;
#[stable(feature = "wasm_simd", since = "1.54.0")]
pub fn i64x2_abs(a: v128) -> v128 {
let a = a.as_i64x2();
let zero = simd::i64x2::splat(0);
let zero = simd::i64x2::ZERO;
unsafe {
simd_select::<simd::m64x2, simd::i64x2>(simd_lt(a, zero), simd_sub(zero, a), a).v128()
}
Expand Down Expand Up @@ -4105,7 +4105,7 @@ pub fn i32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {
let ret: simd::i32x4 = unsafe {
simd_shuffle!(
llvm_i32x2_trunc_sat_f64x2_s(a.as_f64x2()),
simd::i32x2::splat(0),
simd::i32x2::ZERO,
[0, 1, 2, 3],
)
};
Expand All @@ -4129,7 +4129,7 @@ pub fn u32x4_trunc_sat_f64x2_zero(a: v128) -> v128 {
let ret: simd::i32x4 = unsafe {
simd_shuffle!(
llvm_i32x2_trunc_sat_f64x2_u(a.as_f64x2()),
simd::i32x2::splat(0),
simd::i32x2::ZERO,
[0, 1, 2, 3],
)
};
Expand Down Expand Up @@ -4176,7 +4176,7 @@ pub fn f32x4_demote_f64x2_zero(a: v128) -> v128 {
unsafe {
simd_cast::<simd::f64x4, simd::f32x4>(simd_shuffle!(
a.as_f64x2(),
simd::f64x2::splat(0.0),
simd::f64x2::ZERO,
[0, 1, 2, 3]
))
.v128()
Expand Down
30 changes: 13 additions & 17 deletions crates/core_arch/src/x86/avx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,7 @@ pub unsafe fn _mm256_blend_ps<const IMM8: i32>(a: __m256, b: __m256) -> __m256 {
#[cfg_attr(test, assert_instr(vblendvpd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::splat(0));
let mask: i64x4 = simd_lt(transmute::<_, i64x4>(c), i64x4::ZERO);
transmute(simd_select(mask, b.as_f64x4(), a.as_f64x4()))
}

Expand All @@ -528,7 +528,7 @@ pub unsafe fn _mm256_blendv_pd(a: __m256d, b: __m256d, c: __m256d) -> __m256d {
#[cfg_attr(test, assert_instr(vblendvps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_blendv_ps(a: __m256, b: __m256, c: __m256) -> __m256 {
let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::splat(0));
let mask: i32x8 = simd_lt(transmute::<_, i32x8>(c), i32x8::ZERO);
transmute(simd_select(mask, b.as_f32x8(), a.as_f32x8()))
}

Expand Down Expand Up @@ -983,11 +983,7 @@ pub unsafe fn _mm256_extractf128_pd<const IMM1: i32>(a: __m256d) -> __m128d {
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_extractf128_si256<const IMM1: i32>(a: __m256i) -> __m128i {
static_assert_uimm_bits!(IMM1, 1);
let dst: i64x2 = simd_shuffle!(
a.as_i64x4(),
_mm256_undefined_si256().as_i64x4(),
[[0, 1], [2, 3]][IMM1 as usize],
);
let dst: i64x2 = simd_shuffle!(a.as_i64x4(), i64x4::ZERO, [[0, 1], [2, 3]][IMM1 as usize],);
transmute(dst)
}

Expand Down Expand Up @@ -2139,7 +2135,7 @@ pub unsafe fn _mm_testnzc_ps(a: __m128, b: __m128) -> i32 {
pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
// Propagate the highest bit to the rest, because simd_bitmask
// requires all-1 or all-0.
let mask: i64x4 = simd_lt(transmute(a), i64x4::splat(0));
let mask: i64x4 = simd_lt(transmute(a), i64x4::ZERO);
simd_bitmask::<i64x4, u8>(mask).into()
}

Expand All @@ -2155,7 +2151,7 @@ pub unsafe fn _mm256_movemask_pd(a: __m256d) -> i32 {
pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
// Propagate the highest bit to the rest, because simd_bitmask
// requires all-1 or all-0.
let mask: i32x8 = simd_lt(transmute(a), i32x8::splat(0));
let mask: i32x8 = simd_lt(transmute(a), i32x8::ZERO);
simd_bitmask::<i32x8, u8>(mask).into()
}

Expand All @@ -2167,7 +2163,7 @@ pub unsafe fn _mm256_movemask_ps(a: __m256) -> i32 {
#[cfg_attr(test, assert_instr(vxorp))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setzero_pd() -> __m256d {
_mm256_set1_pd(0.0)
const { mem::zeroed() }
}

/// Returns vector of type __m256 with all elements set to zero.
Expand All @@ -2178,7 +2174,7 @@ pub unsafe fn _mm256_setzero_pd() -> __m256d {
#[cfg_attr(test, assert_instr(vxorps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setzero_ps() -> __m256 {
_mm256_set1_ps(0.0)
const { mem::zeroed() }
}

/// Returns vector of type __m256i with all elements set to zero.
Expand All @@ -2189,7 +2185,7 @@ pub unsafe fn _mm256_setzero_ps() -> __m256 {
#[cfg_attr(test, assert_instr(vxor))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_setzero_si256() -> __m256i {
_mm256_set1_epi8(0)
const { mem::zeroed() }
}

/// Sets packed double-precision (64-bit) floating-point elements in returned
Expand Down Expand Up @@ -2722,7 +2718,7 @@ pub unsafe fn _mm256_castpd128_pd256(a: __m128d) -> __m256d {
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_castsi128_si256(a: __m128i) -> __m256i {
let a = a.as_i64x2();
let undefined = _mm_undefined_si128().as_i64x2();
let undefined = i64x2::ZERO;
let dst: i64x4 = simd_shuffle!(a, undefined, [0, 1, 2, 2]);
transmute(dst)
}
Expand Down Expand Up @@ -2752,7 +2748,7 @@ pub unsafe fn _mm256_zextps128_ps256(a: __m128) -> __m256 {
// instructions, thus it has zero latency.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_zextsi128_si256(a: __m128i) -> __m256i {
let b = _mm_setzero_si128().as_i64x2();
let b = i64x2::ZERO;
let dst: i64x4 = simd_shuffle!(a.as_i64x2(), b, [0, 1, 2, 3]);
transmute(dst)
}
Expand Down Expand Up @@ -2782,7 +2778,7 @@ pub unsafe fn _mm256_zextpd128_pd256(a: __m128d) -> __m256d {
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_undefined_ps() -> __m256 {
_mm256_set1_ps(0.0)
const { mem::zeroed() }
}

/// Returns vector of type `__m256d` with indeterminate elements.
Expand All @@ -2795,7 +2791,7 @@ pub unsafe fn _mm256_undefined_ps() -> __m256 {
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_undefined_pd() -> __m256d {
_mm256_set1_pd(0.0)
const { mem::zeroed() }
}

/// Returns vector of type __m256i with with indeterminate elements.
Expand All @@ -2808,7 +2804,7 @@ pub unsafe fn _mm256_undefined_pd() -> __m256d {
// This intrinsic has no corresponding instruction.
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_undefined_si256() -> __m256i {
__m256i([0, 0, 0, 0])
const { mem::zeroed() }
}

/// Sets packed __m256 returned vector with the supplied values.
Expand Down
Loading

0 comments on commit 1bbe5d7

Please sign in to comment.