diff --git a/src/i16x8_.rs b/src/i16x8_.rs
index 38addaf3..43404d2c 100644
--- a/src/i16x8_.rs
+++ b/src/i16x8_.rs
@@ -696,7 +696,18 @@ impl i16x8 {
       } else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
         unsafe {Self { neon: vabsq_s16(self.neon) }}
       } else {
-        self.is_negative().blend(self.neg(), self)
+        let arr: [i16; 8] = cast(self);
+        cast(
+          [
+            arr[0].wrapping_abs(),
+            arr[1].wrapping_abs(),
+            arr[2].wrapping_abs(),
+            arr[3].wrapping_abs(),
+            arr[4].wrapping_abs(),
+            arr[5].wrapping_abs(),
+            arr[6].wrapping_abs(),
+            arr[7].wrapping_abs(),
+          ])
       }
     }
   }
@@ -705,7 +716,10 @@ impl i16x8 {
   #[must_use]
   pub fn unsigned_abs(self) -> u16x8 {
     pick! {
-      if #[cfg(target_feature="ssse3")] {
+      if #[cfg(target_feature="sse2")] {
+        let mask = shr_imm_i16_m128i::<15>(self.sse);
+        u16x8 { sse: bitxor_m128i(add_i16_m128i(self.sse, mask), mask) }
+      } else if #[cfg(target_feature="ssse3")] {
         u16x8 { sse: abs_i16_m128i(self.sse) }
       } else if #[cfg(target_feature="simd128")] {
         u16x8 { simd: i16x8_abs(self.simd) }
@@ -713,10 +727,21 @@ impl i16x8 {
         unsafe {u16x8 { neon: vreinterpretq_u16_s16(vabsq_s16(self.neon)) }}
       } else {
         let arr: [i16; 8] = cast(self);
-        cast(arr.map(|x| x.unsigned_abs()))
+        cast(
+          [
+            arr[0].unsigned_abs(),
+            arr[1].unsigned_abs(),
+            arr[2].unsigned_abs(),
+            arr[3].unsigned_abs(),
+            arr[4].unsigned_abs(),
+            arr[5].unsigned_abs(),
+            arr[6].unsigned_abs(),
+            arr[7].unsigned_abs(),
+          ])
       }
     }
   }
+
   #[inline]
   #[must_use]
   pub fn max(self, rhs: Self) -> Self {
diff --git a/src/i32x4_.rs b/src/i32x4_.rs
index 18467a73..a93caa51 100644
--- a/src/i32x4_.rs
+++ b/src/i32x4_.rs
@@ -451,7 +456,12 @@ impl i32x4 {
         unsafe {u32x4 { neon: vreinterpretq_u32_s32(vabsq_s32(self.neon)) }}
       } else {
         let arr: [i32; 4] = cast(self);
-        cast(arr.map(|x| x.unsigned_abs()))
+        cast([
+          arr[0].unsigned_abs(),
+          arr[1].unsigned_abs(),
+          arr[2].unsigned_abs(),
+          arr[3].unsigned_abs(),
+        ])
       }
     }
   }
diff --git a/src/i64x2_.rs b/src/i64x2_.rs
index 449801ec..1207a39c 100644
--- a/src/i64x2_.rs
+++ b/src/i64x2_.rs
@@ -409,7 +413,11 @@ impl i64x2 {
         unsafe {Self { neon: vabsq_s64(self.neon) }}
       } else {
         let arr: [i64; 2] = cast(self);
-        cast(arr.map(|x| x.wrapping_abs()))
+        cast(
+          [
+            arr[0].wrapping_abs(),
+            arr[1].wrapping_abs(),
+          ])
       }
     }
   }
@@ -425,7 +433,11 @@ impl i64x2 {
         unsafe {u64x2 { neon: vreinterpretq_u64_s64(vabsq_s64(self.neon)) }}
       } else {
         let arr: [i64; 2] = cast(self);
-        cast(arr.map(|x| x.unsigned_abs()))
+        cast(
+          [
+            arr[0].unsigned_abs(),
+            arr[1].unsigned_abs(),
+          ])
       }
     }
   }
diff --git a/src/i64x4_.rs b/src/i64x4_.rs
index b6fc74db..b0495f15 100644
--- a/src/i64x4_.rs
+++ b/src/i64x4_.rs
@@ -315,7 +321,13 @@ impl i64x4 {
       if #[cfg(target_feature="avx2")] {
         // avx x86 doesn't have this builtin
         let arr: [i64; 4] = cast(self);
-        Self { avx2: cast(arr.map( |x| x.wrapping_abs())) }
+        cast(
+          [
+            arr[0].wrapping_abs(),
+            arr[1].wrapping_abs(),
+            arr[2].wrapping_abs(),
+            arr[3].wrapping_abs(),
+          ])
       } else {
         Self {
           a : self.a.abs(),
@@ -332,7 +344,13 @@ impl i64x4 {
      if #[cfg(target_feature="avx2")] {
         // avx x86 doesn't have this builtin
         let arr: [i64; 4] = cast(self);
-        u64x4 { avx2: cast(arr.map( |x| x.unsigned_abs())) }
+        cast(
+          [
+            arr[0].unsigned_abs(),
+            arr[1].unsigned_abs(),
+            arr[2].unsigned_abs(),
+            arr[3].unsigned_abs(),
+          ])
       } else {
         u64x4 {
           a : self.a.unsigned_abs(),
diff --git a/src/i8x16_.rs b/src/i8x16_.rs
index b084f3c5..76df8a11 100644
--- a/src/i8x16_.rs
+++ b/src/i8x16_.rs
@@ -541,7 +558,24 @@ impl i8x16 {
       } else {
         let arr: [i8; 16] = cast(self);
         cast(
-          arr.map(|x| x.unsigned_abs()))
+          [
+            arr[0].unsigned_abs(),
+            arr[1].unsigned_abs(),
+            arr[2].unsigned_abs(),
+            arr[3].unsigned_abs(),
+            arr[4].unsigned_abs(),
+            arr[5].unsigned_abs(),
+            arr[6].unsigned_abs(),
+            arr[7].unsigned_abs(),
+            arr[8].unsigned_abs(),
+            arr[9].unsigned_abs(),
+            arr[10].unsigned_abs(),
+            arr[11].unsigned_abs(),
+            arr[12].unsigned_abs(),
+            arr[13].unsigned_abs(),
+            arr[14].unsigned_abs(),
+            arr[15].unsigned_abs(),
+          ])
       }
     }
   }
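
Note on the new sse2 branch in i16x8::unsigned_abs: it builds abs from a sign mask (arithmetic shift, add, xor) instead of the ssse3 abs_i16_m128i call. Below is a minimal scalar sketch of that identity, not part of the patch and with an illustrative function name, showing it agrees with wrapping_abs and unsigned_abs on sample values including i16::MIN.

// Scalar version of the sign-mask trick the patch applies lane-wise via
// shr_imm_i16_m128i::<15>, add_i16_m128i, and bitxor_m128i.
// The function name is illustrative only.
fn abs_via_sign_mask(x: i16) -> i16 {
  let mask = x >> 15; // arithmetic shift: 0 for x >= 0, -1 (all ones) for x < 0
  // x when mask == 0; bitwise NOT of (x - 1), i.e. -x with wrapping, when x < 0
  x.wrapping_add(mask) ^ mask
}

fn main() {
  for x in [0i16, 1, -1, 42, -42, i16::MAX, i16::MIN] {
    assert_eq!(abs_via_sign_mask(x), x.wrapping_abs());
    assert_eq!(abs_via_sign_mask(x) as u16, x.unsigned_abs());
  }
}

The i16::MIN lane wraps to itself, which matches wrapping_abs, and reinterpreted as u16 it is 32768, which matches unsigned_abs, so the branch needs no special casing.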