Skip to content

Commit

Permalink
replace map with explicit array after reading warning about bad debug perf
Browse files Browse the repository at this point in the history
  • Loading branch information
mcroomp committed May 8, 2024
1 parent 67391dd commit c96eb85
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 9 deletions.
31 changes: 28 additions & 3 deletions src/i16x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -696,7 +696,18 @@ impl i16x8 {
} else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
unsafe {Self { neon: vabsq_s16(self.neon) }}
} else {
self.is_negative().blend(self.neg(), self)
let arr: [i16; 8] = cast(self);
cast(
[
arr[0].wrapping_abs(),
arr[1].wrapping_abs(),
arr[2].wrapping_abs(),
arr[3].wrapping_abs(),
arr[4].wrapping_abs(),
arr[5].wrapping_abs(),
arr[6].wrapping_abs(),
arr[7].wrapping_abs(),
])
}
}
}
Expand All @@ -705,18 +716,32 @@ impl i16x8 {
#[must_use]
pub fn unsigned_abs(self) -> u16x8 {
pick! {
if #[cfg(target_feature="ssse3")] {
if #[cfg(target_feature="sse2")] {
let mask = shr_imm_i16_m128i::<15>(self.sse);
u16x8 { sse: bitxor_m128i(add_i16_m128i(self.sse, mask), mask) }
} else if #[cfg(target_feature="ssse3")] {
u16x8 { sse: abs_i16_m128i(self.sse) }
} else if #[cfg(target_feature="simd128")] {
u16x8 { simd: i16x8_abs(self.simd) }
} else if #[cfg(all(target_feature="neon",target_arch="aarch64"))]{
unsafe {u16x8 { neon: vreinterpretq_u16_s16(vabsq_s16(self.neon)) }}
} else {
let arr: [i16; 8] = cast(self);
cast(arr.map(|x| x.unsigned_abs()))
cast(
[
arr[0].unsigned_abs(),
arr[1].unsigned_abs(),
arr[2].unsigned_abs(),
arr[3].unsigned_abs(),
arr[4].unsigned_abs(),
arr[5].unsigned_abs(),
arr[6].unsigned_abs(),
arr[7].unsigned_abs(),
])
}
}
}

#[inline]
#[must_use]
pub fn max(self, rhs: Self) -> Self {
Expand Down
7 changes: 6 additions & 1 deletion src/i32x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -451,7 +451,12 @@ impl i32x4 {
unsafe {u32x4 { neon: vreinterpretq_u32_s32(vabsq_s32(self.neon)) }}
} else {
let arr: [i32; 4] = cast(self);
cast(arr.map(|x| x.unsigned_abs()))
cast([
arr[0].unsigned_abs(),
arr[1].unsigned_abs(),
arr[2].unsigned_abs(),
arr[3].unsigned_abs(),
])
}
}
}
Expand Down
12 changes: 10 additions & 2 deletions src/i64x2_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,7 +409,11 @@ impl i64x2 {
unsafe {Self { neon: vabsq_s64(self.neon) }}
} else {
let arr: [i64; 2] = cast(self);
cast(arr.map(|x| x.wrapping_abs()))
cast(
[
arr[0].wrapping_abs(),
arr[1].wrapping_abs(),
])
}
}
}
Expand All @@ -425,7 +429,11 @@ impl i64x2 {
unsafe {u64x2 { neon: vreinterpretq_u64_s64(vabsq_s64(self.neon)) }}
} else {
let arr: [i64; 2] = cast(self);
cast(arr.map(|x| x.unsigned_abs()))
cast(
[
arr[0].unsigned_abs(),
arr[1].unsigned_abs(),
])
}
}
}
Expand Down
16 changes: 14 additions & 2 deletions src/i64x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,13 @@ impl i64x4 {
if #[cfg(target_feature="avx2")] {
// avx x86 doesn't have this builtin
let arr: [i64; 4] = cast(self);
Self { avx2: cast(arr.map( |x| x.wrapping_abs())) }
cast(
[
arr[0].wrapping_abs(),
arr[1].wrapping_abs(),
arr[2].wrapping_abs(),
arr[3].wrapping_abs(),
])
} else {
Self {
a : self.a.abs(),
Expand All @@ -332,7 +338,13 @@ impl i64x4 {
if #[cfg(target_feature="avx2")] {
// avx x86 doesn't have this builtin
let arr: [i64; 4] = cast(self);
u64x4 { avx2: cast(arr.map( |x| x.unsigned_abs())) }
cast(
[
arr[0].unsigned_abs(),
arr[1].unsigned_abs(),
arr[2].unsigned_abs(),
arr[3].unsigned_abs(),
])
} else {
u64x4 {
a : self.a.unsigned_abs(),
Expand Down
19 changes: 18 additions & 1 deletion src/i8x16_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -541,7 +541,24 @@ impl i8x16 {
} else {
let arr: [i8; 16] = cast(self);
cast(
arr.map(|x| x.unsigned_abs()))
[
arr[0].unsigned_abs(),
arr[1].unsigned_abs(),
arr[2].unsigned_abs(),
arr[3].unsigned_abs(),
arr[4].unsigned_abs(),
arr[5].unsigned_abs(),
arr[6].unsigned_abs(),
arr[7].unsigned_abs(),
arr[8].unsigned_abs(),
arr[9].unsigned_abs(),
arr[10].unsigned_abs(),
arr[11].unsigned_abs(),
arr[12].unsigned_abs(),
arr[13].unsigned_abs(),
arr[14].unsigned_abs(),
arr[15].unsigned_abs(),
])
}
}
}
Expand Down

0 comments on commit c96eb85

Please sign in to comment.