Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix layout of non-power-of-two length vectors #422

Merged
merged 8 commits into from
Aug 13, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 0 additions & 28 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -245,34 +245,6 @@ jobs:
- name: Test (release)
run: cross test --verbose --target=${{ matrix.target }} --release

features:
name: "Test cargo features (${{ matrix.simd }} × ${{ matrix.features }})"
runs-on: ubuntu-latest
strategy:
fail-fast: false
matrix:
simd:
- ""
- "avx512"
features:
- ""
- "--features std"
- "--features all_lane_counts"
programmerjake marked this conversation as resolved.
Show resolved Hide resolved
- "--all-features"

steps:
- uses: actions/checkout@v2
- name: Detect AVX512
run: echo "CPU_FEATURE=$(lscpu | grep -o avx512[a-z]* | sed s/avx/+avx/ | tr '\n' ',' )" >> $GITHUB_ENV
- name: Check build
if: ${{ matrix.simd == '' }}
run: RUSTFLAGS="-Dwarnings" cargo test --all-targets --no-default-features ${{ matrix.features }}
- name: Check AVX
if: ${{ matrix.simd == 'avx512' && contains(env.CPU_FEATURE, 'avx512') }}
run: |
echo "Found AVX features: $CPU_FEATURE"
RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo test --all-targets --no-default-features ${{ matrix.features }}

miri:
runs-on: ubuntu-latest
steps:
Expand Down
3 changes: 1 addition & 2 deletions crates/core_simd/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,9 @@ categories = ["hardware-support", "no-std"]
license = "MIT OR Apache-2.0"

[features]
default = ["as_crate"]
default = ["as_crate", "std"]
as_crate = []
std = []
all_lane_counts = []

[target.'cfg(target_arch = "wasm32")'.dev-dependencies]
wasm-bindgen = "0.2"
Expand Down
8 changes: 3 additions & 5 deletions crates/core_simd/src/lane_count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,8 @@ macro_rules! supported_lane_count {
};
}

supported_lane_count!(1, 2, 4, 8, 16, 32, 64);
#[cfg(feature = "all_lane_counts")]
supported_lane_count!(
3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
56, 57, 58, 59, 60, 61, 62, 63
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
);
25 changes: 20 additions & 5 deletions crates/core_simd/src/ops.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ macro_rules! int_divrem_guard {
( $lhs:ident,
$rhs:ident,
{ const PANIC_ZERO: &'static str = $zero:literal;
$simd_call:ident
$simd_call:ident, $op:tt
},
$int:ident ) => {
if $rhs.simd_eq(Simd::splat(0 as _)).any() {
Expand All @@ -96,8 +96,23 @@ macro_rules! int_divrem_guard {
// Nice base case to make it easy to const-fold away the other branch.
$rhs
};
// Safety: $lhs and rhs are vectors
unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }

// aarch64 div fails for arbitrary `v % 0`, mod fails when rhs is MIN, for non-powers-of-two
// these operations aren't vectorized on aarch64 anyway
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These are LLVM backend bugs, right? simd_div/simd_rem still should work the same on all targets?

That seems worth tracking somewhere, having subtly buggy intrinsics is no good.

Copy link
Member

@programmerjake programmerjake Aug 7, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

also, theoretically LLVM should be able to generate SIMD code for division/remainder by a constant, by using the exact same fancy math as it would use for scalars (which it unfortunately currently does after scalarization of div ops for non-power-of-2 vectors), so once LLVM's bugs are fixed, I think we should switch back to generating SIMD ops.

https://clang.godbolt.org/z/MxK47TWGs

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, these are definitely backend bugs

#[cfg(target_arch = "aarch64")]
{
let mut out = Simd::splat(0 as _);
for i in 0..Self::LEN {
out[i] = $lhs[i] $op rhs[i];
}
out
}

#[cfg(not(target_arch = "aarch64"))]
{
// Safety: $lhs and rhs are vectors
unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
}
}
};
}
Expand Down Expand Up @@ -205,14 +220,14 @@ for_base_ops! {
impl Div::div {
int_divrem_guard {
const PANIC_ZERO: &'static str = "attempt to divide by zero";
simd_div
simd_div, /
}
}

impl Rem::rem {
int_divrem_guard {
const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
simd_rem
simd_rem, %
}
}

Expand Down
28 changes: 28 additions & 0 deletions crates/core_simd/src/simd/num/float.rs
Original file line number Diff line number Diff line change
Expand Up @@ -255,13 +255,41 @@ macro_rules! impl_trait {
type Bits = Simd<$bits_ty, N>;
type Cast<T: SimdElement> = Simd<T, N>;

#[cfg(not(target_arch = "aarch64"))]
#[inline]
fn cast<T: SimdCast>(self) -> Self::Cast<T>
{
// Safety: supported types are guaranteed by SimdCast
unsafe { core::intrinsics::simd::simd_as(self) }
}

// https://github.com/llvm/llvm-project/issues/94694
#[cfg(target_arch = "aarch64")]
#[inline]
fn cast<T: SimdCast>(self) -> Self::Cast<T>
{
const { assert!(N <= 64) };
if N <= 2 || N == 4 || N == 8 || N == 16 || N == 32 || N == 64 {
// Safety: supported types are guaranteed by SimdCast
unsafe { core::intrinsics::simd::simd_as(self) }
} else if N < 4 {
let x = self.resize::<4>(Default::default()).cast();
x.resize::<N>(x[0])
} else if N < 8 {
let x = self.resize::<8>(Default::default()).cast();
x.resize::<N>(x[0])
} else if N < 16 {
let x = self.resize::<16>(Default::default()).cast();
x.resize::<N>(x[0])
} else if N < 32 {
let x = self.resize::<32>(Default::default()).cast();
x.resize::<N>(x[0])
} else {
let x = self.resize::<64>(Default::default()).cast();
x.resize::<N>(x[0])
}
}

#[inline]
#[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>
Expand Down
2 changes: 1 addition & 1 deletion crates/core_simd/src/vector.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ use crate::simd::{
// directly constructing an instance of the type (i.e. `let vector = Simd(array)`) should be
// avoided, as it will likely become illegal on `#[repr(simd)]` structs in the future. It also
// causes rustc to emit illegal LLVM IR in some cases.
#[repr(simd)]
#[repr(simd, packed)]
Copy link
Member

@RalfJung RalfJung Jun 8, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the plan for simd without packed? Miri currently ICEs when such a type is used with a simd intrinsic and the size is not a power of 2. If portable-simd doesn't need support for that then do we need to have it at all? Can we just make simd itself have the behavior that simd, packed now has?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

simd without packed is used by stdarch. portable-simd might use it in the future too, though imo probably won't.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAIK stdarch only uses power-of-2 vectors, where packed makes no difference?

pub struct Simd<T, const N: usize>([T; N])
where
LaneCount<N>: SupportedLaneCount,
Expand Down
35 changes: 35 additions & 0 deletions crates/core_simd/tests/layout.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#![feature(portable_simd)]

macro_rules! layout_tests {
{ $($mod:ident, $ty:ty,)* } => {
$(
mod $mod {
test_helpers::test_lanes! {
fn no_padding<const LANES: usize>() {
assert_eq!(
core::mem::size_of::<core_simd::simd::Simd::<$ty, LANES>>(),
core::mem::size_of::<[$ty; LANES]>(),
);
}
}
}
)*
}
}

layout_tests! {
i8, i8,
i16, i16,
i32, i32,
i64, i64,
isize, isize,
u8, u8,
u16, u16,
u32, u32,
u64, u64,
usize, usize,
f32, f32,
f64, f64,
mut_ptr, *mut (),
const_ptr, *const (),
}
1 change: 0 additions & 1 deletion crates/core_simd/tests/masks.rs
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ macro_rules! test_mask_api {
assert_eq!(Mask::<$type, 2>::from_bitmask(bitmask), mask);
}

#[cfg(feature = "all_lane_counts")]
#[test]
fn roundtrip_bitmask_conversion_odd() {
let values = [
Expand Down
3 changes: 0 additions & 3 deletions crates/test_helpers/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,3 @@ publish = false

[dependencies]
proptest = { version = "0.10", default-features = false, features = ["alloc"] }

[features]
all_lane_counts = []
53 changes: 17 additions & 36 deletions crates/test_helpers/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -539,39 +539,27 @@ macro_rules! test_lanes {
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
lanes_1 1;
lanes_2 2;
);

#[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
$crate::test_lanes_helper!(
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
lanes_4 4;
lanes_8 8;
lanes_16 16;
lanes_32 32;
lanes_64 64;
);

#[cfg(feature = "all_lane_counts")]
$crate::test_lanes_helper!(
// test one non-power-of-2 length on miri
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
lanes_3 3;
calebzulawski marked this conversation as resolved.
Show resolved Hide resolved

lanes_6 6;
);

#[cfg(feature = "all_lane_counts")]
#[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
$crate::test_lanes_helper!(
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
lanes_4 4;
lanes_5 5;
lanes_6 6;

lanes_7 7;
lanes_8 8;
lanes_9 9;
lanes_10 10;
lanes_11 11;
lanes_12 12;
lanes_13 13;
lanes_14 14;
lanes_15 15;
lanes_16 16;
lanes_17 17;
lanes_18 18;
lanes_19 19;
Expand All @@ -587,6 +575,7 @@ macro_rules! test_lanes {
lanes_29 29;
lanes_30 30;
lanes_31 31;
lanes_32 32;
lanes_33 33;
lanes_34 34;
lanes_35 35;
Expand Down Expand Up @@ -618,6 +607,7 @@ macro_rules! test_lanes {
lanes_61 61;
lanes_62 62;
lanes_63 63;
lanes_64 64;
);
}
)*
Expand All @@ -639,43 +629,32 @@ macro_rules! test_lanes_panic {
core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount,
$body

// test some odd and even non-power-of-2 lengths on miri
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This doesn't have any even non-power-of-2. Maybe replace 5 by 6?

(Though I am also not sure why odd vs even would be an interesting difference here.)

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

if you look down further it tests length 3 on miri, the idea is we want to catch bugs caused by repr(simd, packed) having alignment smaller than repr(simd) which only happens for non-power-of-2 sizes. even non-power-of-2 sizes cover where the alignment is in between the element alignment and the non-packed alignment. @calebzulawski can you add back in length 6 since that's the smallest length where that occurs?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah 3 and 6 would probably be reasonable then. To cut down on CI times I'd remove 5.

$crate::test_lanes_helper!(
#[should_panic];
lanes_1 1;
lanes_2 2;
lanes_4 4;
);

#[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
$crate::test_lanes_helper!(
#[should_panic];
lanes_8 8;
lanes_16 16;
lanes_32 32;
lanes_64 64;
);

#[cfg(feature = "all_lane_counts")]
$crate::test_lanes_helper!(
// test some odd and even non-power-of-2 lengths on miri
#[should_panic];
lanes_3 3;
lanes_5 5;

lanes_6 6;
);

#[cfg(feature = "all_lane_counts")]
#[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
$crate::test_lanes_helper!(
#[should_panic];
lanes_4 4;
lanes_5 5;

lanes_7 7;
lanes_8 8;
lanes_9 9;
lanes_10 10;
lanes_11 11;
lanes_12 12;
lanes_13 13;
lanes_14 14;
lanes_15 15;
lanes_16 16;
lanes_17 17;
lanes_18 18;
lanes_19 19;
Expand All @@ -691,6 +670,7 @@ macro_rules! test_lanes_panic {
lanes_29 29;
lanes_30 30;
lanes_31 31;
lanes_32 32;
lanes_33 33;
lanes_34 34;
lanes_35 35;
Expand Down Expand Up @@ -722,6 +702,7 @@ macro_rules! test_lanes_panic {
lanes_61 61;
lanes_62 62;
lanes_63 63;
lanes_64 64;
);
}
)*
Expand Down
Loading