rust-lang · calebzulawski · Aug 13, 2024 · Jun 3, 2024 · Jun 7, 2024 · Aug 7, 2024
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -245,34 +245,6 @@ jobs:
       - name: Test (release)
         run: cross test --verbose --target=${{ matrix.target }} --release
 
-  features:
-    name: "Test cargo features (${{ matrix.simd }} × ${{ matrix.features }})"
-    runs-on: ubuntu-latest
-    strategy:
-      fail-fast: false
-      matrix:
-        simd:
-          - ""
-          - "avx512"
-        features:
-          - ""
-          - "--features std"
-          - "--features all_lane_counts"
-          - "--all-features"
-
-    steps:
-      - uses: actions/checkout@v2
-      - name: Detect AVX512
-        run: echo "CPU_FEATURE=$(lscpu | grep -o avx512[a-z]* | sed s/avx/+avx/ | tr '\n' ',' )" >> $GITHUB_ENV
-      - name: Check build
-        if: ${{ matrix.simd == '' }}
-        run: RUSTFLAGS="-Dwarnings" cargo test --all-targets --no-default-features ${{ matrix.features }}
-      - name: Check AVX
-        if: ${{ matrix.simd == 'avx512' && contains(env.CPU_FEATURE, 'avx512') }}
-        run: |
-          echo "Found AVX features: $CPU_FEATURE"
-          RUSTFLAGS="-Dwarnings -Ctarget-feature=$CPU_FEATURE" cargo test --all-targets --no-default-features ${{ matrix.features }}
-
   miri:
     runs-on: ubuntu-latest
     steps:

diff --git a/crates/core_simd/Cargo.toml b/crates/core_simd/Cargo.toml
@@ -9,10 +9,9 @@ categories = ["hardware-support", "no-std"]
 license = "MIT OR Apache-2.0"
 
 [features]
-default = ["as_crate"]
+default = ["as_crate", "std"]
 as_crate = []
 std = []
-all_lane_counts = []
 
 [target.'cfg(target_arch = "wasm32")'.dev-dependencies]
 wasm-bindgen = "0.2"

diff --git a/crates/core_simd/src/lane_count.rs b/crates/core_simd/src/lane_count.rs
@@ -33,10 +33,8 @@ macro_rules! supported_lane_count {
     };
 }
 
-supported_lane_count!(1, 2, 4, 8, 16, 32, 64);
-#[cfg(feature = "all_lane_counts")]
 supported_lane_count!(
-    3, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
-    31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
-    56, 57, 58, 59, 60, 61, 62, 63
+    1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26,
+    27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
+    51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64
 );
diff --git a/crates/core_simd/src/ops.rs b/crates/core_simd/src/ops.rs
@@ -77,7 +77,7 @@ macro_rules! int_divrem_guard {
     (   $lhs:ident,
         $rhs:ident,
         {   const PANIC_ZERO: &'static str = $zero:literal;
-            $simd_call:ident
+            $simd_call:ident, $op:tt
         },
         $int:ident ) => {
         if $rhs.simd_eq(Simd::splat(0 as _)).any() {
@@ -96,8 +96,23 @@ macro_rules! int_divrem_guard {
                 // Nice base case to make it easy to const-fold away the other branch.
                 $rhs
             };
-            // Safety: $lhs and rhs are vectors
-            unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
+
+            // aarch64 div fails for arbitrary `v % 0`, mod fails when rhs is MIN, for non-powers-of-two
+            // these operations aren't vectorized on aarch64 anyway
+            #[cfg(target_arch = "aarch64")]
+            {
+                let mut out = Simd::splat(0 as _);
+                for i in 0..Self::LEN {
+                    out[i] = $lhs[i] $op rhs[i];
+                }
+                out
+            }
+
+            #[cfg(not(target_arch = "aarch64"))]
+            {
+                // Safety: $lhs and rhs are vectors
+                unsafe { core::intrinsics::simd::$simd_call($lhs, rhs) }
+            }
         }
     };
 }
@@ -205,14 +220,14 @@ for_base_ops! {
     impl Div::div {
         int_divrem_guard {
             const PANIC_ZERO: &'static str = "attempt to divide by zero";
-            simd_div
+            simd_div, /
         }
     }
 
     impl Rem::rem {
         int_divrem_guard {
             const PANIC_ZERO: &'static str = "attempt to calculate the remainder with a divisor of zero";
-            simd_rem
+            simd_rem, %
         }
     }
 

diff --git a/crates/core_simd/src/simd/num/float.rs b/crates/core_simd/src/simd/num/float.rs
@@ -255,13 +255,41 @@ macro_rules! impl_trait {
             type Bits = Simd<$bits_ty, N>;
             type Cast<T: SimdElement> = Simd<T, N>;
 
+            #[cfg(not(target_arch = "aarch64"))]
             #[inline]
             fn cast<T: SimdCast>(self) -> Self::Cast<T>
             {
                 // Safety: supported types are guaranteed by SimdCast
                 unsafe { core::intrinsics::simd::simd_as(self) }
             }
 
+            // https://github.com/llvm/llvm-project/issues/94694
+            #[cfg(target_arch = "aarch64")]
+            #[inline]
+            fn cast<T: SimdCast>(self) -> Self::Cast<T>
+            {
+                const { assert!(N <= 64) };
+                if N <= 2 || N == 4 || N == 8 || N == 16 || N == 32 || N == 64 {
+                    // Safety: supported types are guaranteed by SimdCast
+                    unsafe { core::intrinsics::simd::simd_as(self) }
+                } else if N < 4 {
+                    let x = self.resize::<4>(Default::default()).cast();
+                    x.resize::<N>(x[0])
+                } else if N < 8 {
+                    let x = self.resize::<8>(Default::default()).cast();
+                    x.resize::<N>(x[0])
+                } else if N < 16 {
+                    let x = self.resize::<16>(Default::default()).cast();
+                    x.resize::<N>(x[0])
+                } else if N < 32 {
+                    let x = self.resize::<32>(Default::default()).cast();
+                    x.resize::<N>(x[0])
+                } else {
+                    let x = self.resize::<64>(Default::default()).cast();
+                    x.resize::<N>(x[0])
+                }
+            }
+
             #[inline]
             #[cfg_attr(miri, track_caller)] // even without panics, this helps for Miri backtraces
             unsafe fn to_int_unchecked<I: SimdCast>(self) -> Self::Cast<I>

diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
@@ -99,7 +99,7 @@ use crate::simd::{
 // directly constructing an instance of the type (i.e. `let vector = Simd(array)`) should be
 // avoided, as it will likely become illegal on `#[repr(simd)]` structs in the future. It also
 // causes rustc to emit illegal LLVM IR in some cases.
-#[repr(simd)]
+#[repr(simd, packed)]
 pub struct Simd<T, const N: usize>([T; N])
 where
     LaneCount<N>: SupportedLaneCount,

diff --git a/crates/core_simd/tests/layout.rs b/crates/core_simd/tests/layout.rs
@@ -0,0 +1,35 @@
+#![feature(portable_simd)]
+
+macro_rules! layout_tests {
+    { $($mod:ident, $ty:ty,)* } => {
+        $(
+        mod $mod {
+            test_helpers::test_lanes! {
+                fn no_padding<const LANES: usize>() {
+                    assert_eq!(
+                        core::mem::size_of::<core_simd::simd::Simd::<$ty, LANES>>(),
+                        core::mem::size_of::<[$ty; LANES]>(),
+                    );
+                }
+            }
+        }
+        )*
+    }
+}
+
+layout_tests! {
+    i8, i8,
+    i16, i16,
+    i32, i32,
+    i64, i64,
+    isize, isize,
+    u8, u8,
+    u16, u16,
+    u32, u32,
+    u64, u64,
+    usize, usize,
+    f32, f32,
+    f64, f64,
+    mut_ptr, *mut (),
+    const_ptr, *const (),
+}
diff --git a/crates/core_simd/tests/masks.rs b/crates/core_simd/tests/masks.rs
@@ -99,7 +99,6 @@ macro_rules! test_mask_api {
                 assert_eq!(Mask::<$type, 2>::from_bitmask(bitmask), mask);
             }
 
-            #[cfg(feature = "all_lane_counts")]
             #[test]
             fn roundtrip_bitmask_conversion_odd() {
                 let values = [

diff --git a/crates/test_helpers/Cargo.toml b/crates/test_helpers/Cargo.toml
@@ -6,6 +6,3 @@ publish = false
 
 [dependencies]
 proptest = { version = "0.10", default-features = false, features = ["alloc"] }
-
-[features]
-all_lane_counts = []
diff --git a/crates/test_helpers/src/lib.rs b/crates/test_helpers/src/lib.rs
@@ -539,39 +539,27 @@ macro_rules! test_lanes {
                     #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
                     lanes_1 1;
                     lanes_2 2;
-                );
-
-                #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
-                $crate::test_lanes_helper!(
-                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
-                    lanes_4 4;
-                    lanes_8 8;
-                    lanes_16 16;
-                    lanes_32 32;
-                    lanes_64 64;
-                );
-
-                #[cfg(feature = "all_lane_counts")]
-                $crate::test_lanes_helper!(
-                    // test one non-power-of-2 length on miri
-                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
                     lanes_3 3;
+
+                    lanes_6 6;
                 );
 
-                #[cfg(feature = "all_lane_counts")]
                 #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
                 $crate::test_lanes_helper!(
                     #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test::wasm_bindgen_test)];
+                    lanes_4 4;
                     lanes_5 5;
-                    lanes_6 6;
+
                     lanes_7 7;
+                    lanes_8 8;
                     lanes_9 9;
                     lanes_10 10;
                     lanes_11 11;
                     lanes_12 12;
                     lanes_13 13;
                     lanes_14 14;
                     lanes_15 15;
+                    lanes_16 16;
                     lanes_17 17;
                     lanes_18 18;
                     lanes_19 19;
@@ -587,6 +575,7 @@ macro_rules! test_lanes {
                     lanes_29 29;
                     lanes_30 30;
                     lanes_31 31;
+                    lanes_32 32;
                     lanes_33 33;
                     lanes_34 34;
                     lanes_35 35;
@@ -618,6 +607,7 @@ macro_rules! test_lanes {
                     lanes_61 61;
                     lanes_62 62;
                     lanes_63 63;
+                    lanes_64 64;
                 );
             }
         )*
@@ -639,43 +629,32 @@ macro_rules! test_lanes_panic {
                     core_simd::simd::LaneCount<$lanes>: core_simd::simd::SupportedLaneCount,
                 $body
 
+                // test some odd and even non-power-of-2 lengths on miri
                 $crate::test_lanes_helper!(
                     #[should_panic];
                     lanes_1 1;
                     lanes_2 2;
-                    lanes_4 4;
-                );
-
-                #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
-                $crate::test_lanes_helper!(
-                    #[should_panic];
-                    lanes_8 8;
-                    lanes_16 16;
-                    lanes_32 32;
-                    lanes_64 64;
-                );
-
-                #[cfg(feature = "all_lane_counts")]
-                $crate::test_lanes_helper!(
-                    // test some odd and even non-power-of-2 lengths on miri
-                    #[should_panic];
                     lanes_3 3;
-                    lanes_5 5;
+
                     lanes_6 6;
                 );
 
-                #[cfg(feature = "all_lane_counts")]
                 #[cfg(not(miri))] // Miri intrinsic implementations are uniform and larger tests are sloooow
                 $crate::test_lanes_helper!(
                     #[should_panic];
+                    lanes_4 4;
+                    lanes_5 5;
+
                     lanes_7 7;
+                    lanes_8 8;
                     lanes_9 9;
                     lanes_10 10;
                     lanes_11 11;
                     lanes_12 12;
                     lanes_13 13;
                     lanes_14 14;
                     lanes_15 15;
+                    lanes_16 16;
                     lanes_17 17;
                     lanes_18 18;
                     lanes_19 19;
@@ -691,6 +670,7 @@ macro_rules! test_lanes_panic {
                     lanes_29 29;
                     lanes_30 30;
                     lanes_31 31;
+                    lanes_32 32;
                     lanes_33 33;
                     lanes_34 34;
                     lanes_35 35;
@@ -722,6 +702,7 @@ macro_rules! test_lanes_panic {
                     lanes_61 61;
                     lanes_62 62;
                     lanes_63 63;
+                    lanes_64 64;
                 );
             }
         )*