From 961c49be8a1b0f4ba9987789664c78e38e0baf93 Mon Sep 17 00:00:00 2001 From: Havard Eidnes Date: Sun, 29 Sep 2024 15:50:59 +0000 Subject: [PATCH] Make this crate build on big-endian aarch64. As noted in https://github.com/rust-lang/stdarch/issues/1484, the NEON intrinsics are broken on big-endian aarch64. This is part of fixing Rust to build for and on big-endian aarch64, following up on https://github.com/rust-lang/rust/issues/129819. --- src/arch/aarch64/memchr.rs | 4 ++-- src/arch/aarch64/mod.rs | 2 ++ src/memchr.rs | 28 ++++++++++++++-------------- src/memmem/searcher.rs | 16 ++++++++-------- src/vector.rs | 2 +- 5 files changed, 27 insertions(+), 25 deletions(-) diff --git a/src/arch/aarch64/memchr.rs b/src/arch/aarch64/memchr.rs index e0053b2..4d96b16 100644 --- a/src/arch/aarch64/memchr.rs +++ b/src/arch/aarch64/memchr.rs @@ -8,7 +8,7 @@ available for `aarch64` targets.) macro_rules! defraw { ($ty:ident, $find:ident, $start:ident, $end:ident, $($needles:ident),+) => {{ - #[cfg(target_feature = "neon")] + #[cfg(all(target_feature = "neon", target_endian = "little"))] { use crate::arch::aarch64::neon::memchr::$ty; @@ -19,7 +19,7 @@ macro_rules! defraw { // enabled. $ty::new_unchecked($($needles),+).$find($start, $end) } - #[cfg(not(target_feature = "neon"))] + #[cfg(not(all(target_feature = "neon", target_endian = "little")))] { use crate::arch::all::memchr::$ty; diff --git a/src/arch/aarch64/mod.rs b/src/arch/aarch64/mod.rs index 7b32912..7262414 100644 --- a/src/arch/aarch64/mod.rs +++ b/src/arch/aarch64/mod.rs @@ -2,6 +2,8 @@ Vector algorithms for the `aarch64` target.
*/ +#[cfg(target_endian = "little")] pub mod neon; +#[cfg(target_endian = "little")] pub(crate) mod memchr; diff --git a/src/memchr.rs b/src/memchr.rs index 92a18bd..665adbf 100644 --- a/src/memchr.rs +++ b/src/memchr.rs @@ -518,14 +518,14 @@ unsafe fn memchr_raw( { crate::arch::wasm32::memchr::memchr_raw(needle, start, end) } - #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", target_endian = "little"))] { crate::arch::aarch64::memchr::memchr_raw(needle, start, end) } #[cfg(not(any( target_arch = "x86_64", all(target_arch = "wasm32", target_feature = "simd128"), - target_arch = "aarch64" + all(target_arch = "aarch64", target_endian = "little") )))] { crate::arch::all::memchr::One::new(needle).find_raw(start, end) @@ -551,14 +551,14 @@ unsafe fn memrchr_raw( { crate::arch::wasm32::memchr::memrchr_raw(needle, start, end) } - #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", target_endian = "little"))] { crate::arch::aarch64::memchr::memrchr_raw(needle, start, end) } #[cfg(not(any( target_arch = "x86_64", all(target_arch = "wasm32", target_feature = "simd128"), - target_arch = "aarch64" + all(target_arch = "aarch64", target_endian = "little") )))] { crate::arch::all::memchr::One::new(needle).rfind_raw(start, end) @@ -585,14 +585,14 @@ unsafe fn memchr2_raw( { crate::arch::wasm32::memchr::memchr2_raw(needle1, needle2, start, end) } - #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", target_endian = "little"))] { crate::arch::aarch64::memchr::memchr2_raw(needle1, needle2, start, end) } #[cfg(not(any( target_arch = "x86_64", all(target_arch = "wasm32", target_feature = "simd128"), - target_arch = "aarch64" + all(target_arch = "aarch64", target_endian = "little") )))] { crate::arch::all::memchr::Two::new(needle1, needle2) @@ -620,7 +620,7 @@ unsafe fn memrchr2_raw( { crate::arch::wasm32::memchr::memrchr2_raw(needle1, needle2, start, end) } - #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", 
target_endian = "little"))] { crate::arch::aarch64::memchr::memrchr2_raw( needle1, needle2, start, end, @@ -629,7 +629,7 @@ unsafe fn memrchr2_raw( #[cfg(not(any( target_arch = "x86_64", all(target_arch = "wasm32", target_feature = "simd128"), - target_arch = "aarch64" + all(target_arch = "aarch64", target_endian = "little") )))] { crate::arch::all::memchr::Two::new(needle1, needle2) @@ -662,7 +662,7 @@ unsafe fn memchr3_raw( needle1, needle2, needle3, start, end, ) } - #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", target_endian = "little"))] { crate::arch::aarch64::memchr::memchr3_raw( needle1, needle2, needle3, start, end, @@ -671,7 +671,7 @@ unsafe fn memchr3_raw( #[cfg(not(any( target_arch = "x86_64", all(target_arch = "wasm32", target_feature = "simd128"), - target_arch = "aarch64" + all(target_arch = "aarch64", target_endian = "little") )))] { crate::arch::all::memchr::Three::new(needle1, needle2, needle3) @@ -704,7 +704,7 @@ unsafe fn memrchr3_raw( needle1, needle2, needle3, start, end, ) } - #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", target_endian = "little"))] { crate::arch::aarch64::memchr::memrchr3_raw( needle1, needle2, needle3, start, end, @@ -713,7 +713,7 @@ unsafe fn memrchr3_raw( #[cfg(not(any( target_arch = "x86_64", all(target_arch = "wasm32", target_feature = "simd128"), - target_arch = "aarch64" + all(target_arch = "aarch64", target_endian = "little") )))] { crate::arch::all::memchr::Three::new(needle1, needle2, needle3) @@ -736,14 +736,14 @@ unsafe fn count_raw(needle: u8, start: *const u8, end: *const u8) -> usize { { crate::arch::wasm32::memchr::count_raw(needle, start, end) } - #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", target_endian = "little"))] { crate::arch::aarch64::memchr::count_raw(needle, start, end) } #[cfg(not(any( target_arch = "x86_64", all(target_arch = "wasm32", target_feature = "simd128"), - target_arch = "aarch64" + all(target_arch = "aarch64", 
target_endian = "little") )))] { crate::arch::all::memchr::One::new(needle).count_raw(start, end) diff --git a/src/memmem/searcher.rs b/src/memmem/searcher.rs index 2a533e0..fbca707 100644 --- a/src/memmem/searcher.rs +++ b/src/memmem/searcher.rs @@ -3,7 +3,7 @@ use crate::arch::all::{ rabinkarp, twoway, }; -#[cfg(target_arch = "aarch64")] +#[cfg(all(target_arch = "aarch64", target_endian = "little"))] use crate::arch::aarch64::neon::packedpair as neon; #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] use crate::arch::wasm32::simd128::packedpair as simd128; @@ -129,7 +129,7 @@ impl Searcher { Searcher::twoway(needle, rabinkarp, prestrat) } } - #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", target_endian = "little"))] { if let Some(pp) = neon::Finder::with_pair(needle, pair) { if do_packed_search(needle) { @@ -152,7 +152,7 @@ impl Searcher { #[cfg(not(any( all(target_arch = "x86_64", target_feature = "sse2"), all(target_arch = "wasm32", target_feature = "simd128"), - target_arch = "aarch64" + all(target_arch = "aarch64", target_endian = "little") )))] { if prefilter.is_none() { @@ -253,7 +253,7 @@ union SearcherKind { avx2: crate::arch::x86_64::avx2::packedpair::Finder, #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] simd128: crate::arch::wasm32::simd128::packedpair::Finder, - #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", target_endian = "little"))] neon: crate::arch::aarch64::neon::packedpair::Finder, } @@ -421,7 +421,7 @@ unsafe fn searcher_kind_simd128( /// # Safety /// /// Callers must ensure that the `searcher.kind.neon` union field is set. -#[cfg(target_arch = "aarch64")] +#[cfg(all(target_arch = "aarch64", target_endian = "little"))] unsafe fn searcher_kind_neon( searcher: &Searcher, _prestate: &mut PrefilterState, @@ -686,7 +686,7 @@ impl Prefilter { } /// Return a prefilter using a aarch64 neon vector algorithm. 
- #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", target_endian = "little"))] #[inline] fn neon(finder: neon::Finder, needle: &[u8]) -> Prefilter { trace!("building aarch64 neon prefilter"); @@ -763,7 +763,7 @@ union PrefilterKind { avx2: crate::arch::x86_64::avx2::packedpair::Finder, #[cfg(all(target_arch = "wasm32", target_feature = "simd128"))] simd128: crate::arch::wasm32::simd128::packedpair::Finder, - #[cfg(target_arch = "aarch64")] + #[cfg(all(target_arch = "aarch64", target_endian = "little"))] neon: crate::arch::aarch64::neon::packedpair::Finder, } @@ -852,7 +852,7 @@ unsafe fn prefilter_kind_simd128( /// # Safety /// /// Callers must ensure that the `strat.kind.neon` union field is set. -#[cfg(target_arch = "aarch64")] +#[cfg(all(target_arch = "aarch64", target_endian = "little"))] unsafe fn prefilter_kind_neon( strat: &Prefilter, haystack: &[u8], diff --git a/src/vector.rs b/src/vector.rs index d86fbca..9ab4db6 100644 --- a/src/vector.rs +++ b/src/vector.rs @@ -289,7 +289,7 @@ mod x86avx2 { } } -#[cfg(target_arch = "aarch64")] +#[cfg(all(target_arch = "aarch64", target_endian = "little"))] mod aarch64neon { use core::arch::aarch64::*;