Optimize non-relaxed load/store on pre-v6 ARM Linux/Android
taiki-e committed Sep 24, 2023
1 parent 1748268 commit 43af507
Showing 7 changed files with 162 additions and 12 deletions.
3 changes: 3 additions & 0 deletions bench/benches/bench.rs
@@ -45,6 +45,9 @@ mod arch;
#[allow(dead_code, unused_imports)]
#[path = "../../src/imp/arm_linux.rs"]
mod arch;
#[allow(dead_code, unused_imports)]
#[path = "../../src/imp/mod.rs"]
mod imp;
#[cfg(any(target_arch = "x86_64", all(target_arch = "aarch64", target_endian = "little")))]
#[allow(dead_code, unused_imports)]
#[path = "../../src/imp/atomic128/intrinsics.rs"]
6 changes: 2 additions & 4 deletions bench/benches/imp/spinlock_fallback.rs
@@ -9,12 +9,10 @@
//
// This module is currently only enabled in benchmarks.

use core::{
cell::UnsafeCell,
sync::atomic::{AtomicUsize, Ordering},
};
use core::{cell::UnsafeCell, sync::atomic::Ordering};

use super::fallback::utils::{Backoff, CachePadded};
use crate::imp::AtomicUsize;

struct Spinlock {
state: AtomicUsize,
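The import swap above is the point of this hunk: the benchmark's spinlock now builds on the crate's own AtomicUsize wrapper rather than core's, so on pre-v6 ARM Linux it measures the same code path the library actually ships. In short:

// before: use core::sync::atomic::AtomicUsize;  // always the plain std type
// after:  use crate::imp::AtomicUsize;          // crate wrapper; on pre-v6 ARM its
//         non-Relaxed load/store goes through __kuser_memory_barrier (see below)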
4 changes: 3 additions & 1 deletion build.rs
@@ -270,12 +270,14 @@ fn main() {
}
}
target_feature_if("mclass", is_mclass, &version, None, true);
let mut v5te = known && subarch.starts_with("v5te");
let v6 = known
&& (subarch.starts_with("v6")
|| subarch.starts_with("v7")
|| subarch.starts_with("v8")
|| subarch.starts_with("v9"));
target_feature_if("v6", v6, &version, None, true);
v5te |= target_feature_if("v6", v6, &version, None, true);
target_feature_if("v5te", v5te, &version, None, true);
}
"powerpc64" => {
// For Miri and ThreadSanitizer.
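ARMv6 is a superset of ARMv5TE, so the build.rs hunk above folds the v6 check into v5te: target_feature_if evidently returns whether the feature ended up enabled, and `v5te |= ...` records the implication. A sketch of what enabling it amounts to on the rustc side (hedged; the helper's exact output lives elsewhere in build.rs):

// Emitted from build.rs when the subarch implies ARMv5TE (illustrative):
println!("cargo:rustc-cfg=portable_atomic_target_feature=\"v5te\"");
// ...which the runtime code then tests with:
// #[cfg(any(target_feature = "v5te", portable_atomic_target_feature = "v5te"))]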
124 changes: 122 additions & 2 deletions src/imp/arm_linux.rs
@@ -13,17 +13,27 @@
// be possible to omit the dynamic kernel version check if the std feature is enabled on Rust 1.64+.
// https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html

#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
#[path = "fallback/outline_atomics.rs"]
mod fallback;

use core::{arch::asm, cell::UnsafeCell, mem, sync::atomic::Ordering};
use core::{arch::asm, sync::atomic::Ordering};
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
use core::{cell::UnsafeCell, mem};

use crate::utils::{Pair, U64};
use super::core_atomic::{
AtomicI16, AtomicI32, AtomicI8, AtomicIsize, AtomicPtr, AtomicU16, AtomicU32, AtomicU8,
AtomicUsize,
};

// https://www.kernel.org/doc/Documentation/arm/kernel_user_helpers.txt
const KUSER_HELPER_VERSION: usize = 0xFFFF0FFC;
// __kuser_helper_version >= 3 (kernel version 2.6.15+)
const KUSER_MEMORY_BARRIER: usize = 0xFFFF0FA0;
// __kuser_helper_version >= 5 (kernel version 3.1+)
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
const KUSER_CMPXCHG64: usize = 0xFFFF0F60;

#[inline]
fn __kuser_helper_version() -> i32 {
use core::sync::atomic::AtomicI32;
@@ -39,6 +49,7 @@ fn __kuser_helper_version() -> i32 {
CACHE.store(v, Ordering::Relaxed);
v
}
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
#[inline]
fn has_kuser_cmpxchg64() -> bool {
// Note: detect_false cfg is intended to make it easy for portable-atomic developers to
@@ -49,6 +60,7 @@ fn has_kuser_cmpxchg64() -> bool {
}
__kuser_helper_version() >= 5
}
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
#[inline]
unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut u64) -> bool {
// SAFETY: the caller must uphold the safety contract.
@@ -59,7 +71,107 @@ unsafe fn __kuser_cmpxchg64(old_val: *const u64, new_val: *const u64, ptr: *mut
}
}

#[cfg(any(target_feature = "v5te", portable_atomic_target_feature = "v5te"))]
macro_rules! blx {
($addr:tt) => {
concat!("blx ", $addr)
};
}
#[cfg(not(any(target_feature = "v5te", portable_atomic_target_feature = "v5te")))]
macro_rules! blx {
($addr:tt) => {
concat!("mov lr, pc", "\n", "bx ", $addr)
};
}
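Why two expansions: the register form of BLX only exists on ARMv5T and later, so on older cores the macro falls back to the classic two-instruction idiom, which relies on ARM-state PC reading as the current instruction's address plus 8:

// ARMv5TE+:  blx {addr}        // branch with link, one instruction
// pre-v5TE:  mov lr, pc        // lr = <this mov> + 8, i.e. the instruction
//            bx  {addr}        //   just after `bx` -- the correct return address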

macro_rules! atomic_load_store {
($([$($generics:tt)*])? $atomic_type:ident, $value_type:ty, $asm_suffix:tt) => {
impl $(<$($generics)*>)? $atomic_type $(<$($generics)*>)? {
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
#[inline]
pub(crate) fn load(&self, order: Ordering) -> $value_type {
crate::utils::assert_load_ordering(order);
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
match order {
Ordering::Relaxed => self.inner.load(Ordering::Relaxed),
// Acquire and SeqCst loads are equivalent.
Ordering::Acquire | Ordering::SeqCst => {
debug_assert!(__kuser_helper_version() >= 3);
let src = self.as_ptr();
let out;
asm!(
concat!("ldr", $asm_suffix, " {out}, [{src}]"),
blx!("{kuser_memory_barrier}"),
src = in(reg) src,
out = lateout(reg) out,
kuser_memory_barrier = inout(reg) KUSER_MEMORY_BARRIER => _,
out("lr") _,
options(nostack, preserves_flags),
);
out
}
_ => unreachable!("{:?}", order),
}
}
}
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
track_caller
)]
pub(crate) fn store(&self, val: $value_type, order: Ordering) {
crate::utils::assert_store_ordering(order);
let dst = self.as_ptr();
// SAFETY: any data races are prevented by atomic intrinsics and the raw
// pointer passed in is valid because we got it from a reference.
unsafe {
macro_rules! atomic_store_release {
($acquire:expr) => {{
debug_assert!(__kuser_helper_version() >= 3);
asm!(
blx!("{kuser_memory_barrier}"),
concat!("str", $asm_suffix, " {val}, [{dst}]"),
$acquire,
dst = in(reg) dst,
val = in(reg) val,
kuser_memory_barrier = inout(reg) KUSER_MEMORY_BARRIER => _,
out("lr") _,
options(nostack, preserves_flags),
)
}};
}
match order {
Ordering::Relaxed => self.inner.store(val, Ordering::Relaxed),
Ordering::Release => atomic_store_release!(""),
Ordering::SeqCst => atomic_store_release!(blx!("{kuser_memory_barrier}")),
_ => unreachable!("{:?}", order),
}
}
}
}
};
}

atomic_load_store!(AtomicI8, i8, "b");
atomic_load_store!(AtomicU8, u8, "b");
atomic_load_store!(AtomicI16, i16, "h");
atomic_load_store!(AtomicU16, u16, "h");
atomic_load_store!(AtomicI32, i32, "");
atomic_load_store!(AtomicU32, u32, "");
atomic_load_store!(AtomicIsize, isize, "");
atomic_load_store!(AtomicUsize, usize, "");
atomic_load_store!([T] AtomicPtr, *mut T, "");
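For callers nothing changes; the generated load/store only lowers the orderings differently. A usage sketch, assuming this module is the one selected by cfg:

let a = AtomicU32::new(0);
a.store(1, Ordering::Relaxed);      // plain str via the std atomic
a.store(2, Ordering::Release);      // kuser barrier, then plain str
a.store(3, Ordering::SeqCst);       // barrier, plain str, barrier
let _v = a.load(Ordering::Acquire); // plain ldr, then kuser barrier (SeqCst same)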

#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
use crate::utils::{Pair, U64};

// 64-bit atomic load by two 32-bit atomic loads.
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
#[inline]
unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 {
// SAFETY: the caller must uphold the safety contract.
@@ -77,6 +189,7 @@ unsafe fn byte_wise_atomic_load(src: *const u64) -> u64 {
}
}
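The interesting part of this helper is elided by the hunk: it loads the two 32-bit halves with individually atomic loads and stitches them into a u64 through the Pair/U64 union. Tearing between the halves is harmless because every caller feeds the result into a __kuser_cmpxchg64 retry loop, which fails and retries if the word changed in between. Roughly (illustrative; the real body is inline asm):

// let lo = <32-bit atomic load of src as *const u32>;
// let hi = <32-bit atomic load of (src as *const u32).add(1)>;
// U64 { pair: Pair { lo, hi } }.whole  // Pair's layout accounts for endianness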

#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
#[inline(always)]
unsafe fn atomic_update_kuser_cmpxchg64<F>(dst: *mut u64, mut f: F) -> u64
where
@@ -108,6 +221,7 @@ macro_rules! atomic_with_ifunc {
unsafe fn $name:ident($($arg:tt)*) $(-> $ret_ty:ty)? { $($kuser_cmpxchg64_fn_body:tt)* }
fallback = $seqcst_fallback_fn:ident
) => {
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
#[inline]
unsafe fn $name($($arg)*) $(-> $ret_ty)? {
unsafe fn kuser_cmpxchg64_fn($($arg)*) $(-> $ret_ty)? {
@@ -252,6 +366,7 @@
fallback = atomic_neg_seqcst
}
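For reference, the shape of the ifunc pattern the macro expands to: detect once on the first call, cache the chosen function pointer, then jump through the cache on every later call. A minimal standalone sketch; has_kuser_cmpxchg64 is the real detector from this file, while FUNC, dispatch, and the two stub implementations are hypothetical names, not the macro's actual output:

use core::{mem, sync::atomic::{AtomicPtr, Ordering}};

type Func = unsafe fn(*mut u64) -> u64;
// Cache starts at the resolver; its first call replaces it with the chosen impl.
static FUNC: AtomicPtr<()> = AtomicPtr::new(detect as Func as *mut ());

unsafe fn detect(dst: *mut u64) -> u64 {
    let f: Func = if has_kuser_cmpxchg64() { kuser_impl } else { fallback_impl };
    FUNC.store(f as *mut (), Ordering::Relaxed);
    unsafe { f(dst) }
}

unsafe fn dispatch(dst: *mut u64) -> u64 {
    let f = FUNC.load(Ordering::Relaxed);
    // SAFETY: FUNC only ever holds pointers of type `Func`.
    unsafe { mem::transmute::<*mut (), Func>(f)(dst) }
}

unsafe fn kuser_impl(dst: *mut u64) -> u64 {
    // placeholder body: the real version loops on __kuser_cmpxchg64
    unsafe { dst.read() }
}
unsafe fn fallback_impl(dst: *mut u64) -> u64 {
    // placeholder body: the real version takes the SeqCst fallback path
    unsafe { dst.read() }
}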

#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
macro_rules! atomic64 {
($atomic_type:ident, $int_type:ident, $atomic_max:ident, $atomic_min:ident) => {
#[repr(C, align(8))]
@@ -441,7 +556,9 @@ macro_rules! atomic64 {
};
}

#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
atomic64!(AtomicI64, i64, atomic_max, atomic_min);
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
atomic64!(AtomicU64, u64, atomic_umax, atomic_umin);

#[allow(
Expand All @@ -462,10 +579,13 @@ mod tests {
assert_eq!(version, unsafe { (KUSER_HELPER_VERSION as *const i32).read() });
}

#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
test_atomic_int!(i64);
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
test_atomic_int!(u64);

// load/store/swap implementation is not affected by signedness, so it is
// enough to test only unsigned types.
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
stress_test!(u64);
}
32 changes: 30 additions & 2 deletions src/imp/core_atomic.rs
@@ -22,7 +22,7 @@ unsafe impl Sync for NoRefUnwindSafe {}

#[repr(transparent)]
pub(crate) struct AtomicPtr<T> {
inner: core::sync::atomic::AtomicPtr<T>,
pub(crate) inner: core::sync::atomic::AtomicPtr<T>,
// Prevent RefUnwindSafe from being propagated from the std atomic type.
_marker: PhantomData<NoRefUnwindSafe>,
}
@@ -47,6 +47,13 @@ impl<T> AtomicPtr<T> {
pub(crate) fn into_inner(self) -> *mut T {
self.inner.into_inner()
}
#[cfg(not(all(
not(any(miri, portable_atomic_sanitize_thread)),
not(portable_atomic_no_asm),
target_arch = "arm",
any(target_os = "linux", target_os = "android"),
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
)))]
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
@@ -56,6 +63,13 @@ impl<T> AtomicPtr<T> {
crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check)
self.inner.load(order)
}
#[cfg(not(all(
not(any(miri, portable_atomic_sanitize_thread)),
not(portable_atomic_no_asm),
target_arch = "arm",
any(target_os = "linux", target_os = "android"),
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
)))]
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
@@ -127,7 +141,7 @@ macro_rules! atomic_int {
($atomic_type:ident, $int_type:ident) => {
#[repr(transparent)]
pub(crate) struct $atomic_type {
inner: core::sync::atomic::$atomic_type,
pub(crate) inner: core::sync::atomic::$atomic_type,
// Prevent RefUnwindSafe from being propagated from the std atomic type.
_marker: PhantomData<NoRefUnwindSafe>,
}
Expand Down Expand Up @@ -174,6 +188,13 @@ macro_rules! atomic_int {
pub(crate) fn into_inner(self) -> $int_type {
self.inner.into_inner()
}
#[cfg(not(all(
not(any(miri, portable_atomic_sanitize_thread)),
not(portable_atomic_no_asm),
target_arch = "arm",
any(target_os = "linux", target_os = "android"),
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
)))]
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
@@ -183,6 +204,13 @@ macro_rules! atomic_int {
crate::utils::assert_load_ordering(order); // for track_caller (compiler can omit double check)
self.inner.load(order)
}
#[cfg(not(all(
not(any(miri, portable_atomic_sanitize_thread)),
not(portable_atomic_no_asm),
target_arch = "arm",
any(target_os = "linux", target_os = "android"),
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
)))]
#[inline]
#[cfg_attr(
any(all(debug_assertions, not(portable_atomic_no_track_caller)), miri),
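Taken together with the arm_linux.rs hunks, the picture: on the pre-v6 ARM Linux/Android configuration (the cfg expression here mirrors the one in mod.rs), the load/store defined in core_atomic.rs are compiled out and arm_linux.rs supplies its own inherent load/store on the very same wrapper types; making `inner` pub(crate) is what lets that module reach the underlying std atomic for the Relaxed fast path. Schematically (illustrative, not actual code):

// One wrapper type, two sources of methods on this target:
//   core_atomic.rs -> new, swap, compare_exchange, fetch_*  (std atomics)
//   arm_linux.rs   -> load, store                           (ldr/str + kuser barrier)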
3 changes: 2 additions & 1 deletion src/imp/fallback/seq_lock_wide.rs
@@ -4,10 +4,11 @@

use core::{
mem::ManuallyDrop,
sync::atomic::{self, AtomicUsize, Ordering},
sync::atomic::{self, Ordering},
};

use super::utils::Backoff;
use crate::imp::AtomicUsize;

// See mod.rs for details.
pub(super) type AtomicChunk = AtomicUsize;
2 changes: 0 additions & 2 deletions src/imp/mod.rs
@@ -113,15 +113,13 @@ mod powerpc64;
mod s390x;

// pre-v6 ARM Linux 64-bit atomics
#[cfg(feature = "fallback")]
// Miri and Sanitizer do not support inline assembly.
#[cfg(all(
target_arch = "arm",
not(any(miri, portable_atomic_sanitize_thread)),
not(portable_atomic_no_asm),
any(target_os = "linux", target_os = "android"),
not(any(target_feature = "v6", portable_atomic_target_feature = "v6")),
not(portable_atomic_no_outline_atomics),
))]
#[cfg_attr(portable_atomic_no_cfg_target_has_atomic, cfg(portable_atomic_no_atomic_64))]
#[cfg_attr(not(portable_atomic_no_cfg_target_has_atomic), cfg(not(target_has_atomic = "64")))]
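The dropped module-level gates are not lost: they move into arm_linux.rs itself (the #[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))] attributes in the hunks above), so the load/store optimization is compiled in even when the 64-bit outline-atomics fallback is disabled:

// mod.rs now always includes the module on pre-v6 ARM Linux/Android;
// inside it, only the 64-bit pieces stay gated per item:
#[cfg(all(feature = "fallback", not(portable_atomic_no_outline_atomics)))]
mod fallback; // ...and likewise KUSER_CMPXCHG64, atomic64!, and the 64-bit tests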
