From c1dcf43d4b9a9a399cb1da2e39b27d5e7e0cf69d Mon Sep 17 00:00:00 2001 From: Putta Khunchalee Date: Sat, 14 Sep 2024 23:34:35 +0700 Subject: [PATCH] Implements wrapper type to store per-CPU value (#983) --- src/obkrnl/src/config/aarch64.rs | 4 ++- src/obkrnl/src/config/mod.rs | 2 +- src/obkrnl/src/config/x86_64.rs | 4 ++- src/obkrnl/src/context/aarch64.rs | 2 +- src/obkrnl/src/context/local.rs | 42 +++++++++++++++++++++++++++++++ src/obkrnl/src/context/mod.rs | 24 +++++++++++------- src/obkrnl/src/context/x86_64.rs | 22 ++++++---------- src/obkrnl/src/malloc/mod.rs | 2 ++ src/obkrnl/src/malloc/stage2.rs | 11 ++++---- src/obkrnl/src/uma/mod.rs | 27 +++++++------------- 10 files changed, 90 insertions(+), 50 deletions(-) create mode 100644 src/obkrnl/src/context/local.rs diff --git a/src/obkrnl/src/config/aarch64.rs b/src/obkrnl/src/config/aarch64.rs index 82e6843f..3b0c07aa 100644 --- a/src/obkrnl/src/config/aarch64.rs +++ b/src/obkrnl/src/config/aarch64.rs @@ -1 +1,3 @@ -pub const PAGE_SIZE: usize = 0x4000; +use core::num::NonZero; + +pub const PAGE_SIZE: NonZero = unsafe { NonZero::new_unchecked(0x4000) }; diff --git a/src/obkrnl/src/config/mod.rs b/src/obkrnl/src/config/mod.rs index 2d2b01c7..6637df64 100644 --- a/src/obkrnl/src/config/mod.rs +++ b/src/obkrnl/src/config/mod.rs @@ -34,4 +34,4 @@ static mut BOOT_ENV: *const BootEnv = null(); static mut CONFIG: *const Config = null(); #[elf_note(section = ".note.obkrnl.page-size", name = "obkrnl", ty = 0)] -static NOTE_PAGE_SIZE: [u8; size_of::()] = PAGE_SIZE.to_ne_bytes(); +static NOTE_PAGE_SIZE: [u8; size_of::()] = PAGE_SIZE.get().to_ne_bytes(); diff --git a/src/obkrnl/src/config/x86_64.rs b/src/obkrnl/src/config/x86_64.rs index c5c560dc..a9264d88 100644 --- a/src/obkrnl/src/config/x86_64.rs +++ b/src/obkrnl/src/config/x86_64.rs @@ -1 +1,3 @@ -pub const PAGE_SIZE: usize = 0x1000; +use core::num::NonZero; + +pub const PAGE_SIZE: NonZero = unsafe { NonZero::new_unchecked(0x1000) }; diff --git a/src/obkrnl/src/context/aarch64.rs b/src/obkrnl/src/context/aarch64.rs index 153ff13b..86453cab 100644 --- a/src/obkrnl/src/context/aarch64.rs +++ b/src/obkrnl/src/context/aarch64.rs @@ -9,6 +9,6 @@ pub unsafe fn thread() -> *const Thread { todo!(); } -pub unsafe fn current() -> *const Context { +pub unsafe fn cpu() -> usize { todo!(); } diff --git a/src/obkrnl/src/context/local.rs b/src/obkrnl/src/context/local.rs new file mode 100644 index 00000000..b7e4babc --- /dev/null +++ b/src/obkrnl/src/context/local.rs @@ -0,0 +1,42 @@ +use super::{Context, PinnedContext}; +use crate::config::config; +use alloc::vec::Vec; +use core::ops::Deref; + +/// Encapsulates per-CPU value. +pub struct CpuLocal(Vec); + +impl CpuLocal { + pub fn new(mut f: impl FnMut(usize) -> T) -> Self { + let len = config().max_cpu.get(); + let mut vec = Vec::with_capacity(len); + + for i in 0..len { + vec.push(f(i)); + } + + Self(vec) + } + + pub fn lock(&self) -> CpuLock { + let pin = Context::pin(); + let val = &self.0[unsafe { pin.cpu() }]; + + CpuLock { val, pin } + } +} + +/// RAII struct to access per-CPU value in [`CpuLocal`]. +pub struct CpuLock<'a, T> { + val: &'a T, + #[allow(dead_code)] + pin: PinnedContext, // Must be dropped last. +} + +impl<'a, T> Deref for CpuLock<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.val + } +} diff --git a/src/obkrnl/src/context/mod.rs b/src/obkrnl/src/context/mod.rs index fd25e008..363af1b3 100644 --- a/src/obkrnl/src/context/mod.rs +++ b/src/obkrnl/src/context/mod.rs @@ -2,9 +2,12 @@ use crate::proc::Thread; use alloc::sync::Arc; use core::sync::atomic::{AtomicPtr, Ordering}; +pub use self::local::*; + #[cfg_attr(target_arch = "aarch64", path = "aarch64.rs")] #[cfg_attr(target_arch = "x86_64", path = "x86_64.rs")] mod arch; +mod local; /// Implementation of `pcpu` structure. /// @@ -55,9 +58,7 @@ impl Context { unsafe { (*td).critical_sections().fetch_add(1, Ordering::Relaxed) }; - // Once the thread is in a critical section it will never be switch a CPU so it is safe to - // keep a pointer to a context here. - PinnedContext(unsafe { self::arch::current() }) + PinnedContext(td) } /// # Safety @@ -76,15 +77,20 @@ impl Drop for Context { } } -/// RAII struct to pin the current thread to current CPU. +/// RAII struct to pin the current thread to a CPU. /// /// This struct must not implement [`Send`] and [`Sync`]. Currently it stored a pointer, which will /// make it `!Send` and `!Sync`. -pub struct PinnedContext(*const Context); +pub struct PinnedContext(*const Thread); impl PinnedContext { - pub fn cpu(&self) -> usize { - unsafe { (*self.0).cpu } + /// See [`CpuLocal`] for a safe alternative if you want to store per-CPU value. + /// + /// # Safety + /// Anything that derive from the returned value will invalid when this [`PinnedContext`] + /// dropped. + pub unsafe fn cpu(&self) -> usize { + self::arch::cpu() } } @@ -92,9 +98,9 @@ impl Drop for PinnedContext { fn drop(&mut self) { // Relax ordering should be enough here since this decrement will be checked by the same CPU // when an interupt happens. - let td = unsafe { (*self.0).thread.load(Ordering::Relaxed) }; + let td = unsafe { &*self.0 }; - unsafe { (*td).critical_sections().fetch_sub(1, Ordering::Relaxed) }; + unsafe { td.critical_sections().fetch_sub(1, Ordering::Relaxed) }; // TODO: Implement td_owepreempt. } diff --git a/src/obkrnl/src/context/x86_64.rs b/src/obkrnl/src/context/x86_64.rs index a0c44902..983d99ae 100644 --- a/src/obkrnl/src/context/x86_64.rs +++ b/src/obkrnl/src/context/x86_64.rs @@ -51,23 +51,17 @@ pub unsafe fn thread() -> *const Thread { td } -pub unsafe fn current() -> *const Context { - // Load current GS. Although the "rdmsr" does not read or write to any memory but it need to - // synchronize with a critical section. - let mut edx: u32; - let mut eax: u32; +pub unsafe fn cpu() -> usize { + // SAFETY: This load load need to synchronize with a critical section. That mean we cannot use + // "pure" + "readonly" options here. + let mut cpu; asm!( - "rdmsr", - in("ecx") 0xc0000101u32, - out("edx") edx, - out("eax") eax, + "mov {out}, gs:[{off}]", + off = in(reg) offset_of!(Context, cpu), + out = out(reg) cpu, options(preserves_flags, nostack) ); - // Combine EDX and EAX. - let edx = edx as usize; - let eax = eax as usize; - - ((edx << 32) | eax) as *const Context + cpu } diff --git a/src/obkrnl/src/malloc/mod.rs b/src/obkrnl/src/malloc/mod.rs index 68a9e264..2cd16aad 100644 --- a/src/obkrnl/src/malloc/mod.rs +++ b/src/obkrnl/src/malloc/mod.rs @@ -56,6 +56,7 @@ impl Drop for KernelHeap { } unsafe impl GlobalAlloc for KernelHeap { + #[inline(never)] unsafe fn alloc(&self, layout: Layout) -> *mut u8 { // SAFETY: GlobalAlloc::alloc required layout to be non-zero. self.stage2 @@ -65,6 +66,7 @@ unsafe impl GlobalAlloc for KernelHeap { .unwrap_or_else(|| self.stage1.alloc(layout)) } + #[inline(never)] unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { if self.stage1.is_owner(ptr) { // SAFETY: GlobalAlloc::dealloc required ptr to be the same one that returned from our diff --git a/src/obkrnl/src/malloc/stage2.rs b/src/obkrnl/src/malloc/stage2.rs index 24eb312f..f2c5609c 100644 --- a/src/obkrnl/src/malloc/stage2.rs +++ b/src/obkrnl/src/malloc/stage2.rs @@ -5,6 +5,7 @@ use alloc::string::ToString; use alloc::sync::Arc; use alloc::vec::Vec; use core::alloc::Layout; +use core::num::NonZero; use core::sync::atomic::{AtomicU64, Ordering}; /// Stage 2 kernel heap. @@ -20,7 +21,7 @@ impl Stage2 { const KMEM_ZSHIFT: usize = 4; const KMEM_ZBASE: usize = 16; const KMEM_ZMASK: usize = Self::KMEM_ZBASE - 1; - const KMEM_ZSIZE: usize = PAGE_SIZE >> Self::KMEM_ZSHIFT; + const KMEM_ZSIZE: usize = PAGE_SIZE.get() >> Self::KMEM_ZSHIFT; /// See `kmeminit` on the PS4 for a reference. pub fn new() -> Self { @@ -38,7 +39,7 @@ impl Stage2 { for i in Self::KMEM_ZSHIFT.. { // Stop if size larger than page size. - let size = 1usize << i; + let size = NonZero::new(1usize << i).unwrap(); if size > PAGE_SIZE { break; @@ -47,7 +48,7 @@ impl Stage2 { // Create zone. let zone = Arc::new(UmaZone::new(size.to_string().into(), size, align - 1)); - while last <= size { + while last <= size.get() { zones.push(zone.clone()); last += Self::KMEM_ZBASE; } @@ -83,7 +84,7 @@ impl Stage2 { // Determine how to allocate. let size = layout.size(); - if size <= PAGE_SIZE { + if size <= PAGE_SIZE.get() { // Get zone to allocate from. let align = layout.align().trailing_zeros() as usize; let size = if (size & Self::KMEM_ZMASK) != 0 { @@ -100,7 +101,7 @@ impl Stage2 { // Update stats. let cx = Context::pin(); let stats = &self.stats[cx.cpu()]; - let size = if mem.is_null() { 0 } else { zone.size() }; + let size = if mem.is_null() { 0 } else { zone.size().get() }; if size != 0 { stats diff --git a/src/obkrnl/src/uma/mod.rs b/src/obkrnl/src/uma/mod.rs index f4c2dd2e..82c8113b 100644 --- a/src/obkrnl/src/uma/mod.rs +++ b/src/obkrnl/src/uma/mod.rs @@ -1,37 +1,29 @@ use self::cache::UmaCache; -use crate::config::config; -use crate::context::Context; +use crate::context::{Context, CpuLocal}; use alloc::borrow::Cow; -use alloc::vec::Vec; +use core::num::NonZero; mod bucket; mod cache; /// Implementation of `uma_zone` structure. pub struct UmaZone { - size: usize, // uz_size - caches: Vec, // uz_cpu + size: NonZero, // uz_size + caches: CpuLocal, // uz_cpu } impl UmaZone { /// See `uma_zcreate` on the PS4 for a reference. - pub fn new(_: Cow<'static, str>, size: usize, _: usize) -> Self { + pub fn new(_: Cow<'static, str>, size: NonZero, _: usize) -> Self { // Ths PS4 allocate a new uma_zone from masterzone_z but we don't have that. This method // basically an implementation of zone_ctor. - let len = config().max_cpu.get(); - let mut caches = Vec::with_capacity(len); - - for _ in 0..len { - caches.push(UmaCache::default()); - } - Self { - size, // TODO: Check if size is allowed to be zero. If not, change it to NonZero. - caches, + size, + caches: CpuLocal::new(|_| UmaCache::default()), } } - pub fn size(&self) -> usize { + pub fn size(&self) -> NonZero { self.size } @@ -45,8 +37,7 @@ impl UmaZone { } // Try to allocate from per-CPU cache. - let cx = Context::pin(); - let cache = &self.caches[cx.cpu()]; + let cache = self.caches.lock(); let bucket = cache.alloc(); while let Some(bucket) = bucket {