From c1dcf43d4b9a9a399cb1da2e39b27d5e7e0cf69d Mon Sep 17 00:00:00 2001
From: Putta Khunchalee <ultimaweapon@outlook.com>
Date: Sat, 14 Sep 2024 23:34:35 +0700
Subject: [PATCH] Implements wrapper type to store per-CPU value (#983)

---
 src/obkrnl/src/config/aarch64.rs  |  4 ++-
 src/obkrnl/src/config/mod.rs      |  2 +-
 src/obkrnl/src/config/x86_64.rs   |  4 ++-
 src/obkrnl/src/context/aarch64.rs |  2 +-
 src/obkrnl/src/context/local.rs   | 42 +++++++++++++++++++++++++++++++
 src/obkrnl/src/context/mod.rs     | 24 +++++++++++-------
 src/obkrnl/src/context/x86_64.rs  | 22 ++++++----------
 src/obkrnl/src/malloc/mod.rs      |  2 ++
 src/obkrnl/src/malloc/stage2.rs   | 11 ++++----
 src/obkrnl/src/uma/mod.rs         | 27 +++++++-------------
 10 files changed, 90 insertions(+), 50 deletions(-)
 create mode 100644 src/obkrnl/src/context/local.rs
diff --git a/src/obkrnl/src/config/aarch64.rs b/src/obkrnl/src/config/aarch64.rs
index 82e6843f..3b0c07aa 100644
--- a/src/obkrnl/src/config/aarch64.rs
+++ b/src/obkrnl/src/config/aarch64.rs
@@ -1 +1,3 @@
-pub const PAGE_SIZE: usize = 0x4000;
+use core::num::NonZero;
+
+pub const PAGE_SIZE: NonZero<usize> = unsafe { NonZero::new_unchecked(0x4000) };
diff --git a/src/obkrnl/src/config/mod.rs b/src/obkrnl/src/config/mod.rs
index 2d2b01c7..6637df64 100644
--- a/src/obkrnl/src/config/mod.rs
+++ b/src/obkrnl/src/config/mod.rs
@@ -34,4 +34,4 @@ static mut BOOT_ENV: *const BootEnv = null();
 static mut CONFIG: *const Config = null();
 
 #[elf_note(section = ".note.obkrnl.page-size", name = "obkrnl", ty = 0)]
-static NOTE_PAGE_SIZE: [u8; size_of::<usize>()] = PAGE_SIZE.to_ne_bytes();
+static NOTE_PAGE_SIZE: [u8; size_of::<usize>()] = PAGE_SIZE.get().to_ne_bytes();
diff --git a/src/obkrnl/src/config/x86_64.rs b/src/obkrnl/src/config/x86_64.rs
index c5c560dc..a9264d88 100644
--- a/src/obkrnl/src/config/x86_64.rs
+++ b/src/obkrnl/src/config/x86_64.rs
@@ -1 +1,3 @@
-pub const PAGE_SIZE: usize = 0x1000;
+use core::num::NonZero;
+
+pub const PAGE_SIZE: NonZero<usize> = unsafe { NonZero::new_unchecked(0x1000) };
diff --git a/src/obkrnl/src/context/aarch64.rs b/src/obkrnl/src/context/aarch64.rs
index 153ff13b..86453cab 100644
--- a/src/obkrnl/src/context/aarch64.rs
+++ b/src/obkrnl/src/context/aarch64.rs
@@ -9,6 +9,6 @@ pub unsafe fn thread() -> *const Thread {
     todo!();
 }
 
-pub unsafe fn current() -> *const Context {
+pub unsafe fn cpu() -> usize {
     todo!();
 }
diff --git a/src/obkrnl/src/context/local.rs b/src/obkrnl/src/context/local.rs
new file mode 100644
index 00000000..b7e4babc
--- /dev/null
+++ b/src/obkrnl/src/context/local.rs
@@ -0,0 +1,42 @@
+use super::{Context, PinnedContext};
+use crate::config::config;
+use alloc::vec::Vec;
+use core::ops::Deref;
+
+/// Encapsulates a per-CPU value.
+pub struct CpuLocal<T>(Vec<T>);
+
+impl<T> CpuLocal<T> {
+    pub fn new(mut f: impl FnMut(usize) -> T) -> Self {
+        let len = config().max_cpu.get();
+        let mut vec = Vec::with_capacity(len);
+
+        for i in 0..len {
+            vec.push(f(i));
+        }
+
+        Self(vec)
+    }
+
+    pub fn lock(&self) -> CpuLock<T> {
+        let pin = Context::pin();
+        let val = &self.0[unsafe { pin.cpu() }];
+
+        CpuLock { val, pin }
+    }
+}
+
+/// RAII struct to access a per-CPU value in [`CpuLocal`].
+pub struct CpuLock<'a, T> {
+    val: &'a T,
+    #[allow(dead_code)]
+    pin: PinnedContext, // Must be dropped last.
+}
+
+impl<'a, T> Deref for CpuLock<'a, T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.val
+    }
+}
diff --git a/src/obkrnl/src/context/mod.rs b/src/obkrnl/src/context/mod.rs
index fd25e008..363af1b3 100644
--- a/src/obkrnl/src/context/mod.rs
+++ b/src/obkrnl/src/context/mod.rs
@@ -2,9 +2,12 @@ use crate::proc::Thread;
 use alloc::sync::Arc;
 use core::sync::atomic::{AtomicPtr, Ordering};
 
+pub use self::local::*;
+
 #[cfg_attr(target_arch = "aarch64", path = "aarch64.rs")]
 #[cfg_attr(target_arch = "x86_64", path = "x86_64.rs")]
 mod arch;
+mod local;
 
 /// Implementation of `pcpu` structure.
 ///
@@ -55,9 +58,7 @@ impl Context {
 
         unsafe { (*td).critical_sections().fetch_add(1, Ordering::Relaxed) };
 
-        // Once the thread is in a critical section it will never be switch a CPU so it is safe to
-        // keep a pointer to a context here.
-        PinnedContext(unsafe { self::arch::current() })
+        PinnedContext(td)
     }
 
     /// # Safety
@@ -76,15 +77,20 @@ impl Drop for Context {
     }
 }
 
-/// RAII struct to pin the current thread to current CPU.
+/// RAII struct to pin the current thread to a CPU.
 ///
 /// This struct must not implement [`Send`] and [`Sync`]. Currently it stored a pointer, which will
 /// make it `!Send` and `!Sync`.
-pub struct PinnedContext(*const Context);
+pub struct PinnedContext(*const Thread);
 
 impl PinnedContext {
-    pub fn cpu(&self) -> usize {
-        unsafe { (*self.0).cpu }
+    /// See [`CpuLocal`] for a safe alternative if you want to store a per-CPU value.
+    ///
+    /// # Safety
+    /// Anything derived from the returned value becomes invalid once this [`PinnedContext`] is
+    /// dropped.
+    pub unsafe fn cpu(&self) -> usize {
+        self::arch::cpu()
     }
 }
 
@@ -92,9 +98,9 @@ impl Drop for PinnedContext {
     fn drop(&mut self) {
         // Relax ordering should be enough here since this decrement will be checked by the same CPU
         // when an interupt happens.
-        let td = unsafe { (*self.0).thread.load(Ordering::Relaxed) };
+        let td = unsafe { &*self.0 };
 
-        unsafe { (*td).critical_sections().fetch_sub(1, Ordering::Relaxed) };
+        unsafe { td.critical_sections().fetch_sub(1, Ordering::Relaxed) };
 
         // TODO: Implement td_owepreempt.
     }
diff --git a/src/obkrnl/src/context/x86_64.rs b/src/obkrnl/src/context/x86_64.rs
index a0c44902..983d99ae 100644
--- a/src/obkrnl/src/context/x86_64.rs
+++ b/src/obkrnl/src/context/x86_64.rs
@@ -51,23 +51,17 @@ pub unsafe fn thread() -> *const Thread {
     td
 }
 
-pub unsafe fn current() -> *const Context {
-    // Load current GS. Although the "rdmsr" does not read or write to any memory but it need to
-    // synchronize with a critical section.
-    let mut edx: u32;
-    let mut eax: u32;
+pub unsafe fn cpu() -> usize {
+    // SAFETY: This load needs to synchronize with a critical section. That means we cannot use
+    // "pure" + "readonly" options here.
+    let mut cpu;
 
     asm!(
-        "rdmsr",
-        in("ecx") 0xc0000101u32,
-        out("edx") edx,
-        out("eax") eax,
+        "mov {out}, gs:[{off}]",
+        off = in(reg) offset_of!(Context, cpu),
+        out = out(reg) cpu,
         options(preserves_flags, nostack)
     );
 
-    // Combine EDX and EAX.
-    let edx = edx as usize;
-    let eax = eax as usize;
-
-    ((edx << 32) | eax) as *const Context
+    cpu
 }
diff --git a/src/obkrnl/src/malloc/mod.rs b/src/obkrnl/src/malloc/mod.rs
index 68a9e264..2cd16aad 100644
--- a/src/obkrnl/src/malloc/mod.rs
+++ b/src/obkrnl/src/malloc/mod.rs
@@ -56,6 +56,7 @@ impl Drop for KernelHeap {
 }
 
 unsafe impl GlobalAlloc for KernelHeap {
+    #[inline(never)]
     unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
         // SAFETY: GlobalAlloc::alloc required layout to be non-zero.
         self.stage2
@@ -65,6 +66,7 @@ unsafe impl GlobalAlloc for KernelHeap {
             .unwrap_or_else(|| self.stage1.alloc(layout))
     }
 
+    #[inline(never)]
     unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
         if self.stage1.is_owner(ptr) {
             // SAFETY: GlobalAlloc::dealloc required ptr to be the same one that returned from our
diff --git a/src/obkrnl/src/malloc/stage2.rs b/src/obkrnl/src/malloc/stage2.rs
index 24eb312f..f2c5609c 100644
--- a/src/obkrnl/src/malloc/stage2.rs
+++ b/src/obkrnl/src/malloc/stage2.rs
@@ -5,6 +5,7 @@ use alloc::string::ToString;
 use alloc::sync::Arc;
 use alloc::vec::Vec;
 use core::alloc::Layout;
+use core::num::NonZero;
 use core::sync::atomic::{AtomicU64, Ordering};
 
 /// Stage 2 kernel heap.
@@ -20,7 +21,7 @@ impl Stage2 {
     const KMEM_ZSHIFT: usize = 4;
     const KMEM_ZBASE: usize = 16;
     const KMEM_ZMASK: usize = Self::KMEM_ZBASE - 1;
-    const KMEM_ZSIZE: usize = PAGE_SIZE >> Self::KMEM_ZSHIFT;
+    const KMEM_ZSIZE: usize = PAGE_SIZE.get() >> Self::KMEM_ZSHIFT;
 
     /// See `kmeminit` on the PS4 for a reference.
     pub fn new() -> Self {
@@ -38,7 +39,7 @@ impl Stage2 {
 
             for i in Self::KMEM_ZSHIFT.. {
                 // Stop if size larger than page size.
-                let size = 1usize << i;
+                let size = NonZero::new(1usize << i).unwrap();
 
                 if size > PAGE_SIZE {
                     break;
@@ -47,7 +48,7 @@ impl Stage2 {
                 // Create zone.
                 let zone = Arc::new(UmaZone::new(size.to_string().into(), size, align - 1));
 
-                while last <= size {
+                while last <= size.get() {
                     zones.push(zone.clone());
                     last += Self::KMEM_ZBASE;
                 }
@@ -83,7 +84,7 @@ impl Stage2 {
         // Determine how to allocate.
         let size = layout.size();
 
-        if size <= PAGE_SIZE {
+        if size <= PAGE_SIZE.get() {
             // Get zone to allocate from.
             let align = layout.align().trailing_zeros() as usize;
             let size = if (size & Self::KMEM_ZMASK) != 0 {
@@ -100,7 +101,7 @@ impl Stage2 {
             // Update stats.
             let cx = Context::pin();
             let stats = &self.stats[cx.cpu()];
-            let size = if mem.is_null() { 0 } else { zone.size() };
+            let size = if mem.is_null() { 0 } else { zone.size().get() };
 
             if size != 0 {
                 stats
diff --git a/src/obkrnl/src/uma/mod.rs b/src/obkrnl/src/uma/mod.rs
index f4c2dd2e..82c8113b 100644
--- a/src/obkrnl/src/uma/mod.rs
+++ b/src/obkrnl/src/uma/mod.rs
@@ -1,37 +1,29 @@
 use self::cache::UmaCache;
-use crate::config::config;
-use crate::context::Context;
+use crate::context::{Context, CpuLocal};
 use alloc::borrow::Cow;
-use alloc::vec::Vec;
+use core::num::NonZero;
 
 mod bucket;
 mod cache;
 
 /// Implementation of `uma_zone` structure.
 pub struct UmaZone {
-    size: usize,           // uz_size
-    caches: Vec<UmaCache>, // uz_cpu
+    size: NonZero<usize>,       // uz_size
+    caches: CpuLocal<UmaCache>, // uz_cpu
 }
 
 impl UmaZone {
     /// See `uma_zcreate` on the PS4 for a reference.
-    pub fn new(_: Cow<'static, str>, size: usize, _: usize) -> Self {
+    pub fn new(_: Cow<'static, str>, size: NonZero<usize>, _: usize) -> Self {
         // Ths PS4 allocate a new uma_zone from masterzone_z but we don't have that. This method
         // basically an implementation of zone_ctor.
-        let len = config().max_cpu.get();
-        let mut caches = Vec::with_capacity(len);
-
-        for _ in 0..len {
-            caches.push(UmaCache::default());
-        }
-
         Self {
-            size, // TODO: Check if size is allowed to be zero. If not, change it to NonZero<usize>.
-            caches,
+            size,
+            caches: CpuLocal::new(|_| UmaCache::default()),
         }
     }
 
-    pub fn size(&self) -> usize {
+    pub fn size(&self) -> NonZero<usize> {
         self.size
     }
 
@@ -45,8 +37,7 @@ impl UmaZone {
         }
 
         // Try to allocate from per-CPU cache.
-        let cx = Context::pin();
-        let cache = &self.caches[cx.cpu()];
+        let cache = self.caches.lock();
         let bucket = cache.alloc();
 
         while let Some(bucket) = bucket {