-
Notifications
You must be signed in to change notification settings - Fork 102
feature(smp): improve hart booting, TLS setup and per-core stack initialization #1678
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
a2dfe1b
c067c15
ee94477
dc8e3c1
c2a4a71
a324844
0ecdac0
a3b7441
42a69a3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,40 @@ | ||
use core::arch::naked_asm; | ||
use core::sync::atomic::Ordering; | ||
use core::arch::{asm, naked_asm}; | ||
use core::sync::atomic::{AtomicBool, AtomicU64, Ordering, fence}; | ||
|
||
use fdt::Fdt; | ||
use hermit_entry::Entry; | ||
use hermit_entry::boot_info::RawBootInfo; | ||
|
||
use super::{CPU_ONLINE, CURRENT_BOOT_ID, HART_MASK, NUM_CPUS, get_dtb_ptr}; | ||
use crate::arch::riscv64::kernel::CURRENT_STACK_ADDRESS; | ||
#[cfg(not(feature = "smp"))] | ||
use crate::arch::riscv64::kernel::processor; | ||
use crate::arch::riscv64::kernel::{CURRENT_STACK_ADDRESS, processor}; | ||
use crate::{KERNEL_STACK_SIZE, env}; | ||
|
||
//static mut BOOT_STACK: [u8; KERNEL_STACK_SIZE] = [0; KERNEL_STACK_SIZE]; | ||
// Number of slots in the `CPU_DATA` table below, so the largest hart ID that can index it is MAX_CORES - 1. | ||
const MAX_CORES: usize = 32; | ||
|
||
// Cache-line aligned CPU-local data | ||
// | ||
// One record per hart, stored in the `CPU_DATA` table and indexed by hart ID in `pre_init`. | ||
// `#[repr(align(64))]` forces 64-byte alignment, and therefore a size that is a multiple of 64 bytes, | ||
// so two records never share one 64-byte line. | ||
// NOTE(review): the fields add up to 1 + 8 + 48 = 57 bytes; the alignment attribute alone already | ||
// rounds the size up to 64, so the explicit `padding` field looks redundant - confirm before keeping it. | ||
#[repr(align(64))] | ||
struct PerCpuData { | ||
// Set to `true` by `pre_init` once the owning hart has passed its early setup. | ||
is_initialized: AtomicBool, | ||
// Written by `pre_init` (boot hart stores 1, secondary harts increment by 1); | ||
// no reader is visible in this part of the diff. | ||
local_counter: AtomicU64, | ||
#[allow(dead_code)] | ||
padding: [u8; 48], // Fill to full cache line | ||
} | ||
|
||
impl PerCpuData { | ||
// Builds a record in its initial state: not initialized, counter at zero, padding zeroed. | ||
// Declared `const fn` so it can be evaluated inside the initializer of a `const`/`static` item. | ||
const fn new() -> Self { | ||
Self { | ||
is_initialized: AtomicBool::new(false), | ||
local_counter: AtomicU64::new(0), | ||
padding: [0; 48], | ||
} | ||
} | ||
} | ||
|
||
// Table of per-hart records with `MAX_CORES` entries; `pre_init` indexes it with the hart ID. | ||
// NOTE(review): indexing with a hart ID >= MAX_CORES would panic on the bounds check - confirm that | ||
// hart IDs are guaranteed to be below MAX_CORES on all supported platforms. | ||
// The clippy lint is allowed because the `const` template below contains atomics. | ||
#[allow(clippy::declare_interior_mutable_const)] | ||
static CPU_DATA: [PerCpuData; MAX_CORES] = { | ||
// `PerCpuData` is not `Copy`, so the array-repeat expression needs a `const` item as its operand; | ||
// each array element is a fresh evaluation of this template. | ||
const CPU_LOCAL: PerCpuData = PerCpuData::new(); | ||
[CPU_LOCAL; MAX_CORES] | ||
}; | ||
Comment on lines
+12
to
+37
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why would we need this? Can we remove this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. It's working, but it's experimental. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I understand using hart-local data and aligning that to cache lines. But we already have […]. Also, the members ([…]) […]. Is there any reason to keep this that I overlooked? 🤔 |
||
|
||
/// Entrypoint - Initialize Stack pointer and Exception Table | ||
#[unsafe(no_mangle)] | ||
|
@@ -47,24 +70,59 @@ pub unsafe extern "C" fn _start(hart_id: usize, boot_info: Option<&'static RawBo | |
} | ||
|
||
unsafe extern "C" fn pre_init(hart_id: usize, boot_info: Option<&'static RawBootInfo>) -> ! { | ||
CURRENT_BOOT_ID.store(hart_id as u32, Ordering::Relaxed); | ||
// Sanity check: validate hart_id against HART_MASK | ||
if CPU_ONLINE.load(Ordering::Acquire) > 0 { | ||
// Faster check for Secondary-HARTs | ||
if (HART_MASK.load(Ordering::Relaxed) & (1 << hart_id)) == 0 { | ||
error!("Invalid hart ID: {hart_id}"); | ||
processor::halt(); | ||
} | ||
} | ||
|
||
// Memory Fence before ID storage | ||
fence(Ordering::Release); | ||
CURRENT_BOOT_ID.store(hart_id as u32, Ordering::Release); | ||
Comment on lines
+82
to
+84
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The memory fence is not needed, right? Why did you change the atomic ordering? Relaxed ordering should be sufficient, since we don't use |
||
|
||
if CPU_ONLINE.load(Ordering::Acquire) == 0 { | ||
unsafe { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let's put the refactoring of the reduction of |
||
env::set_boot_info(*boot_info.unwrap()); | ||
let fdt = Fdt::from_ptr(get_dtb_ptr()).expect("FDT is invalid"); | ||
// Init HART_MASK | ||
let mut hart_mask = 0; | ||
for cpu in fdt.cpus() { | ||
let hart_id = cpu.property("reg").unwrap().as_usize().unwrap(); | ||
let status = cpu.property("status").unwrap().as_str().unwrap(); | ||
|
||
if status != "disabled\u{0}" { | ||
hart_mask |= 1 << hart_id; | ||
// Boot CPU Initialization | ||
env::set_boot_info(*boot_info.unwrap()); | ||
let fdt = unsafe { Fdt::from_ptr(get_dtb_ptr()) }.expect("FDT is invalid"); | ||
|
||
// Build HART_MASK using readable conditional checks | ||
let mut hart_mask = 0u64; | ||
for cpu in fdt.cpus() { | ||
if let Some(cpu_id) = cpu.property("reg").and_then(|p| p.as_usize()) { | ||
if cpu | ||
.property("status") | ||
.and_then(|p| p.as_str()) | ||
.is_some_and(|s| s != "disabled\u{0}") | ||
{ | ||
hart_mask |= 1 << cpu_id; | ||
} | ||
} | ||
NUM_CPUS.store(fdt.cpus().count().try_into().unwrap(), Ordering::Relaxed); | ||
HART_MASK.store(hart_mask, Ordering::Relaxed); | ||
} | ||
|
||
NUM_CPUS.store(fdt.cpus().count().try_into().unwrap(), Ordering::Release); | ||
|
||
// Memory Fence before HART_MASK update | ||
fence(Ordering::Release); | ||
HART_MASK.store(hart_mask, Ordering::Release); | ||
Comment on lines
+107
to
+109
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same question as https://github.com/hermit-os/kernel/pull/1678/files#r2120516736 here. |
||
|
||
CPU_DATA[hart_id] | ||
.is_initialized | ||
.store(true, Ordering::Release); | ||
CPU_DATA[hart_id].local_counter.store(1, Ordering::Release); | ||
|
||
// Initialize TLS for boot core: | ||
if let Some(tls_info) = env::boot_info().load_info.tls_info { | ||
// Load the value into 'tp' using the mv instruction: | ||
unsafe { | ||
asm!( | ||
"mv tp, {val}", | ||
val = in(reg) tls_info.start as usize, | ||
options(nostack, nomem) | ||
); | ||
} | ||
Comment on lines
+116
to
+125
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this needed? This is already done in src/arch/riscv64/kernel/scheduler.rs#L274 and src/arch/riscv64/kernel/scheduler.rs#L404. |
||
} | ||
crate::boot_processor_main() | ||
} else { | ||
|
@@ -76,6 +134,16 @@ unsafe extern "C" fn pre_init(hart_id: usize, boot_info: Option<&'static RawBoot | |
} | ||
} | ||
#[cfg(feature = "smp")] | ||
crate::application_processor_main(); | ||
{ | ||
// Optimized Secondary-HART initialization | ||
fence(Ordering::Acquire); | ||
CPU_DATA[hart_id] | ||
.is_initialized | ||
.store(true, Ordering::Release); | ||
CPU_DATA[hart_id] | ||
.local_counter | ||
.fetch_add(1, Ordering::Relaxed); | ||
crate::application_processor_main() | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think these comments should be kept. The lower one is still true, and the upper one is still to-do, since Uhyve does not support RISC-V yet.