
feature(smp): improve hart booting, TLS setup and per-core stack initialization #1678


Open · wants to merge 9 commits into base: main
5 changes: 3 additions & 2 deletions src/arch/riscv64/kernel/mod.rs
@@ -184,9 +184,12 @@ fn finish_processor_init() {
pub fn boot_next_processor() {
let new_hart_mask = HART_MASK.load(Ordering::Relaxed);

debug!("Current HART_MASK: 0x{new_hart_mask:x}");
let next_hart_index = lsb(new_hart_mask);

if let Some(next_hart_id) = next_hart_index {
debug!("Preparing to start HART {next_hart_id}");

{
let stack = physicalmem::allocate(KERNEL_STACK_SIZE)
.expect("Failed to allocate boot stack for new core");
@@ -199,10 +202,8 @@ pub fn boot_next_processor() {
next_hart_id
);

// TODO: Old: Changing cpu_online will cause uhyve to start the next processor
CPU_ONLINE.fetch_add(1, Ordering::Release);

//When running bare-metal/QEMU we use the firmware to start the next hart
Comment on lines -202 to -205 (Member):
I think these comments should be kept. The lower one is still true, and the upper one is still to-do, since Uhyve does not support RISC-V yet.

if !env::is_uhyve() {
sbi_rt::hart_start(next_hart_id as usize, start::_start as usize, 0).unwrap();
}
4 changes: 2 additions & 2 deletions src/arch/riscv64/kernel/scheduler.rs
@@ -281,7 +281,7 @@ impl TaskTLS {
let tls_start = VirtAddr::new(tls_info.start);
// Yes, it does, so we have to allocate TLS memory.
// Allocate enough space for the given size and one more variable of type usize, which holds the tls_pointer.
let tls_allocation_size = tls_size.align_up(32usize); // + mem::size_of::<usize>();
let tls_allocation_size = tls_size.align_up(tls_info.align as usize);
// We allocate in 128 byte granularity (= cache line size) to avoid false sharing
let memory_size = tls_allocation_size.align_up(128usize);
let layout =
@@ -311,7 +311,7 @@ impl TaskTLS {
}

debug!(
"Set up TLS at 0x{tls_pointer:x}, tdata_size 0x{tdata_size:x}, tls_size 0x{tls_size:x}"
"Set up TLS at {tls_pointer:#x}, tdata_size {tdata_size:#x}, tls_size {tls_size:#x}"
);

Some(Box::new(Self {
110 changes: 89 additions & 21 deletions src/arch/riscv64/kernel/start.rs
@@ -1,17 +1,40 @@
use core::arch::naked_asm;
use core::sync::atomic::Ordering;
use core::arch::{asm, naked_asm};
use core::sync::atomic::{AtomicBool, AtomicU64, Ordering, fence};

use fdt::Fdt;
use hermit_entry::Entry;
use hermit_entry::boot_info::RawBootInfo;

use super::{CPU_ONLINE, CURRENT_BOOT_ID, HART_MASK, NUM_CPUS, get_dtb_ptr};
use crate::arch::riscv64::kernel::CURRENT_STACK_ADDRESS;
#[cfg(not(feature = "smp"))]
use crate::arch::riscv64::kernel::processor;
use crate::arch::riscv64::kernel::{CURRENT_STACK_ADDRESS, processor};
use crate::{KERNEL_STACK_SIZE, env};

//static mut BOOT_STACK: [u8; KERNEL_STACK_SIZE] = [0; KERNEL_STACK_SIZE];
const MAX_CORES: usize = 32;

// Cache-line aligned CPU-local data
#[repr(align(64))]
struct PerCpuData {
is_initialized: AtomicBool,
local_counter: AtomicU64,
#[allow(dead_code)]
padding: [u8; 48], // Fill to full cache line
}

impl PerCpuData {
const fn new() -> Self {
Self {
is_initialized: AtomicBool::new(false),
local_counter: AtomicU64::new(0),
padding: [0; 48],
}
}
}

#[allow(clippy::declare_interior_mutable_const)]
static CPU_DATA: [PerCpuData; MAX_CORES] = {
const CPU_LOCAL: PerCpuData = PerCpuData::new();
[CPU_LOCAL; MAX_CORES]
};
Comment on lines +12 to +37 (Member):
Why would we need this? Can we remove this?

(Contributor Author):
It's working, but it's experimental.
The code tries to make RISC-V SMP systems faster by using CPU-local data. There is a dedicated data type called PerCpuData. With #[repr(align(64))], each instance of this data lines up with the CPU's cache lines, which stops different CPU cores from accidentally invalidating each other's data ("false sharing"). Because of this, each RISC-V core (hart) can work on its own data without slowing down other cores, which should help with parallel processing.

(Member):
I understand using hart-local data and aligning that to cache lines. But we already have arch::riscv64::kernel::CoreLocal for that. Compared to CoreLocal, PerCpuData brings no benefit, as far as I can see. MAX_CORES is a limitation that CoreLocal does not have.

Also, the members (is_initialized and local_counters) seem useless, since they are never read and the atomic orderings and fences make no sense to me.

Is there any reason to keep this that I overlooked? 🤔


/// Entrypoint - Initialize Stack pointer and Exception Table
#[unsafe(no_mangle)]
@@ -47,24 +70,59 @@ pub unsafe extern "C" fn _start(hart_id: usize, boot_info: Option<&'static RawBootInfo>
}

unsafe extern "C" fn pre_init(hart_id: usize, boot_info: Option<&'static RawBootInfo>) -> ! {
CURRENT_BOOT_ID.store(hart_id as u32, Ordering::Relaxed);
// Sanity check: validate hart_id against HART_MASK
if CPU_ONLINE.load(Ordering::Acquire) > 0 {
// Faster check for Secondary-HARTs
if (HART_MASK.load(Ordering::Relaxed) & (1 << hart_id)) == 0 {
error!("Invalid hart ID: {hart_id}");
processor::halt();
}
}

// Memory Fence before ID storage
fence(Ordering::Release);
CURRENT_BOOT_ID.store(hart_id as u32, Ordering::Release);
Comment on lines +82 to +84 (Member):
The memory fence is not needed, right? Why did you change the atomic ordering? Relaxed ordering should be sufficient, since we don't use CURRENT_BOOT_ID for synchronization.


if CPU_ONLINE.load(Ordering::Acquire) == 0 {
unsafe {
(Member, on the unsafe block above):
Let's put the refactoring of the reduction of unsafe scope in a separate commit or PR.

env::set_boot_info(*boot_info.unwrap());
let fdt = Fdt::from_ptr(get_dtb_ptr()).expect("FDT is invalid");
// Init HART_MASK
let mut hart_mask = 0;
for cpu in fdt.cpus() {
let hart_id = cpu.property("reg").unwrap().as_usize().unwrap();
let status = cpu.property("status").unwrap().as_str().unwrap();

if status != "disabled\u{0}" {
hart_mask |= 1 << hart_id;
// Boot CPU Initialization
env::set_boot_info(*boot_info.unwrap());
let fdt = unsafe { Fdt::from_ptr(get_dtb_ptr()) }.expect("FDT is invalid");

// Build HART_MASK using readable conditional checks
let mut hart_mask = 0u64;
for cpu in fdt.cpus() {
if let Some(cpu_id) = cpu.property("reg").and_then(|p| p.as_usize()) {
if cpu
.property("status")
.and_then(|p| p.as_str())
.is_some_and(|s| s != "disabled\u{0}")
{
hart_mask |= 1 << cpu_id;
}
}
NUM_CPUS.store(fdt.cpus().count().try_into().unwrap(), Ordering::Relaxed);
HART_MASK.store(hart_mask, Ordering::Relaxed);
}

NUM_CPUS.store(fdt.cpus().count().try_into().unwrap(), Ordering::Release);

// Memory Fence before HART_MASK update
fence(Ordering::Release);
HART_MASK.store(hart_mask, Ordering::Release);
Comment on lines +107 to +109 (Member):


CPU_DATA[hart_id]
.is_initialized
.store(true, Ordering::Release);
CPU_DATA[hart_id].local_counter.store(1, Ordering::Release);

// Initialize TLS for boot core:
if let Some(tls_info) = env::boot_info().load_info.tls_info {
// Load the value into 'tp' using the mv instruction:
unsafe {
asm!(
"mv tp, {val}",
val = in(reg) tls_info.start as usize,
options(nostack, nomem)
);
}
Comment on lines +116 to +125 (Member):

}
crate::boot_processor_main()
} else {
@@ -76,6 +134,16 @@ unsafe extern "C" fn pre_init(hart_id: usize, boot_info: Option<&'static RawBootInfo>) -> ! {
}
}
#[cfg(feature = "smp")]
crate::application_processor_main();
{
// Optimized Secondary-HART initialization
fence(Ordering::Acquire);
CPU_DATA[hart_id]
.is_initialized
.store(true, Ordering::Release);
CPU_DATA[hart_id]
.local_counter
.fetch_add(1, Ordering::Relaxed);
crate::application_processor_main()
}
}
}