diff --git a/rp2040-hal/src/multicore.rs b/rp2040-hal/src/multicore.rs index d1b018402..46d30eec0 100644 --- a/rp2040-hal/src/multicore.rs +++ b/rp2040-hal/src/multicore.rs @@ -8,7 +8,12 @@ //! # Usage //! //! ```no_run -//! use rp2040_hal::{pac, gpio::Pins, sio::Sio, multicore::{Multicore, Stack}}; +//! use rp2040_hal::{ +//! gpio::Pins, +//! multicore::{Multicore, Stack}, +//! pac, +//! sio::Sio, +//! }; //! //! static mut CORE1_STACK: Stack<4096> = Stack::new(); //! @@ -17,7 +22,7 @@ //! } //! //! fn main() -> ! { -//! let mut pac = pac::Peripherals::take().unwrap(); +//! let mut pac = pac::Peripherals::take().unwrap(); //! let mut sio = Sio::new(pac.SIO); //! // Other init code above this line //! let mut mc = Multicore::new(&mut pac.PSM, &mut pac.PPB, &mut sio.fifo); @@ -27,7 +32,6 @@ //! // The rest of your application below this line //! # loop {} //! } -//! //! ``` //! //! For inter-processor communications, see [`crate::sio::SioFifo`] and [`crate::sio::Spinlock0`] @@ -132,6 +136,17 @@ impl<'p> Multicore<'p> { } } +extern "C" { + fn _core1_trampoline() -> !; +} + +core::arch::global_asm!( + ".global _core1_trampoline", + ".thumb_func", + "_core1_trampoline:", + "pop {{r0, r1, pc}}", +); + /// A handle for controlling a logical core. pub struct Core<'p> { inner: Option<( @@ -166,12 +181,8 @@ impl<'p> Core<'p> { F: FnOnce() + Send + 'static, { if let Some((psm, ppb, fifo)) = self.inner.as_mut() { - // The first two ignored `u64` parameters are there to take up all of the registers, - // which means that the rest of the arguments are taken from the stack, - // where we're able to put them from core 0. + /// Called from the asm `_core1_trampoline` function. extern "C" fn core1_startup( - _: u64, - _: u64, entry: *mut ManuallyDrop, stack_limit: *mut usize, ) -> ! { @@ -192,7 +203,7 @@ impl<'p> Core<'p> { entry(); loop { - cortex_m::asm::wfe() + crate::arch::wfe() } } @@ -203,33 +214,50 @@ impl<'p> Core<'p> { // so wouldn't be zero cost. 
psm.frce_off().modify(|_, w| w.proc1().set_bit()); while !psm.frce_off().read().proc1().bit_is_set() { - cortex_m::asm::nop(); + crate::arch::nop(); } psm.frce_off().modify(|_, w| w.proc1().clear_bit()); // Set up the stack // AAPCS requires in 6.2.1.2 that the stack is 8bytes aligned., we may need to trim the - // array size to guaranty that the base of the stack (the end of the array) meets that requirement. + // array size to guarantee that the base of the stack (the end of the array) meets that requirement. // The start of the array does not need to be aligned. - let mut stack_ptr = stack.as_mut_ptr_range().end; - // on rp2040, usize are 4 bytes, so align_offset(8) on a *mut usize returns either 0 or 1. - let misalignment_offset = stack_ptr.align_offset(8); + let raw_stack_top = stack.as_mut_ptr_range().end; + + // on arm, usize are 4 bytes, so align_offset(8) on a *mut usize returns either 0 or 1. + let misalignment_offset = raw_stack_top.align_offset(8); + + let aligned_stack_top = unsafe { raw_stack_top.sub(misalignment_offset) }; // We don't want to drop this, since it's getting moved to the other core. let mut entry = ManuallyDrop::new(entry); - // Push the arguments to `core1_startup` onto the stack. + let mut stack_ptr = aligned_stack_top; + + // Push `core1_startup` and its arguments to the stack. + // + // Our stack grows downwards. We want `entry` at the lowest address, + // which is the first to be popped. unsafe { - stack_ptr = stack_ptr.sub(misalignment_offset); + // Push extern "C" wrapper function. + // + // It will get the next two values as its args thanks to + // our trampoline. + // + // This ends up in pc. + stack_ptr = stack_ptr.sub(1); + stack_ptr.write(core1_startup:: as usize); // Push `stack_limit`. + // This ends up in r1. stack_ptr = stack_ptr.sub(1); - stack_ptr.cast::<*mut usize>().write(stack.as_mut_ptr()); + stack_ptr.write(stack.as_mut_ptr() as usize); // Push `entry`. + // This ends up in r0. 
stack_ptr = stack_ptr.sub(1); - stack_ptr.cast::<*mut ManuallyDrop>().write(&mut entry); + stack_ptr.write(&mut entry as *mut ManuallyDrop as usize); } // Make sure the compiler does not reorder the stack writes after to after the @@ -237,7 +265,7 @@ impl<'p> Core<'p> { // core. // // From the compiler perspective, this doesn't guarantee that the second core - // actually sees those writes. However, we know that the RP2040 doesn't have + // actually sees those writes. However, we know that the RP2040 doesn't have // memory caches, and writes happen in-order. compiler_fence(Ordering::Release); @@ -250,8 +278,11 @@ impl<'p> Core<'p> { 0, 1, vector_table as usize, + // This points at where we put stuff on the stack. As the + // trampoline pops it off, the address will end up back at + // aligned_stack_top. stack_ptr as usize, - core1_startup:: as usize, + _core1_trampoline as usize, ]; let mut seq = 0; @@ -260,7 +291,7 @@ impl<'p> Core<'p> { let cmd = cmd_seq[seq] as u32; if cmd == 0 { fifo.drain(); - cortex_m::asm::sev(); + crate::arch::sev(); } fifo.write_blocking(cmd); let response = fifo.read_blocking(); diff --git a/rp235x-hal-examples/src/bin/multicore_polyblink.rs b/rp235x-hal-examples/src/bin/multicore_polyblink.rs index a8ccfe9a1..cc9ff25a4 100644 --- a/rp235x-hal-examples/src/bin/multicore_polyblink.rs +++ b/rp235x-hal-examples/src/bin/multicore_polyblink.rs @@ -1,29 +1,26 @@ //! # Multicore Blinking Example //! -//! This application blinks two LEDs on GPIOs 2 and 3 at different rates (3Hz -//! and 4Hz respectively.) +//! This application blinks two LEDs on GPIOs 2 and 25 at different rates (3 Hz +//! and 1 Hz respectively.) //! //! See the `Cargo.toml` file for Copyright and licence details. 
#![no_std] #![no_main] -use cortex_m::delay::Delay; - // Alias for our HAL crate use rp235x_hal as hal; -use hal::clocks::Clock; -use hal::gpio::Pins; -use hal::multicore::{Multicore, Stack}; -use hal::sio::Sio; - // Ensure we halt the program on panic (if we don't mention this crate it won't // be linked) use panic_halt as _; // Some things we need -use embedded_hal::digital::StatefulOutputPin; +use embedded_hal::delay::DelayNs; +use embedded_hal::digital::{OutputPin, StatefulOutputPin}; +use hal::gpio::Pins; +use hal::multicore::{Multicore, Stack}; +use hal::sio::Sio; /// Tell the Boot ROM about our application #[link_section = ".start_block"] @@ -37,16 +34,16 @@ const XTAL_FREQ_HZ: u32 = 12_000_000u32; /// The frequency at which core 0 will blink its LED (Hz). const CORE0_FREQ: u32 = 3; /// The frequency at which core 1 will blink its LED (Hz). -const CORE1_FREQ: u32 = 4; +const CORE1_FREQ: u32 = 1; /// The delay between each toggle of core 0's LED (us). -const CORE0_DELAY: u32 = 1_000_000 / CORE0_FREQ; +const CORE0_DELAY_US: u32 = 1_000_000 / CORE0_FREQ; /// The delay between each toggle of core 1's LED (us). -const CORE1_DELAY: u32 = 1_000_000 / CORE1_FREQ; +const CORE1_DELAY_US: u32 = 1_000_000 / CORE1_FREQ; /// Stack for core 1 /// /// Core 0 gets its stack via the normal route - any memory not used by static -/// values is reserved for stack and initialised by cortex-m-rt. +/// values is reserved for stack and initialised by cortex-m-rt / riscv-rt. /// To get the same for Core 1, we would need to compile everything seperately /// and modify the linker file for both programs, and that's quite annoying. /// So instead, core1.spawn takes a [usize] which gets used for the stack. @@ -57,13 +54,12 @@ static mut CORE1_STACK: Stack<4096> = Stack::new(); /// Entry point to our bare-metal application. 
/// -/// The `#[hal::entry]` macro ensures the Cortex-M start-up code calls this function +/// The `#[hal::entry]` macro ensures the start-up code calls this function /// as soon as all global variables and the spinlock are initialised. #[hal::entry] fn main() -> ! { - // Grab our singleton objects + // Grab our singleton object let mut pac = hal::pac::Peripherals::take().unwrap(); - let core = cortex_m::Peripherals::take().unwrap(); // Set up the watchdog driver - needed by the clock setup code let mut watchdog = hal::watchdog::Watchdog::new(pac.WATCHDOG); @@ -88,12 +84,10 @@ fn main() -> ! { sio.gpio_bank0, &mut pac.RESETS, ); - let mut led1 = pins.gpio2.into_push_pull_output(); - let mut led2 = pins.gpio3.into_push_pull_output(); - - // Set up the delay for the first core. - let sys_freq = clocks.system_clock.freq().to_Hz(); - let mut delay = Delay::new(core.SYST, sys_freq); + let mut led0 = pins.gpio2.into_push_pull_output(); + let mut led1 = pins.gpio25.into_push_pull_output(); + let mut timer0 = hal::Timer::new_timer0(pac.TIMER0, &mut pac.RESETS, &clocks); + let mut timer1 = hal::Timer::new_timer1(pac.TIMER1, &mut pac.RESETS, &clocks); // Start up the second core to blink the second LED let mut mc = Multicore::new(&mut pac.PSM, &mut pac.PPB, &mut sio.fifo); @@ -101,24 +95,20 @@ fn main() -> ! { let core1 = &mut cores[1]; core1 .spawn(unsafe { &mut CORE1_STACK.mem }, move || { - // Get the second core's copy of the `CorePeripherals`, which are per-core. - // Unfortunately, `cortex-m` doesn't support this properly right now, - // so we have to use `steal`. - let core = unsafe { cortex_m::Peripherals::steal() }; - // Set up the delay for the second core. - let mut delay = Delay::new(core.SYST, sys_freq); - // Blink the second LED. + // Blink the second LED using Timer 1 + led1.set_high().unwrap(); loop { - led2.toggle().unwrap(); - delay.delay_us(CORE1_DELAY) + timer1.delay_us(CORE1_DELAY_US); + led1.toggle().unwrap(); } }) .unwrap(); // Blink the first LED. 
+ led0.set_high().unwrap(); loop { - led1.toggle().unwrap(); - delay.delay_us(CORE0_DELAY) + timer0.delay_us(CORE0_DELAY_US); + led0.toggle().unwrap(); } } diff --git a/rp235x-hal/src/lib.rs b/rp235x-hal/src/lib.rs index a98908e5a..264b88365 100644 --- a/rp235x-hal/src/lib.rs +++ b/rp235x-hal/src/lib.rs @@ -50,7 +50,6 @@ pub mod dma; pub mod gpio; pub mod i2c; pub mod lposc; -#[cfg(all(target_arch = "arm", target_os = "none"))] pub mod multicore; pub mod otp; pub mod pio; diff --git a/rp235x-hal/src/multicore.rs b/rp235x-hal/src/multicore.rs index db49a6a33..ff541b78d 100644 --- a/rp235x-hal/src/multicore.rs +++ b/rp235x-hal/src/multicore.rs @@ -57,14 +57,13 @@ pub enum Error { #[inline(always)] fn install_stack_guard(_stack_limit: *mut usize) { - // TBD Cortex-M33 MPU stack guard stuff. - // See the RP2040 code. + // TBD Cortex-M33 / Hazard3 MPU stack guard stuff. See the Pico SDK for + // ideas as to what to do here. } #[inline(always)] fn core1_setup(stack_limit: *mut usize) { install_stack_guard(stack_limit); - // TODO: irq priorities } /// Multicore execution management. @@ -115,6 +114,38 @@ impl<'p> Multicore<'p> { } } +extern "C" { + fn _core1_trampoline() -> !; +} + +#[cfg(target_arch = "arm")] +core::arch::global_asm!( + ".global _core1_trampoline", + ".thumb_func", + "_core1_trampoline:", + "pop {{r0, r1, pc}}", +); + +#[cfg(not(target_arch = "arm"))] +core::arch::global_asm!( + ".align 4", + ".section .text, \"ax\"", + ".global _core1_trampoline", + "_core1_trampoline:", + // load the pointer to the closure + "lw a0, 0(sp)", + // load the stack limit + "lw a1, 4(sp)", + // load the pointer to core1_startup + "lw a2, 8(sp)", + // load and set the GP + "lw gp, 12(sp)", + // move the stack pointer back 4 words + "addi sp, sp, 16", + // core1_startup(entry, stack_limit) + "jr a2", +); + /// A handle for controlling a logical core. 
pub struct Core<'p> { inner: Option<( @@ -149,12 +180,8 @@ impl<'p> Core<'p> { F: FnOnce() + Send + 'static, { if let Some((psm, ppb, fifo)) = self.inner.as_mut() { - // The first two ignored `u64` parameters are there to take up all of the registers, - // which means that the rest of the arguments are taken from the stack, - // where we're able to put them from core 0. + /// Called from the asm `_core1_trampoline` function. extern "C" fn core1_startup( - _: u64, - _: u64, entry: *mut ManuallyDrop, stack_limit: *mut usize, ) -> ! { @@ -192,27 +219,55 @@ impl<'p> Core<'p> { // Set up the stack // AAPCS requires in 6.2.1.2 that the stack is 8bytes aligned., we may need to trim the - // array size to guaranty that the base of the stack (the end of the array) meets that requirement. + // array size to guarantee that the base of the stack (the end of the array) meets that requirement. // The start of the array does not need to be aligned. - let mut stack_ptr = stack.as_mut_ptr_range().end; - // on rp235x, usize are 4 bytes, so align_offset(8) on a *mut usize returns either 0 or 1. - let misalignment_offset = stack_ptr.align_offset(8); + let raw_stack_top = stack.as_mut_ptr_range().end; + + // on arm and riscv32, usize are 4 bytes, so align_offset(8) on a *mut usize returns either 0 or 1. + let misalignment_offset = raw_stack_top.align_offset(8); + + let aligned_stack_top = unsafe { raw_stack_top.sub(misalignment_offset) }; // We don't want to drop this, since it's getting moved to the other core. let mut entry = ManuallyDrop::new(entry); - // Push the arguments to `core1_startup` onto the stack. + let mut stack_ptr = aligned_stack_top; + + // Push `core1_startup` and its arguments to the stack. + // + // Our stack grows downwards. We want `entry` at the lowest address, + // which is the first to be popped. unsafe { + #[cfg(target_arch = "riscv32")] + { + // Push `GP`. + // + // The trampoline puts this back in gp. 
+ let mut gp: usize; + core::arch::asm!("mv {0},gp", out(reg) gp); + stack_ptr = stack_ptr.sub(1); + stack_ptr.write(gp); + } + + // Push extern "C" wrapper function. + // + // It will get the next two values as its args thanks to + // our trampoline. + // + // This ends up in pc/a2. + stack_ptr = stack_ptr.sub(1); + stack_ptr.write(core1_startup:: as usize); // Push `stack_limit`. + // This ends up in r1/a1. stack_ptr = stack_ptr.sub(1); - stack_ptr.cast::<*mut usize>().write(stack.as_mut_ptr()); + stack_ptr.write(stack.as_mut_ptr() as usize); // Push `entry`. + // This ends up in r0/a0. stack_ptr = stack_ptr.sub(1); - stack_ptr.cast::<*mut ManuallyDrop>().write(&mut entry); + stack_ptr.write(&mut entry as *mut ManuallyDrop as usize); } // Make sure the compiler does not reorder the stack writes after to after the @@ -233,8 +288,11 @@ impl<'p> Core<'p> { 0, 1, vector_table as usize, + // This points at where we put stuff on the stack. As the + // trampoline pops it off, the address will end up back at + // aligned_stack_top. stack_ptr as usize, - core1_startup:: as usize, + _core1_trampoline as usize, ]; let mut seq = 0;