Skip to content

Commit

Permalink
Updated multicore code and multicore_polyblink example.
Browse files Browse the repository at this point in the history
It *should* now work on Arm and RISC-V, but only Arm mode works and I don't know why.

Changed RP2040 multicore code to match, which also works. Using the asm trampoline seems nicer than relying on AAPCS stacking function args in the right place.
  • Loading branch information
thejpster committed Sep 8, 2024
1 parent 4fd6e7d commit c068030
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 73 deletions.
73 changes: 52 additions & 21 deletions rp2040-hal/src/multicore.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,12 @@
//! # Usage
//!
//! ```no_run
//! use rp2040_hal::{pac, gpio::Pins, sio::Sio, multicore::{Multicore, Stack}};
//! use rp2040_hal::{
//! gpio::Pins,
//! multicore::{Multicore, Stack},
//! pac,
//! sio::Sio,
//! };
//!
//! static mut CORE1_STACK: Stack<4096> = Stack::new();
//!
Expand All @@ -17,7 +22,7 @@
//! }
//!
//! fn main() -> ! {
//! let mut pac = pac::Peripherals::take().unwrap();
//! let mut pac = hal::pac::Peripherals::take().unwrap();
//! let mut sio = Sio::new(pac.SIO);
//! // Other init code above this line
//! let mut mc = Multicore::new(&mut pac.PSM, &mut pac.PPB, &mut sio.fifo);
Expand All @@ -27,7 +32,6 @@
//! // The rest of your application below this line
//! # loop {}
//! }
//!
//! ```
//!
//! For inter-processor communications, see [`crate::sio::SioFifo`] and [`crate::sio::Spinlock0`]
Expand Down Expand Up @@ -132,6 +136,17 @@ impl<'p> Multicore<'p> {
}
}

// Foreign declaration for the assembly routine defined by the `global_asm!`
// block in this file, so that Rust code can take its address.
extern "C" {
/// Assembly trampoline whose address is placed in the core 1 launch
/// command sequence.
///
/// It pops the two arguments for `core1_startup` and then the address of
/// `core1_startup` itself off the stack prepared by `Core::spawn`, so it
/// never returns to a caller.
fn _core1_trampoline() -> !;
}

// Definition of `_core1_trampoline`.
//
// `Core::spawn` writes three words at the top of the new stack, lowest address
// first: `entry`, `stack_limit`, and the address of `core1_startup::<F>`.
// The single `pop` below loads them into r0, r1 and pc respectively, which
// passes the first two as arguments and jumps to the third.
core::arch::global_asm!(
// Export the symbol so the `extern "C"` declaration can resolve to it.
".global _core1_trampoline",
// Mark the following symbol as a Thumb function.
".thumb_func",
"_core1_trampoline:",
// The doubled braces are escapes for literal `{` and `}` in the asm template.
"pop {{r0, r1, pc}}",
);

/// A handle for controlling a logical core.
pub struct Core<'p> {
inner: Option<(
Expand Down Expand Up @@ -166,12 +181,8 @@ impl<'p> Core<'p> {
F: FnOnce() + Send + 'static,
{
if let Some((psm, ppb, fifo)) = self.inner.as_mut() {
// The first two ignored `u64` parameters are there to take up all of the registers,
// which means that the rest of the arguments are taken from the stack,
// where we're able to put them from core 0.
/// Called from the asm `_core1_trampoline` function.
extern "C" fn core1_startup<F: FnOnce()>(
_: u64,
_: u64,
entry: *mut ManuallyDrop<F>,
stack_limit: *mut usize,
) -> ! {
Expand All @@ -192,7 +203,7 @@ impl<'p> Core<'p> {

entry();
loop {
cortex_m::asm::wfe()
crate::arch::wfe()
}
}

Expand All @@ -203,41 +214,58 @@ impl<'p> Core<'p> {
// so wouldn't be zero cost.
psm.frce_off().modify(|_, w| w.proc1().set_bit());
while !psm.frce_off().read().proc1().bit_is_set() {
cortex_m::asm::nop();
crate::arch::nop();
}
psm.frce_off().modify(|_, w| w.proc1().clear_bit());

// Set up the stack
// AAPCS requires in 6.2.1.2 that the stack is 8-byte aligned, so we may need to trim the
// array size to guaranty that the base of the stack (the end of the array) meets that requirement.
// array size to guarantee that the base of the stack (the end of the array) meets that requirement.
// The start of the array does not need to be aligned.

let mut stack_ptr = stack.as_mut_ptr_range().end;
// on rp2040, usize are 4 bytes, so align_offset(8) on a *mut usize returns either 0 or 1.
let misalignment_offset = stack_ptr.align_offset(8);
let raw_stack_top = stack.as_mut_ptr_range().end;

// on arm, usize are 4 bytes, so align_offset(8) on a *mut usize returns either 0 or 1.
let misalignment_offset = raw_stack_top.align_offset(8);

let aligned_stack_top = unsafe { raw_stack_top.sub(misalignment_offset) };

// We don't want to drop this, since it's getting moved to the other core.
let mut entry = ManuallyDrop::new(entry);

// Push the arguments to `core1_startup` onto the stack.
let mut stack_ptr = aligned_stack_top;

// Push `core1_startup` and its arguments onto the stack.
//
// Our stack grows downwards. We want `entry` at the lowest address,
// which is the first to be popped.
unsafe {
stack_ptr = stack_ptr.sub(misalignment_offset);
// Push extern "C" wrapper function.
//
// It will get the next two values as its args thanks to
// our trampoline.
//
// This ends up in pc.
stack_ptr = stack_ptr.sub(1);
stack_ptr.write(core1_startup::<F> as usize);

// Push `stack_limit`.
// This ends up in r1.
stack_ptr = stack_ptr.sub(1);
stack_ptr.cast::<*mut usize>().write(stack.as_mut_ptr());
stack_ptr.write(stack.as_mut_ptr() as usize);

// Push `entry`.
// This ends up in r0.
stack_ptr = stack_ptr.sub(1);
stack_ptr.cast::<*mut ManuallyDrop<F>>().write(&mut entry);
stack_ptr.write(&mut entry as *mut ManuallyDrop<F> as usize);
}

// Make sure the compiler does not reorder the stack writes to after the
// below FIFO writes, which would result in them not being seen by the second
// core.
//
// From the compiler perspective, this doesn't guarantee that the second core
// actually sees those writes. However, we know that the RP2040 doesn't have
// actually sees those writes. However, we know that the RP2040 doesn't have

This comment has been minimized.

Copy link
@jannic

jannic Sep 9, 2024

Member

RP2040

Btw this isn't true for the rp235x, right? So there we do need a stronger barrier.

// memory caches, and writes happen in-order.
compiler_fence(Ordering::Release);

Expand All @@ -250,8 +278,11 @@ impl<'p> Core<'p> {
0,
1,
vector_table as usize,
// This points at where we put stuff on the stack. As the
// trampoline pops it off, the address will end up back at
// aligned_stack_top.
stack_ptr as usize,
core1_startup::<F> as usize,
_core1_trampoline as usize,
];

let mut seq = 0;
Expand All @@ -260,7 +291,7 @@ impl<'p> Core<'p> {
let cmd = cmd_seq[seq] as u32;
if cmd == 0 {
fifo.drain();
cortex_m::asm::sev();
crate::arch::sev();
}
fifo.write_blocking(cmd);
let response = fifo.read_blocking();
Expand Down
58 changes: 24 additions & 34 deletions rp235x-hal-examples/src/bin/multicore_polyblink.rs
Original file line number Diff line number Diff line change
@@ -1,29 +1,26 @@
//! # Multicore Blinking Example
//!
//! This application blinks two LEDs on GPIOs 2 and 3 at different rates (3Hz
//! and 4Hz respectively.)
//! This application blinks two LEDs on GPIOs 2 and 25 at different rates (3 Hz
//! and 1 Hz respectively.)
//!
//! See the `Cargo.toml` file for Copyright and licence details.
#![no_std]
#![no_main]

use cortex_m::delay::Delay;

// Alias for our HAL crate
use rp235x_hal as hal;

use hal::clocks::Clock;
use hal::gpio::Pins;
use hal::multicore::{Multicore, Stack};
use hal::sio::Sio;

// Ensure we halt the program on panic (if we don't mention this crate it won't
// be linked)
use panic_halt as _;

// Some things we need
use embedded_hal::digital::StatefulOutputPin;
use embedded_hal::delay::DelayNs;
use embedded_hal::digital::{OutputPin, StatefulOutputPin};
use hal::gpio::Pins;
use hal::multicore::{Multicore, Stack};
use hal::sio::Sio;

/// Tell the Boot ROM about our application
#[link_section = ".start_block"]
Expand All @@ -37,16 +34,16 @@ const XTAL_FREQ_HZ: u32 = 12_000_000u32;
/// The frequency at which core 0 will blink its LED (Hz).
const CORE0_FREQ: u32 = 3;
/// The frequency at which core 1 will blink its LED (Hz).
const CORE1_FREQ: u32 = 4;
const CORE1_FREQ: u32 = 1;
/// The delay between each toggle of core 0's LED (us).
const CORE0_DELAY: u32 = 1_000_000 / CORE0_FREQ;
const CORE0_DELAY_US: u32 = 1_000_000 / CORE0_FREQ;
/// The delay between each toggle of core 1's LED (us).
const CORE1_DELAY: u32 = 1_000_000 / CORE1_FREQ;
const CORE1_DELAY_US: u32 = 1_000_000 / CORE1_FREQ;

/// Stack for core 1
///
/// Core 0 gets its stack via the normal route - any memory not used by static
/// values is reserved for stack and initialised by cortex-m-rt.
/// values is reserved for stack and initialised by cortex-m-rt / riscv-rt.
/// To get the same for Core 1, we would need to compile everything separately
/// and modify the linker file for both programs, and that's quite annoying.
/// So instead, core1.spawn takes a [usize] which gets used for the stack.
Expand All @@ -57,13 +54,12 @@ static mut CORE1_STACK: Stack<4096> = Stack::new();

/// Entry point to our bare-metal application.
///
/// The `#[hal::entry]` macro ensures the Cortex-M start-up code calls this function
/// The `#[hal::entry]` macro ensures the start-up code calls this function
/// as soon as all global variables and the spinlock are initialised.
#[hal::entry]
fn main() -> ! {
// Grab our singleton objects
// Grab our singleton object
let mut pac = hal::pac::Peripherals::take().unwrap();
let core = cortex_m::Peripherals::take().unwrap();

// Set up the watchdog driver - needed by the clock setup code
let mut watchdog = hal::watchdog::Watchdog::new(pac.WATCHDOG);
Expand All @@ -88,37 +84,31 @@ fn main() -> ! {
sio.gpio_bank0,
&mut pac.RESETS,
);
let mut led1 = pins.gpio2.into_push_pull_output();
let mut led2 = pins.gpio3.into_push_pull_output();

// Set up the delay for the first core.
let sys_freq = clocks.system_clock.freq().to_Hz();
let mut delay = Delay::new(core.SYST, sys_freq);
let mut led0 = pins.gpio2.into_push_pull_output();
let mut led1 = pins.gpio25.into_push_pull_output();
let mut timer0 = hal::Timer::new_timer0(pac.TIMER0, &mut pac.RESETS, &clocks);
let mut timer1 = hal::Timer::new_timer1(pac.TIMER1, &mut pac.RESETS, &clocks);

// Start up the second core to blink the second LED
let mut mc = Multicore::new(&mut pac.PSM, &mut pac.PPB, &mut sio.fifo);
let cores = mc.cores();
let core1 = &mut cores[1];
core1
.spawn(unsafe { &mut CORE1_STACK.mem }, move || {
// Get the second core's copy of the `CorePeripherals`, which are per-core.
// Unfortunately, `cortex-m` doesn't support this properly right now,
// so we have to use `steal`.
let core = unsafe { cortex_m::Peripherals::steal() };
// Set up the delay for the second core.
let mut delay = Delay::new(core.SYST, sys_freq);
// Blink the second LED.
// Blink the second LED using Timer 1
led1.set_high().unwrap();
loop {
led2.toggle().unwrap();
delay.delay_us(CORE1_DELAY)
timer1.delay_us(CORE1_DELAY_US);
led1.toggle().unwrap();
}
})
.unwrap();

// Blink the first LED.
led0.set_high().unwrap();
loop {
led1.toggle().unwrap();
delay.delay_us(CORE0_DELAY)
timer0.delay_us(CORE0_DELAY_US);
led0.toggle().unwrap();
}
}

Expand Down
1 change: 0 additions & 1 deletion rp235x-hal/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@ pub mod dma;
pub mod gpio;
pub mod i2c;
pub mod lposc;
#[cfg(all(target_arch = "arm", target_os = "none"))]
pub mod multicore;
pub mod otp;
pub mod pio;
Expand Down
Loading

0 comments on commit c068030

Please sign in to comment.