From fa143040883fcab0321e0083f3bdc63fc0b023a4 Mon Sep 17 00:00:00 2001 From: Lokathor Date: Sun, 1 Sep 2024 21:17:59 -0600 Subject: [PATCH] clothed functions (#197) * clothed functions * remove the asm_runtime module as a public module * partly fix the examples. * add RUST_IRQ_HANDLER to the prelude * make the crate work right with both a32 and t32 building. * fix incorrect mgba logging activation. * it all builds but there's bugs in here. * we had the wrong number of copies. * we can delete the old stuff we don't even want all that general memcpy stuff, we want precise functions. * try putting the irq handler in iwram to speed it up * get the correct runtime handler code from the 0.12 branch; the bug was that we need to go back to IRQ mode (not SVC mode) when finishing up the IRQ handler (which should have been obvious). * skip flipping dy when bouncing off of a paddle in the pong demo. * remove excess imports * remove unsafe code usage from examples by providing safe wrappers for those operations in the lib. * make the pointer type match the usage. * i'm just going with not having the crate provide special division support for now, compiler-builtins still provides it. * fix the pointer type. * improve the unimplemented message. * sort our crate declarations --- Cargo.toml | 3 +- examples/hello.rs | 6 +- examples/mode3_pong_example_game.rs | 239 +++++----- examples/video3_test.rs | 15 +- examples/video4_test.rs | 19 +- src/asm_runtime.rs | 356 ++++---------- src/lib.rs | 16 +- src/macros.rs | 12 + src/mem.rs | 138 ++++++ src/mem_fns.rs | 688 ---------------------------- src/prelude.rs | 1 + src/video/mod.rs | 63 ++- 12 files changed, 452 insertions(+), 1104 deletions(-) create mode 100644 src/mem.rs delete mode 100644 src/mem_fns.rs diff --git a/Cargo.toml b/Cargo.toml index 8f58f904..efc4471f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,11 +15,12 @@ fixed = ["dep:fixed"] [dependencies] voladdress = "1.3.0" bitfrob = "1" -bracer = "0.1.2" +bracer = "0.3.1" critical-section = { version = "1.1.2", features = [ "restore-state-bool", ], optional = true } fixed = { version = "1.28.0", optional = true } +bytemuck = "1.17.1" [profile.dev] opt-level = 3 diff --git a/examples/hello.rs b/examples/hello.rs index 2c98e6df..b85c7416 100644 --- a/examples/hello.rs +++ b/examples/hello.rs @@ -36,16 +36,16 @@ extern "C" fn main() -> ! 
{ writeln!(logger, "hello!").ok(); let fx_u: Fixed = - Fixed::::wrapping_from(7) + Fixed::::from_raw(12); + Fixed::::wrapping_from(7) + Fixed::::from_bits(12); writeln!(logger, "fixed unsigned: {fx_u:?}").ok(); let fx_i1: Fixed = - Fixed::::wrapping_from(8) + Fixed::::from_raw(15); + Fixed::::wrapping_from(8) + Fixed::::from_bits(15); writeln!(logger, "fixed signed positive: {fx_i1:?}").ok(); let fx_i2: Fixed = Fixed::::wrapping_from(0) - Fixed::::wrapping_from(3) - - Fixed::::from_raw(17); + - Fixed::::from_bits(17); writeln!(logger, "fixed signed negative: {fx_i2:?}").ok(); } diff --git a/examples/mode3_pong_example_game.rs b/examples/mode3_pong_example_game.rs index 67ec9cb2..82de25ed 100644 --- a/examples/mode3_pong_example_game.rs +++ b/examples/mode3_pong_example_game.rs @@ -1,12 +1,12 @@ -/* -* Made by Evan Goemer -* Discord: @evangoemer -*/ +/* + * Made by Evan Goemer + * Discord: @evangoemer + */ #![no_std] #![no_main] -use gba::{prelude::*, mem_fns::__aeabi_memset}; +use gba::{mem::set_u32x80_unchecked, prelude::*}; const SCREEN_WIDTH: u16 = 240; const SCREEN_HEIGHT: u16 = 160; @@ -16,91 +16,87 @@ const PADDLE_HEIGHT: u16 = 20; const BALL_SIZE: u16 = 2; struct Paddle { - x: u16, - y: u16, + x: u16, + y: u16, } struct Ball { - x: u16, - y: u16, - dx: i16, - dy: i16, + x: u16, + y: u16, + dx: i16, + dy: i16, } impl Paddle { - fn new(x: u16, y: u16) -> Self { - Self { - x, - y, - } + fn new(x: u16, y: u16) -> Self { + Self { x, y } + } + + fn update(&mut self) { + let keys = KEYINPUT.read(); + if keys.up() && self.y > 1 { + self.y -= 1; } - fn update(&mut self) { - let keys = KEYINPUT.read(); - if keys.up() && self.y > 1 { - self.y -= 1; - } - - if keys.down() && self.y + PADDLE_HEIGHT + 1 < SCREEN_HEIGHT { - self.y += 1; - } + if keys.down() && self.y + PADDLE_HEIGHT + 1 < SCREEN_HEIGHT { + self.y += 1; } + } } impl Ball { - fn new(x: u16, y: u16) -> Self { - Self { x, y, dx: 1, dy: 1 } + fn new(x: u16, y: u16) -> Self { + Self { x, y, dx: 1, dy: 1 } + } + + fn update(&mut self, paddle1: &Paddle, paddle2: &Paddle) { + if self.y <= 0 || self.y + BALL_SIZE >= SCREEN_HEIGHT { + self.dy = -self.dy; + } + + if self.x + BALL_SIZE >= paddle1.x + && self.x <= paddle1.x + PADDLE_WIDTH + && self.y + BALL_SIZE >= paddle1.y + && self.y <= paddle1.y + PADDLE_HEIGHT + { + self.dx = -self.dx; + self.dy = self.dy; + } + + if self.x + BALL_SIZE >= paddle2.x + && self.x <= paddle2.x + PADDLE_WIDTH + && self.y + BALL_SIZE >= paddle2.y + && self.y <= paddle2.y + PADDLE_HEIGHT + { + self.dx = -self.dx; + self.dy = self.dy; } - fn update(&mut self, paddle1: &Paddle, paddle2: &Paddle) { - if self.y <= 0 || self.y + BALL_SIZE >= SCREEN_HEIGHT { - self.dy = -self.dy; - } - - if self.x + BALL_SIZE >= paddle1.x - && self.x <= paddle1.x + PADDLE_WIDTH - && self.y + BALL_SIZE >= paddle1.y - && self.y <= paddle1.y + PADDLE_HEIGHT - { - self.dx = -self.dx; - self.dy = -self.dy; - } - - if self.x + BALL_SIZE >= paddle2.x - && self.x <= paddle2.x + PADDLE_WIDTH - && self.y + BALL_SIZE >= paddle2.y - && self.y <= paddle2.y + PADDLE_HEIGHT - { - self.dx = -self.dx; - self.dy = -self.dy; - } - - - if self.x + BALL_SIZE <= 1 + BALL_SIZE { - self.x = SCREEN_WIDTH / 2 - BALL_SIZE / 2; - self.y = SCREEN_HEIGHT / 2 - BALL_SIZE / 2; - self.dx = 1; - self.dy = 1; - } - - if self.x >= SCREEN_WIDTH - BALL_SIZE - 1 { - self.x = SCREEN_WIDTH / 2 - BALL_SIZE / 2; - self.y = SCREEN_HEIGHT / 2 - BALL_SIZE / 2; - self.dx = -1; - self.dy = 1; - } - self.x = (self.x as i16 + self.dx) as u16; - self.y = (self.y as i16 + self.dy) as 
u16; + if self.x + BALL_SIZE <= 1 + BALL_SIZE { + self.x = SCREEN_WIDTH / 2 - BALL_SIZE / 2; + self.y = SCREEN_HEIGHT / 2 - BALL_SIZE / 2; + self.dx = 1; + self.dy = 1; } + + if self.x >= SCREEN_WIDTH - BALL_SIZE - 1 { + self.x = SCREEN_WIDTH / 2 - BALL_SIZE / 2; + self.y = SCREEN_HEIGHT / 2 - BALL_SIZE / 2; + self.dx = -1; + self.dy = 1; + } + self.x = (self.x as i16 + self.dx) as u16; + self.y = (self.y as i16 + self.dy) as u16; + } } static SPRITE_POSITIONS: [GbaCell; 6] = [ - GbaCell::new(0), - GbaCell::new(0), - GbaCell::new(0), - GbaCell::new(0), - GbaCell::new(0), - GbaCell::new(0), + GbaCell::new(0), + GbaCell::new(0), + GbaCell::new(0), + GbaCell::new(0), + GbaCell::new(0), + GbaCell::new(0), ]; #[panic_handler] @@ -110,50 +106,71 @@ fn panic_handler(_: &core::panic::PanicInfo) -> ! { #[no_mangle] fn main() -> ! { - DISPCNT.write( - DisplayControl::new().with_video_mode(VideoMode::_3).with_show_bg2(true), - ); - - RUST_IRQ_HANDLER.write(Some(draw_sprites)); - DISPSTAT.write(DisplayStatus::new().with_irq_vblank(true)); - IE.write(IrqBits::VBLANK); - IME.write(true); - - let mut left_paddle = Paddle::new(10, SCREEN_HEIGHT as u16 / 2 - PADDLE_HEIGHT / 2); - let mut right_paddle = Paddle::new(SCREEN_WIDTH as u16 - 10 - PADDLE_WIDTH, SCREEN_HEIGHT as u16 / 2 - PADDLE_HEIGHT / 2); - let mut ball = Ball::new(SCREEN_WIDTH as u16 / 2, SCREEN_HEIGHT as u16 / 2); - - loop { - left_paddle.update(); - right_paddle.update(); - ball.update(&left_paddle, &right_paddle); - - SPRITE_POSITIONS[0].write(left_paddle.x); - SPRITE_POSITIONS[1].write(left_paddle.y); - SPRITE_POSITIONS[2].write(right_paddle.x); - SPRITE_POSITIONS[3].write(right_paddle.y); - SPRITE_POSITIONS[4].write(ball.x); - SPRITE_POSITIONS[5].write(ball.y); - - VBlankIntrWait(); - } + DISPCNT.write( + DisplayControl::new().with_video_mode(VideoMode::_3).with_show_bg2(true), + ); + + RUST_IRQ_HANDLER.write(Some(draw_sprites)); + DISPSTAT.write(DisplayStatus::new().with_irq_vblank(true)); + IE.write(IrqBits::VBLANK); + IME.write(true); + + let mut left_paddle = + Paddle::new(10, SCREEN_HEIGHT as u16 / 2 - PADDLE_HEIGHT / 2); + let mut right_paddle = Paddle::new( + SCREEN_WIDTH as u16 - 10 - PADDLE_WIDTH, + SCREEN_HEIGHT as u16 / 2 - PADDLE_HEIGHT / 2, + ); + let mut ball = Ball::new(SCREEN_WIDTH as u16 / 2, SCREEN_HEIGHT as u16 / 2); + + loop { + left_paddle.update(); + right_paddle.update(); + ball.update(&left_paddle, &right_paddle); + + SPRITE_POSITIONS[0].write(left_paddle.x); + SPRITE_POSITIONS[1].write(left_paddle.y); + SPRITE_POSITIONS[2].write(right_paddle.x); + SPRITE_POSITIONS[3].write(right_paddle.y); + SPRITE_POSITIONS[4].write(ball.x); + SPRITE_POSITIONS[5].write(ball.y); + + VBlankIntrWait(); + } } +#[link_section = ".iwram.draw_sprites"] extern "C" fn draw_sprites(_bits: IrqBits) { - unsafe { - let p = VIDEO3_VRAM.as_usize() as *mut u8; - __aeabi_memset(p, 240*160*2, 0) - } - - draw_rect(SPRITE_POSITIONS[0].read(), SPRITE_POSITIONS[1].read(), PADDLE_WIDTH, PADDLE_HEIGHT, Color::WHITE); - draw_rect(SPRITE_POSITIONS[2].read(), SPRITE_POSITIONS[3].read(), PADDLE_WIDTH, PADDLE_HEIGHT, Color::WHITE); - draw_rect(SPRITE_POSITIONS[4].read(), SPRITE_POSITIONS[5].read(), BALL_SIZE, BALL_SIZE, Color::WHITE); + video3_clear_to(Color::BLACK); + + draw_rect( + SPRITE_POSITIONS[0].read(), + SPRITE_POSITIONS[1].read(), + PADDLE_WIDTH, + PADDLE_HEIGHT, + Color::WHITE, + ); + draw_rect( + SPRITE_POSITIONS[2].read(), + SPRITE_POSITIONS[3].read(), + PADDLE_WIDTH, + PADDLE_HEIGHT, + Color::WHITE, + ); + draw_rect( + 
SPRITE_POSITIONS[4].read(), + SPRITE_POSITIONS[5].read(), + BALL_SIZE, + BALL_SIZE, + Color::WHITE, + ); } +#[link_section = ".iwram.draw_rect"] fn draw_rect(x: u16, y: u16, width: u16, height: u16, color: Color) { - for i in 0..width { - for j in 0..height { - VIDEO3_VRAM.index((x + i) as usize, (y + j) as usize).write(color); - } + for i in 0..width { + for j in 0..height { + VIDEO3_VRAM.index((x + i) as usize, (y + j) as usize).write(color); } + } } diff --git a/examples/video3_test.rs b/examples/video3_test.rs index 4aa664db..8da2f456 100644 --- a/examples/video3_test.rs +++ b/examples/video3_test.rs @@ -1,7 +1,7 @@ #![no_std] #![no_main] -use gba::{mem_fns::__aeabi_memcpy, prelude::*}; +use gba::prelude::*; #[panic_handler] fn panic_handler(info: &core::panic::PanicInfo) -> ! { @@ -15,21 +15,14 @@ fn panic_handler(info: &core::panic::PanicInfo) -> ! { #[no_mangle] fn main() -> ! { - let a = TEXT_SCREENBLOCKS.get_frame(0).unwrap().as_usize(); - unsafe { - __aeabi_memcpy( - a as _, - PIXELS.as_ptr().cast(), - core::mem::size_of_val(PIXELS) as _, - ) - }; + video3_set_bitmap(&BITMAP); DISPCNT.write( DisplayControl::new().with_video_mode(VideoMode::_3).with_show_bg2(true), ); loop {} } -pub const PIXELS: &[u16] = &[ +pub static BITMAP: Video3Bitmap = Video3Bitmap::new_from_u16([ 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, @@ -4297,4 +4290,4 @@ pub const PIXELS: &[u16] = &[ 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, -]; +]); diff --git a/examples/video4_test.rs b/examples/video4_test.rs index 56cbcc41..3800517b 100644 --- a/examples/video4_test.rs +++ b/examples/video4_test.rs @@ -1,7 +1,7 @@ #![no_std] #![no_main] -use gba::{mem_fns::__aeabi_memcpy, prelude::*}; +use gba::{mem::copy_u32x8_unchecked, prelude::*}; #[panic_handler] fn panic_handler(info: &core::panic::PanicInfo) -> ! { @@ -15,14 +15,7 @@ fn panic_handler(info: &core::panic::PanicInfo) -> ! { #[no_mangle] fn main() -> ! { - let a = TEXT_SCREENBLOCKS.get_frame(0).unwrap().as_usize(); - unsafe { - __aeabi_memcpy( - a as _, - INDEXES.as_ptr().cast(), - core::mem::size_of_val(INDEXES) as _, - ) - }; + video4_set_indexmap(&INDEXES, 0); BG_PALETTE.iter().zip(PALETTE.iter()).for_each(|(va, i)| { va.write(Color(*i)); }); @@ -32,7 +25,9 @@ fn main() -> ! 
{ loop {} } -pub const INDEXES: &[u8] = &[ +pub const PALETTE: &[u16] = &[0x77DE, 0x2E06, 0x27BE, 0x61C5, 0x2518]; + +pub static INDEXES: Video4Indexmap = Video4Indexmap([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -1510,6 +1505,4 @@ pub const INDEXES: &[u8] = &[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -]; - -pub const PALETTE: &[u16] = &[0x77DE, 0x2E06, 0x27BE, 0x61C5, 0x2518]; +]); diff --git a/src/asm_runtime.rs b/src/asm_runtime.rs index 63f2749b..e384ae2a 100644 --- a/src/asm_runtime.rs +++ b/src/asm_runtime.rs @@ -14,36 +14,55 @@ use crate::{ gba_cell::GbaCell, interrupts::IrqFn, mgba::MGBA_LOGGING_ENABLE_REQUEST, - mmio::{DMA3_SRC, IME, MGBA_LOG_ENABLE}, + mmio::{DMA3_SRC, IME, MGBA_LOG_ENABLE, WAITCNT}, }; -/// The function pointer that the assembly runtime calls when an interrupt -/// occurs. -pub static RUST_IRQ_HANDLER: GbaCell> = GbaCell::new(None); - const DMA_32_BIT_MEMCPY: DmaControl = DmaControl::new().with_transfer_32bit(true).with_enabled(true); const DMA3_OFFSET: usize = DMA3_SRC.as_usize() - 0x0400_0000; -const IME_OFFSET: usize = IME.as_usize() - 0x0400_0000; +const WAITCNT_OFFSET: usize = WAITCNT.as_usize() - 0x0400_0000; + +// Proc-macros can't see the target being built for, so we use this declarative +// macro to determine if we're on a thumb target (and need to force our asm into +// a32 mode) or if we're not on thumb (and our asm can pass through untouched). +#[cfg(target_feature = "thumb-mode")] +macro_rules! force_a32 { + ($($asm_line:expr),+ $(,)?) => { + bracer::t32_with_a32_scope! { + $( concat!($asm_line, "\n") ),+ , + } + } +} +#[cfg(not(target_feature = "thumb-mode"))] +macro_rules! force_a32 { + ($($asm_line:expr),+ $(,)?) => { + concat!( + $( concat!($asm_line, "\n") ),+ , + ) + } +} + +core::arch::global_asm! { + bracer::put_fn_in_section!(".text.gba_rom_header"), + ".global __start", + "__start:", -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".text.gba_rom_header"] -unsafe extern "C" fn __start() -> ! { - core::arch::asm!( + force_a32!{ + // space for the header "b 1f", ".space 0xE0", "1:", /* post header */ + + // set the waitstate control to the GBATEK suggested setting. "mov r12, #{mmio_base}", "add r0, r12, #{waitcnt_offset}", "ldr r1, ={waitcnt_setting}", "strh r1, [r0]", - /* iwram copy */ + // Initialize IWRAM "ldr r4, =__iwram_word_copy_count", - bracer::when!("r4" != "#0" [label_id=1] { + bracer::when!(("r4" != "#0")[1] { "add r3, r12, #{dma3_offset}", "mov r5, #{dma3_setting}", "ldr r0, =__iwram_start", @@ -54,9 +73,9 @@ unsafe extern "C" fn __start() -> ! { "strh r5, [r3, #10]", /* set control bits */ }), - /* ewram copy */ + // Initialize EWRAM "ldr r4, =__ewram_word_copy_count", - bracer::when!("r4" != "#0" [label_id=1] { + bracer::when!(("r4" != "#0")[1] { "add r3, r12, #{dma3_offset}", "mov r5, #{dma3_setting}", "ldr r0, =__ewram_start", @@ -67,9 +86,9 @@ unsafe extern "C" fn __start() -> ! 
{ "strh r5, [r3, #10]", /* set control bits */ }), - /* bss zero */ + // Zero the BSS region "ldr r4, =__bss_word_clear_count", - bracer::when!("r4" != "#0" [label_id=1] { + bracer::when!(("r4" != "#0")[1] { "ldr r0, =__bss_start", "mov r2, #0", "2:", @@ -78,269 +97,70 @@ unsafe extern "C" fn __start() -> ! { "bne 2b", }), - /* assign the runtime irq handler */ - "ldr r1, ={runtime_irq_handler}", + // Tell the BIOS where our runtime's handler is. + "ldr r1, =__runtime_irq_handler", "str r1, [r12, #-4]", - /* ask for mGBA logging to be enabled. This should be harmless if we're not using mgba. */ + // Enable mGBA logging, which is harmless when not in mGBA "ldr r0, ={mgba_log_enable}", "ldr r1, ={mgba_logging_enable_request}", - "str r1, [r0]", + "strh r1, [r0]", - /* call to rust main */ + // Call the `main` function (defined by the user's program) "ldr r0, =main", "bx r0", - // main shouldn't return, but if it does just SoftReset + + // `main` shouldn't return, but if it does just SoftReset "swi #0", - mmio_base = const 0x0400_0000, - waitcnt_offset = const 0x204, - waitcnt_setting = const 0x4317 /*sram8,r0:3.1,r1:4.2,r2:8.2,no_phi,prefetch*/, - dma3_offset = const DMA3_OFFSET, - dma3_setting = const DMA_32_BIT_MEMCPY.to_u16(), - runtime_irq_handler = sym runtime_irq_handler, - mgba_log_enable = const MGBA_LOG_ENABLE.as_usize(), - mgba_logging_enable_request = const MGBA_LOGGING_ENABLE_REQUEST, - options(noreturn) - ) + }, + + // Define Our Constants + mmio_base = const 0x0400_0000, + waitcnt_offset = const WAITCNT_OFFSET, + waitcnt_setting = const 0x4317 /*sram8,r0:3.1,r1:4.2,r2:8.2,no_phi,prefetch*/, + dma3_offset = const DMA3_OFFSET, + dma3_setting = const DMA_32_BIT_MEMCPY.to_u16(), + mgba_log_enable = const MGBA_LOG_ENABLE.as_usize(), + mgba_logging_enable_request = const MGBA_LOGGING_ENABLE_REQUEST, } -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.runtime.irq.handler"] -unsafe extern "C" fn runtime_irq_handler() { +// This handler DOES NOT allow nested interrupts at this time. +core::arch::global_asm! { + bracer::put_fn_in_section!(".text.gba_rom_header"), + ".global __runtime_irq_handler", // On Entry: r0 = 0x0400_0000 (mmio_base) - core::arch::asm!( - /* swap IME off, user can turn it back on if they want */ - "add r12, r0, #{ime_offset}", - "mov r3, #0", - "swp r3, r3, [r12]", - - /* Read/Update IE and IF */ - "ldr r0, [r12, #-8]", - "and r0, r0, r0, LSR #16", - "strh r0, [r12, #-6]", - - /* Read/Update BIOS_IF */ - "sub r2, r12, #(0x208+8)", - "ldrh r1, [r2]", - "orr r1, r1, r0", - "strh r1, [r2]", - - /* Call the Rust fn pointer (if set), using System mode */ - "ldr r1, ={RUST_IRQ_HANDLER}", - "ldr r1, [r1]", - bracer::when!("r1" != "#0" [label_id=9] { - bracer::with_spsr_held_in!("r2", { - bracer::set_cpu_control!(System, irq_masked: false, fiq_masked: false), - - // Note(Lokathor): We are *SKIPPING* the part where we ensure that the - // System stack pointer is aligned to 8 during the call to the rust - // function. This is *technically* against the AAPCS ABI, but the GBA's - // ARMv4T CPU does not even support any instructions that require an - // alignment of 8. By not bothering to align the stack, we save about 5 - // cycles total. Which is neat, but if this were on the DS (which has an - // ARMv5TE CPU) you'd want to ensure the aligned stack. 
- - bracer::with_pushed_registers!("{{r2, r3, r12, lr}}", { - bracer::adr_lr_then_bx_to!(reg="r1", label_id=1) - }), - - bracer::set_cpu_control!(Supervisor, irq_masked: true, fiq_masked: false), - }), + // We're allowed to use the usual C ABI registers. + "__runtime_irq_handler:", + + force_a32!{ + /* A fox wizard told me how to do this one */ + // handle MMIO interrupt system + "mov r12, 0x04000000", // load r12 with a 1 cycle value + "ldr r0, [r12, #0x200]!", // load IE_IF with r12 writeback + "and r0, r0, r0, LSR #16", // bits = IE & IF + "strh r0, [r12, #2]", // write16 to just IF + // handle BIOS IntrWait system + "ldr r1, [r12, #-0x208]!", // load BIOS_IF_?? with r12 writeback + "orr r1, r1, r0", // mark `bits` as `has_occurred` + "strh r1, [r12]", // write16 to just BIOS_IF + + // Get the rust code handler fn pointer, call it if non-null. + "ldr r12, ={RUST_IRQ_HANDLER}", + "ldr r12, [r12]", + bracer::when!(("r12" != "#0")[1] { + bracer::a32_read_spsr_to!("r3"), + "push {{r3, lr}}", + bracer::a32_set_cpu_control!(System, irq_masked = true, fiq_masked = true), + bracer::a32_fake_blx!("r12"), + bracer::a32_set_cpu_control!(IRQ, irq_masked = true, fiq_masked = true), + "pop {{r3, lr}}", + bracer::a32_write_spsr_from!("r3"), }), - /* Restore initial IME setting and return */ - "swp r3, r3, [r12]", + // return to the BIOS "bx lr", - ime_offset = const IME_OFFSET, - RUST_IRQ_HANDLER = sym RUST_IRQ_HANDLER, - options(noreturn) - ) -} + }, -// For now, the division fns can just keep living here. - -/// Returns 0 in `r0`, while placing the `numerator` into `r1`. -/// -/// This is written in that slightly strange way so that `div` function and -/// `divmod` functions can share the same code path. -/// -/// See: [__aeabi_idiv0][aeabi-division-by-zero] -/// -/// [aeabi-division-by-zero]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#division-by-zero -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -// this should literally never get called for real, so we leave it in ROM -extern "C" fn __aeabi_idiv0(numerator: i32) -> i32 { - unsafe { - core::arch::asm!( - // this comment stops rustfmt from making this a one-liner - "mov r1, r0", - "mov r0, #0", - "bx lr", - options(noreturn) - ) - } -} - -/// Returns `u32 / u32` -/// -/// This implementation is *not* the fastest possible division, but it is -/// extremely compact. -/// -/// See: [__aeabi_uidiv][aeabi-integer-32-32-division] -/// -/// [aeabi-integer-32-32-division]: -/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uidiv"] -extern "C" fn __aeabi_uidiv(numerator: u32, denominator: u32) -> u32 { - // Note(Lokathor): Other code in this module relies on being able to call this - // function without affecting r12, so any future implementations of this code - // **must not** destroy r12. 
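The new `__runtime_irq_handler` above acknowledges `IF` and `BIOS_IF`, then calls whatever function pointer is stored in `RUST_IRQ_HANDLER`, switching to System mode for the call and back to IRQ mode afterward (the bug fix called out in the commit message). A minimal sketch of the user-side hookup, condensed from the pong example earlier in this patch:

```rust
use gba::prelude::*;

// The handler signature matches what `RUST_IRQ_HANDLER` stores; the pong
// example also puts its handler in IWRAM via
// `#[link_section = ".iwram.draw_sprites"]` to speed it up.
extern "C" fn my_irq_handler(_bits: IrqBits) {
  // per-frame work goes here
}

fn enable_vblank_irq() {
  RUST_IRQ_HANDLER.write(Some(my_irq_handler));
  DISPSTAT.write(DisplayStatus::new().with_irq_vblank(true));
  IE.write(IrqBits::VBLANK);
  IME.write(true);
  // the main loop can then call `VBlankIntrWait()` once per frame
}
```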
- unsafe { - core::arch::asm!( - // Check for divide by 0 - "cmp r1, #0", - "beq {__aeabi_idiv0}", - // r3(shifted_denom) = denom - "mov r3, r1", - // while shifted_denom < (num>>1): shifted_denom =<< 1; - "cmp r3, r0, lsr #1", - "2:", - "lslls r3, r3, #1", - "cmp r3, r0, lsr #1", - "bls 2b", - // r0=quot(init 0), r1=denom, r2=num, r3=shifted_denom - "mov r2, r0", - "mov r0, #0", - // subtraction loop - "3:", - "cmp r2, r3", - "subcs r2, r2, r3", - "adc r0, r0, r0", - "mov r3, r3, lsr #1", - "cmp r3, r1", - "bcs 3b", - "bx lr", - __aeabi_idiv0 = sym __aeabi_idiv0, - options(noreturn) - ) - } -} - -/// Returns `i32 / i32` -/// -/// See: [__aeabi_idiv][aeabi-integer-32-32-division] -/// -/// [aeabi-integer-32-32-division]: -/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.idiv"] -extern "C" fn __aeabi_idiv(numerator: i32, denominator: i32) -> u32 { - unsafe { - core::arch::asm!( - // determine if `numerator` and `denominator` are the same sign - "eor r12, r1, r0", - // convert both values to their unsigned absolute value. - "cmp r0, #0", - "rsblt r0, r0, #0", - "cmp r1, #0", - "rsclt r1, r1, #0", - bracer::with_pushed_registers!("{{lr}}", { - // divide them using `u32` division (this will check for divide by 0) - "bl {__aeabi_uidiv}", - }), - // if they started as different signs, flip the output's sign. - "cmp r12, #0", - "rsblt r0, r0, #0", - "bx lr", - __aeabi_uidiv = sym __aeabi_uidiv, - options(noreturn) - ) - } -} - -/// Returns `(u32 / u32, u32 % u32)` in `(r0, r1)`. -/// -/// The `u64` return value is a mild lie that gets Rust to grab up both the `r0` -/// and `r1` values when the function returns. If you transmute the return value -/// into `[u32; 2]` then you can separate the two parts of the return value, and -/// it will have no runtime cost. -/// -/// See: [__aeabi_uidivmod][aeabi-integer-32-32-division] -/// -/// [aeabi-integer-32-32-division]: -/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uidivmod"] -extern "C" fn __aeabi_uidivmod(numerator: u32, denominator: u32) -> u64 { - unsafe { - core::arch::asm!( - // We need to save *both* input args until after the uidiv call. One of - // them can be saved in `r12` because we know our uidiv doesn't actually - // touch `r12`, while the other will be pushed onto the stack along with - // `lr`. Since the function's output will be in `r0`, we push/pop `r1`. - "mov r12, r0", - bracer::with_pushed_registers!("{{r1, lr}}", { - "bl {__aeabi_uidiv}", - }), - // Now r0 holds the `quot`, and we use it along with the input args to - // calculate the `rem`. - "mul r2, r0, r1", - "sub r1, r12, r2", - "bx lr", - __aeabi_uidiv = sym __aeabi_uidiv, - options(noreturn) - ) - } -} - -/// Returns `(i32 / i32, i32 % i32)` in `(r0, r1)`. -/// -/// The `u64` return value is a mild lie that gets Rust to grab up both the `r0` -/// and `r1` values when the function returns. If you transmute the return value -/// into `[i32; 2]` then you can separate the two parts of the return value, and -/// it will have no runtime cost. 
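The `__aeabi_uidivmod`/`__aeabi_idivmod` docs above describe packing the quotient/remainder pair into the `u64` return value across `r0`/`r1`. A hypothetical helper (not crate API, and these routines are dropped by this patch in favor of the compiler-builtins versions) illustrating the transmute trick the docs mention:

```rust
// On the GBA's little-endian ARM ABI the low word of a returned `u64`
// is `r0`, so index 0 is the quotient and index 1 is the remainder.
fn unpack_divmod(packed: u64) -> (u32, u32) {
  let [quot, rem]: [u32; 2] = unsafe { core::mem::transmute(packed) };
  (quot, rem)
}
```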
-/// -/// See: [__aeabi_idivmod][aeabi-integer-32-32-division] -/// -/// [aeabi-integer-32-32-division]: -/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.idivmod"] -extern "C" fn __aeabi_idivmod(numerator: i32, denominator: i32) -> u64 { - unsafe { - core::arch::asm!( - bracer::with_pushed_registers!("{{r4, r5, lr}}", { - // store old numerator then make it the unsigned absolute - "movs r4, r0", - "rsblt r0, r0, #0", - // store old denominator then make it the unsigned absolute - "movs r5, r1", - "rsblt r1, r1, #0", - // divmod using unsigned. - "bl {__aeabi_uidivmod}", - // if signs started opposite, quot becomes negative - "eors r12, r4, r5", - "rsblt r0, r0, #0", - // if numerator started negative, rem is negative - "cmp r4, #0", - "rsblt r1, r1, #0", - }), - "bx lr", - __aeabi_uidivmod = sym __aeabi_uidivmod, - options(noreturn) - ) - } + // Define Our Constants + RUST_IRQ_HANDLER = sym crate::RUST_IRQ_HANDLER, } diff --git a/src/lib.rs b/src/lib.rs index 7388f286..06d77795 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,8 @@ #![no_std] -#![feature(naked_functions)] -#![warn(clippy::missing_inline_in_public_items)] +#![allow(unused_imports)] #![allow(clippy::let_and_return)] #![allow(clippy::result_unit_err)] -#![allow(unused_imports)] +#![warn(clippy::missing_inline_in_public_items)] //! A crate for GBA development. //! @@ -88,10 +87,12 @@ //! break any of these assumptions, if you do that some or all of the code //! provided by this crate may become unsound. +use prelude::{GbaCell, IrqFn}; + mod macros; #[cfg(feature = "on_gba")] -pub mod asm_runtime; +mod asm_runtime; #[cfg(feature = "on_gba")] pub mod bios; pub mod builtin_art; @@ -104,8 +105,7 @@ pub mod fixed; pub mod gba_cell; pub mod interrupts; pub mod keys; -#[cfg(feature = "on_gba")] -pub mod mem_fns; +pub mod mem; #[cfg(feature = "on_gba")] pub mod mgba; #[cfg(feature = "on_gba")] @@ -116,6 +116,10 @@ pub mod sound; pub mod timers; pub mod video; +/// The function pointer that the assembly runtime calls when an interrupt +/// occurs. +pub static RUST_IRQ_HANDLER: GbaCell> = GbaCell::new(None); + /// Wraps a value to be aligned to a minimum of 4. /// /// If the size of the value held is already a multiple of 4 then this will be diff --git a/src/macros.rs b/src/macros.rs index fee6a660..46eab24d 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -1,6 +1,18 @@ #![allow(unused_macros)] #![allow(unused_imports)] +macro_rules! on_gba_or_unimplemented { + ($($token_tree:tt)*) => { + #[cfg(feature="on_gba")] + { + $($token_tree)* + } + #[cfg(not(feature="on_gba"))] + unimplemented!("Called code required to be on the GBA without `on_gba` enabled.") + } +} +pub(crate) use on_gba_or_unimplemented; + macro_rules! pub_const_fn_new_zeroed { () => { #[inline] diff --git a/src/mem.rs b/src/mem.rs new file mode 100644 index 00000000..9c5dbb75 --- /dev/null +++ b/src/mem.rs @@ -0,0 +1,138 @@ +use crate::macros::on_gba_or_unimplemented; + +/// Copies `u8` at a time between exclusive regions. +/// +/// * This will *always* copy one byte at a time, and the code is always stored +/// in IWRAM, making it suitable for use with SRAM memory. +/// +/// ## Safety +/// * As with all copying routines, the source must be readable for the size you +/// specify, and the destination must be writable for the size you specify. +/// * The regions must not overlap. 
+#[cfg_attr(feature = "on_gba", instruction_set(arm::a32))] +#[cfg_attr(feature = "on_gba", link_section = ".iwram.copy_u8_unchecked")] +pub unsafe extern "C" fn copy_u8_unchecked( + dest: *mut u8, src: *const u8, byte_count: usize, +) { + on_gba_or_unimplemented!(unsafe { + // Note(Lokathor): This loop setup assumes that the `byte_count` is usually + // greater than 0, and so subtracts first and then does a conditional + // load/store pair if the value (after subtracting) is greater than or equal + // to 0 (meaning that the value before the subtract *was* 1 or more). + core::arch::asm! { + "1:", + "subs {count}, {count}, #1", + "ldrbge {temp}, [{src}], #1", + "strbge {temp}, [{dest}], #1", + "bgt 1b", + temp = out(reg) _, + count = inout(reg) byte_count => _, + src = inout(reg) src => _, + dest = inout(reg) dest => _, + options(nostack) + } + }); +} + +/// Copies `[u32; 8]` sized chunks, to `dest` from `src` +/// +/// This will, in general, be slightly faster than a generic `memcpy`, but +/// slightly slower than using DMA. +/// +/// Particularly, this helps with: +/// * [`Tile4`][crate::video::Tile4] (one loop per tile). +/// * [`Tile8`][crate::video::Tile8] (two loops per tile). +/// * A palbank of [`Color`][crate::video::Color] values (one loop per palbank). +/// * A text mode screenblock (64 loops per screenblock). +/// * A Mode 3 bitmap (2400 loops). +/// * A Mode 4 bitmap (1200 loops). +/// +/// ## Safety +/// * As with all copying routines, the source must be readable for the size you +/// specify, and the destination must be writable for the size you specify. +/// * Both pointers must be aligned to 4. +/// * The regions must not overlap. +#[cfg_attr(feature = "on_gba", instruction_set(arm::a32))] +#[cfg_attr(feature = "on_gba", link_section = ".iwram.copy_u32x8_unchecked")] +pub unsafe fn copy_u32x8_unchecked( + dest: *mut [u32; 8], src: *const [u32; 8], count: usize, +) { + on_gba_or_unimplemented!(unsafe { + // Note(Lokathor): Same loop logic as `copy_u8_unchecked`, we're just + // processing bigger chunks of data at a time. + core::arch::asm!( + "1:", + "subs {count}, {count}, #1", + "ldmge {src}!, {{r3,r4,r5,r7, r8,r9,r12,lr}}", + "stmge {dest}!, {{r3,r4,r5,r7, r8,r9,r12,lr}}", + "bgt 1b", + + // Note(Lokathor): LLVM will always put `lr` on the stack as part of the + // push/pop for the function, even if we don't use `lr`, so we might as + // well use `lr`, because if we use a different register (such as `r10`) + // that would only add to the amount of push/pop LLVM does. + count = inout(reg) count => _, + dest = inout(reg) dest => _, + src = inout(reg) src => _, + out("r3") _, + out("r4") _, + out("r5") _, + out("r7") _, + out("r8") _, + out("r9") _, + out("r12") _, + out("lr") _, + options(nostack) + ) + }); +} + +/// Sets `word` in blocks of 80 per loop. +/// +/// This is intended for clearing VRAM to a particular color when using +/// background modes 3, 4, and 5. +/// * To clear the Mode 3 bitmap, pass `240` as the count. +/// * To clear a Mode 4 frame pass `120`. +/// * To clear a Mode 5 frame pass `128`. +#[cfg_attr(feature = "on_gba", instruction_set(arm::a32))] +#[cfg_attr(feature = "on_gba", link_section = ".iwram.set_u32x80_unchecked")] +pub unsafe extern "C" fn set_u32x80_unchecked( + dest: *mut [u32; 80], word: u32, count: usize, +) { + on_gba_or_unimplemented!(unsafe { + core::arch::asm!( + // Note(Lokathor): Same loop logic as `copy_u8_unchecked`, we're just + // processing bigger chunks of data at a time, and also setting rather + // than copying. 
+ "1:", + "subs {count}, {count}, #1", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "bgt 1b", + + // The assembler will give us a warning (that we can't easily disable) + // if the reg_list for `stm` doesn't give the registers in order from + // low to high, so we just manually pick registers. The count register + // and the pointer register can be anything else. + in("r1") word, + in("r3") word, + in("r4") word, + in("r5") word, + in("r7") word, + in("r8") word, + in("r12") word, + in("lr") word, + dest = inout(reg) dest => _, + count = inout(reg) count => _, + options(nostack), + ) + }); +} diff --git a/src/mem_fns.rs b/src/mem_fns.rs deleted file mode 100644 index bd925ad1..00000000 --- a/src/mem_fns.rs +++ /dev/null @@ -1,688 +0,0 @@ -//! Module for direct memory operations. -//! -//! Generally you don't need to call these yourself. Instead, the compiler will -//! insert calls to the functions defined here as necessary. - -use core::ffi::c_void; - -/// Byte copy between exclusive regions. -/// -/// * This will *always* copy one byte at a time, making it suitable for use -/// with SRAM memory. -/// -/// ## Safety -/// * If `byte_count` is zero then the pointers are not used and they can be any value. -/// * If `byte_count` is non-zero then: -/// * Both pointers must be valid for the number of bytes given. -/// * The two regions must either be *entirely* disjoint or *entirely* overlapping. -/// Partial overlap is not allowed. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memcpy1"] -pub unsafe extern "C" fn __aeabi_memcpy1( - dest: *mut u8, src: *const u8, byte_count: usize, -) { - core::arch::asm! { - "1:", - "subs {count}, {count}, #1", - "ldrbge {temp}, [{src}], #1", - "strbge {temp}, [{dest}], #1", - "bgt 1b", - temp = out(reg) _, - count = inout(reg) byte_count => _, - src = inout(reg) src => _, - dest = inout(reg) dest => _, - options(nostack) - } -} - -/// Halfword copy between exclusive regions. -/// -/// * If the `byte_count` is odd then a single byte copy will happen at the end. -/// -/// ## Safety -/// * If `byte_count` is zero then the pointers are not used and they can be any value. -/// * If `byte_count` is non-zero then: -/// * Both pointers must be valid for the span used and aligned to 2. -/// * The two regions must either be *entirely* disjoint or *entirely* overlapping. -/// Partial overlap is not allowed. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memcpy2"] -pub unsafe extern "C" fn __aeabi_memcpy2( - mut dest: *mut u16, mut src: *const u16, mut byte_count: usize, -) { - core::arch::asm! { - "1:", - "subs {count}, {count}, #2", - "ldrhge {temp}, [{src}], #2", - "strhge {temp}, [{dest}], #2", - "bgt 1b", - temp = out(reg) _, - count = inout(reg) byte_count, - src = inout(reg) src, - dest = inout(reg) dest, - options(nostack) - } - if byte_count != 0 { - let dest = dest.cast::(); - let src = src.cast::(); - dest.write_volatile(src.read_volatile()); - } -} - -/// Word copy between exclusive regions. 
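The three routines above replace the general-purpose `__aeabi_*` copy/set code deleted below with precise, IWRAM-resident loops. A rough usage sketch follows; the SRAM base address is the standard GBA value and only illustrative, and `video3_clear_to` later in this patch wraps the first call:

```rust
use gba::{mem::*, prelude::*};

fn clear_mode3_frame(c: Color) {
  // Mode 3 bitmap: 240*160 u16 = 19_200 u32 = 240 chunks of [u32; 80].
  let word = (u32::from(c.0) << 16) | u32::from(c.0);
  unsafe {
    set_u32x80_unchecked(VIDEO3_VRAM.as_usize() as *mut [u32; 80], word, 240)
  };
}

fn write_save_to_sram(save: &[u8]) {
  // Cart SRAM (0x0E00_0000) sits on an 8-bit bus, which is why the
  // byte-at-a-time routine kept in IWRAM is the right tool here.
  unsafe { copy_u8_unchecked(0x0E00_0000 as *mut u8, save.as_ptr(), save.len()) };
}
```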
-/// -/// * If `byte_count` is not a multiple of 4 then a halfword and/or byte copy -/// will happen at the end. -/// -/// ## Safety -/// * If `byte_count` is zero then the pointers are not used and they can be any value. -/// * If `byte_count` is non-zero then: -/// * Both pointers must be valid for the span used and aligned to 4. -/// * The two regions must either be *entirely* disjoint or *entirely* overlapping. -/// Partial overlap is not allowed. -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memcpy4"] -pub unsafe extern "C" fn __aeabi_memcpy4( - dest: *mut u32, src: *const u32, byte_count: usize, -) { - core::arch::asm! { - bracer::when!( "r2" >=u "#32" [label_id=2] { - bracer::with_pushed_registers!("{{r4-r9}}", { - "1:", - "subs r2, r2, #32", - "ldmge r1!, {{r3-r9, r12}}", - "stmge r0!, {{r3-r9, r12}}", - "bgt 1b", - }), - "bxeq lr", - }), - - // copy 4 words, two at a time - "tst r2, #0b10000", - "ldmne r1!, {{r3, r12}}", - "stmne r0!, {{r3, r12}}", - "ldmne r1!, {{r3, r12}}", - "stmne r0!, {{r3, r12}}", - "bics r2, r2, #0b10000", - "bxeq lr", - - // copy 2 and/or 1 words - "lsls r3, r2, #29", - "ldmcs r1!, {{r3, r12}}", - "stmcs r0!, {{r3, r12}}", - "ldrmi r3, [r1], #4", - "strmi r3, [r0], #4", - "bics r2, r2, #0b1100", - "bxeq lr", - - // copy halfword and/or byte - "lsls r3, r2, #31", - "ldrhcs r3, [r1], #2", - "strhcs r3, [r0], #2", - "ldrbmi r3, [r1], #1", - "strbmi r3, [r0], #1", - "bx lr", - options(noreturn), - } -} - -/// Just call [`__aeabi_memcpy4`] instead. -/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memcpy8"] -pub unsafe extern "C" fn __aeabi_memcpy8( - dest: *mut u32, src: *const u32, byte_count: usize, -) { - __aeabi_memcpy4(dest, src, byte_count); -} - -/// Arbitrary-width copy between exclusive regions. -/// -/// ## Safety -/// * If `byte_count` is zero then the pointers are not used and they can be any value. -/// * If `byte_count` is non-zero then: -/// * Both pointers must be valid for the span used (no required alignment). -/// * The two regions must either be *entirely* disjoint or *entirely* overlapping. -/// Partial overlap is not allowed. -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memcpy"] -pub unsafe extern "C" fn __aeabi_memcpy( - dest: *mut u8, src: *const u8, byte_count: usize, -) { - core::arch::asm! { - "cmp r2, #7", // if count <= (fix+word): just byte copy - "ble {__aeabi_memcpy1}", - - // check max coalign - "eor r3, r0, r1", - "lsls r3, r3, #31", - "bmi {__aeabi_memcpy1}", - "bcs 2f", - - // max coalign4, possible fixup and jump - "lsls r3, r0, #31", - "submi r2, r2, #1", - "ldrbmi r3, [r1], #1", - "strbmi r3, [r0], #1", - "subcs r2, r2, #2", - "ldrhcs r3, [r1], #2", - "strhcs r3, [r0], #2", - "b {__aeabi_memcpy4}", - - // max coalign2, possible fixup and jump - "2:", - "lsls r3, r0, #31", - "submi r2, r2, #1", - "ldrbmi r3, [r1], #1", - "strbmi r3, [r0], #1", - "b {__aeabi_memcpy2}", - - // - __aeabi_memcpy4 = sym __aeabi_memcpy4, - __aeabi_memcpy2 = sym __aeabi_memcpy2, - __aeabi_memcpy1 = sym __aeabi_memcpy1, - options(noreturn) - } -} - -/// Copy between exclusive regions, prefer [`__aeabi_memcpy`] if possible. -/// -/// This is the libc version of a memory copy. 
It's required to return the -/// `dest` pointer at the end of the call, which makes it need an extra -/// push/pop compared to a direct call to `__aeabi_memcpy`. -/// -/// * **Returns:** The `dest` pointer. -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.memcpy"] -pub unsafe extern "C" fn memcpy( - dest: *mut u8, src: *const u8, byte_count: usize, -) -> *mut u8 { - // I've seen a standard call to `__aeabi_memcpy` give weird codegen, - // so we (currently) do the call manually. - core::arch::asm! { - bracer::with_pushed_registers!("{{r0, lr}}", { - "bl {__aeabi_memcpy}", - }), - "bx lr", - __aeabi_memcpy = sym __aeabi_memcpy, - options(noreturn) - } -} - -// MOVE - -// used by `__aeabi_memmove` in some cases -#[inline] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.reverse_copy_u8"] -unsafe extern "C" fn reverse_copy_u8( - dest: *mut u8, src: *const u8, byte_count: usize, -) { - core::arch::asm! { - "1:", - "subs {count}, {count}, #1", - "ldrbge {temp}, [{src}, #-1]!", - "strbge {temp}, [{dest}, #-1]!", - "bgt 1b", - temp = out(reg) _, - count = inout(reg) byte_count => _, - src = inout(reg) src => _, - dest = inout(reg) dest => _, - options(nostack) - } -} - -// used by `__aeabi_memmove` in some cases -#[inline] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.reverse_copy_u16"] -unsafe extern "C" fn reverse_copy_u16( - mut dest: *mut u16, mut src: *const u16, mut byte_count: usize, -) { - core::arch::asm! { - "1:", - "subs {count}, {count}, #2", - "ldrhge {temp}, [{src}, #-2]!", - "strhge {temp}, [{dest}, #-2]!", - "bgt 1b", - temp = out(reg) _, - count = inout(reg) byte_count, - src = inout(reg) src, - dest = inout(reg) dest, - options(nostack) - } - if byte_count != 0 { - let dest = dest.cast::().sub(1); - let src = src.cast::().sub(1); - dest.write_volatile(src.read_volatile()); - } -} - -// used by `__aeabi_memmove` in some cases -#[naked] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.reverse_copy_u32"] -unsafe extern "C" fn reverse_copy_u32( - dest: *mut u32, src: *const u32, byte_count: usize, -) { - core::arch::asm! { - bracer::when!( "r2" >=u "#32" [label_id=2] { - bracer::with_pushed_registers!("{{r4-r9}}", { - "1:", - "subs r2, r2, #32", - "ldmdbcs r1!, {{r3-r9, r12}}", - "stmdbcs r0!, {{r3-r9, r12}}", - "bgt 1b", - }), - "bxeq lr", - }), - - // copy 4 words, two at a time - "tst r2, #0b10000", - "ldmdbne r1!, {{r3, r12}}", - "stmdbne r0!, {{r3, r12}}", - "ldmdbne r1!, {{r3, r12}}", - "stmdbne r0!, {{r3, r12}}", - "bics r2, r2, #0b10000", - "bxeq lr", - - // copy 2 and/or 1 words - "lsls r3, r2, #29", - "ldmdbcs r1!, {{r3, r12}}", - "stmdbcs r0!, {{r3, r12}}", - "ldrmi r3, [r1, #-4]!", - "strmi r3, [r0, #-4]!", - "bxeq lr", - - // copy halfword and/or byte - "lsls r2, r2, #31", - "ldrhcs r3, [r1, #-2]!", - "strhcs r3, [r0, #-2]!", - "ldrbmi r3, [r1, #-1]!", - "strbmi r3, [r0, #-1]!", - "bx lr", - options(noreturn), - } -} - -/// Copy between non-exclusive regions, prefer [`__aeabi_memmove`] if possible. -/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memmove4"] -pub unsafe extern "C" fn __aeabi_memmove4( - dest: *mut u32, src: *const u32, byte_count: usize, -) { - __aeabi_memmove(dest.cast(), src.cast(), byte_count) -} - -/// Copy between non-exclusive regions, prefer [`__aeabi_memmove`] if possible. 
-/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memmove8"] -pub unsafe extern "C" fn __aeabi_memmove8( - dest: *mut u32, src: *const u32, byte_count: usize, -) { - __aeabi_memmove(dest.cast(), src.cast(), byte_count) -} - -/// Copy between non-exclusive regions. -/// -/// * The pointers do not have a minimum alignment. The function will -/// automatically detect the best type of copy to perform. -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memmove"] -pub unsafe extern "C" fn __aeabi_memmove( - dest: *mut u8, src: *const u8, byte_count: usize, -) { - core::arch::asm! { - // when d > s we need to copy back-to-front - bracer::when!("r0" >=u "r1" [label_id=1] { - "add r0, r0, r2", - "add r1, r1, r2", - "eor r3, r0, r1", - "lsls r3, r3, #31", - "bmi {reverse_copy_u8}", - "bcs 2f", - - // max coalign4, possible fixup and jump - "lsls r3, r0, #31", - "submi r2, r2, #1", - "ldrbmi r3, [r1, #-1]!", - "strbmi r3, [r0, #-1]!", - "subcs r2, r2, #2", - "ldrhcs r3, [r1, #-2]!", - "strhcs r3, [r0, #-2]!", - "b {reverse_copy_u32}", - - // max coalign2, possible fixup and jump - "2:", - "tst r0, #1", - "sub r2, r2, #1", - "ldrb r3, [r1, #-1]!", - "strb r3, [r0, #-1]!", - "b {reverse_copy_u16}", - }), - // forward copy is a normal memcpy - "b {__aeabi_memcpy}", - __aeabi_memcpy = sym __aeabi_memcpy, - reverse_copy_u8 = sym reverse_copy_u8, - reverse_copy_u16 = sym reverse_copy_u16, - reverse_copy_u32 = sym reverse_copy_u32, - options(noreturn), - } -} - -/// Copy between non-exclusive regions, prefer [`__aeabi_memmove`] if possible. -/// -/// This is the libc version of a memory move. It's required to return the -/// `dest` pointer at the end of the call, which makes it need an extra -/// push/pop compared to a direct call to `__aeabi_memmove`. -/// -/// * **Returns:** The `dest` pointer. -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.memmove"] -pub unsafe extern "C" fn memmove( - dest: *mut u8, src: *const u8, byte_count: usize, -) -> *mut u8 { - core::arch::asm! { - bracer::with_pushed_registers!("{{r0, lr}}", { - "bl {__aeabi_memmove}", - }), - "bx lr", - __aeabi_memmove = sym __aeabi_memmove, - options(noreturn) - } -} - -// SET - -/// Copy between non-exclusive regions, prefer [`__aeabi_memset`] if possible. -/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memset4"] -pub unsafe extern "C" fn __aeabi_memset4( - dest: *mut u32, byte_count: usize, byte: i32, -) { - __aeabi_memset(dest.cast(), byte_count, byte) -} - -/// Copy between non-exclusive regions, prefer [`__aeabi_memset`] if possible. -/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memset8"] -pub unsafe extern "C" fn __aeabi_memset8( - dest: *mut u32, byte_count: usize, byte: i32, -) { - __aeabi_memset(dest.cast(), byte_count, byte) -} - -/// Sets all bytes in the region to the `byte` given. 
-/// -/// Because of historical reasons, the byte is passed in as an `i32`, but only -/// the lowest 8 bits are used. -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memset"] -pub unsafe extern "C" fn __aeabi_memset( - dest: *mut u8, byte_count: usize, byte: i32, -) { - core::arch::asm! { - bracer::when!("r1" >=u "#8" [label_id=7] { - // duplicate the byte across all of r2 and r3 - "and r2, r2, #0xFF", - "orr r2, r2, r2, lsl #8", - "orr r2, r2, r2, lsl #16", - "mov r3, r2", - - // align the pointer for word ops - "tst r0, #0b1", - "subne r1, r1, #1", - "strbne r2, [r0], #1", - "tst r0, #0b10", - "subne r1, r1, #2", - "strhne r2, [r0], #2", - - bracer::when!("r1" >=u "#32" [label_id=8] { - bracer::with_pushed_registers!("{{r4-r9}}", { - "mov r4, r2", - "mov r5, r2", - "mov r6, r2", - "mov r7, r2", - "mov r8, r2", - "mov r9, r2", - "1:", - "subs r1, r1, #32", - "stmge r0!, {{r2-r9}}", - "bgt 1b", - }), - "bxeq lr", - }), - - // set 4 words - "tst r1, #0b10000", - "stmne r0!, {{r2, r3}}", - "stmne r0!, {{r2, r3}}", - - // set 2 and/or 1 words - "lsls r12, r1, #29", - "stmcs r0!, {{r2, r3}}", - "strmi r2, [r0], #4", - - // set halfword and/or byte - "lsls r12, r1, #31", - "strhcs r2, [r0], #2", - "strbmi r2, [r0], #1", - "bx lr", - }), - // byte loop - "9:", - "subs r1, r1, #1", - "strbcs r2, [r0], #1", - "bgt 9b", - "bx lr", - options(noreturn) - } -} - -/// Write a value to all bytes in the region, prefer [`__aeabi_memset`] if -/// possible. -/// -/// This is the libc version of a memory set. It's required to return the `dest` -/// pointer at the end of the call, which makes it need an extra push/pop -/// compared to a direct call to `__aeabi_memset`. Also, the argument ordering -/// is swapped, so shuffling registers costs a few cycles. -/// -/// * **Returns:** The `dest` pointer. -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.memset"] -pub unsafe extern "C" fn memset( - dest: *mut u8, byte: i32, byte_count: usize, -) -> *mut u8 { - core::arch::asm! { - bracer::with_pushed_registers!("{{r0, lr}}", { - "mov r3, r2", - "mov r2, r1", - "mov r1, r3", - "bl {__aeabi_memset}", - }), - "bx lr", - __aeabi_memset = sym __aeabi_memset, - options(noreturn) - } -} - -// CLEAR - -/// Just call [`__aeabi_memset`] with 0 as the `byte` instead. -/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memclr4"] -pub unsafe extern "C" fn __aeabi_memclr4(dest: *mut u32, byte_count: usize) { - __aeabi_memset(dest.cast(), byte_count, 0) -} - -/// Just call [`__aeabi_memset`] with 0 as the `byte` instead. -/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memclr8"] -pub unsafe extern "C" fn __aeabi_memclr8(dest: *mut u32, byte_count: usize) { - __aeabi_memset(dest.cast(), byte_count, 0) -} - -/// Just call [`__aeabi_memset`] with 0 as the `byte` instead. -/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. 
-#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memclr"] -pub unsafe extern "C" fn __aeabi_memclr(dest: *mut u8, byte_count: usize) { - __aeabi_memset(dest, byte_count, 0) -} - -/// Reads 4 bytes, starting at the address given. -/// -/// See [__aeabi_uread4] -/// -/// [__aeabi_uread4]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uread4"] -unsafe extern "C" fn __aeabi_uread4(address: *const c_void) -> u32 { - core::arch::asm!( - "ldrb r2, [r0]", - "ldrb r3, [r0, #1]", - "orr r2, r2, r3, lsl #8", - "ldrb r3, [r0, #2]", - "orr r2, r2, r3, lsl #16", - "ldrb r3, [r0, #3]", - "orr r2, r2, r3, lsl #24", - "mov r0, r2", - "bx lr", - options(noreturn), - ) -} - -/// Writes 4 bytes, starting at the address given. -/// -/// See [__aeabi_uwrite4] -/// -/// [__aeabi_uwrite4]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uwrite4"] -unsafe extern "C" fn __aeabi_uwrite4(value: u32, address: *mut c_void) { - core::arch::asm!( - "strb r0, [r1]", - "lsr r2, r0, #8", - "strb r2, [r1, #1]", - "lsr r2, r2, #8", - "strb r2, [r1, #2]", - "lsr r2, r2, #8", - "strb r2, [r1, #3]", - "bx lr", - options(noreturn), - ) -} - -/// Reads 8 bytes, starting at the address given. -/// -/// See [__aeabi_uread8] -/// -/// [__aeabi_uread8]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uread8"] -unsafe extern "C" fn __aeabi_uread8(address: *const c_void) -> u64 { - core::arch::asm!( - "ldrb r1, [r0, #4]", - "ldrb r2, [r0, #5]", - "orr r1, r1, r2, lsl #8", - "ldrb r2, [r0, #6]", - "orr r1, r1, r2, lsl #16", - "ldrb r2, [r0, #7]", - "orr r1, r1, r2, lsl #24", - "b {__aeabi_uread4}", - __aeabi_uread4 = sym __aeabi_uread4, - options(noreturn), - ) -} - -/// Writes 8 bytes, starting at the address given. -/// -/// See [__aeabi_uwrite8] -/// -/// [__aeabi_uwrite8]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uwrite8"] -unsafe extern "C" fn __aeabi_uwrite8(value: u64, address: *mut c_void) { - core::arch::asm!( - "strb r0, [r2]", - "lsr r3, r0, #8", - "strb r3, [r2, #1]", - "lsr r3, r3, #8", - "strb r3, [r2, #2]", - "lsr r3, r3, #8", - "strb r3, [r2, #3]", - "strb r1, [r2, #4]", - "lsr r3, r1, #8", - "strb r3, [r2, #5]", - "lsr r3, r3, #8", - "strb r3, [r2, #6]", - "lsr r3, r3, #8", - "strb r3, [r2, #7]", - "bx lr", - options(noreturn), - ) -} diff --git a/src/prelude.rs b/src/prelude.rs index 45b30dc5..a2d9a6a8 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -3,6 +3,7 @@ #[cfg(feature = "on_gba")] pub use crate::{ asm_runtime::*, bios::*, dma::*, gba_cell::*, mgba::*, mmio::*, + RUST_IRQ_HANDLER, }; pub use crate::{ diff --git a/src/video/mod.rs b/src/video/mod.rs index 866b22a1..b876afb1 100644 --- a/src/video/mod.rs +++ b/src/video/mod.rs @@ -97,11 +97,16 @@ //! sort your object entries so that any lower priority objects are also the //! lower index objects. 
-use crate::macros::{ - pub_const_fn_new_zeroed, u16_bool_field, u16_enum_field, u16_int_field, -}; +use bytemuck::{Pod, TransparentWrapper, Zeroable}; + #[allow(unused_imports)] use crate::prelude::*; +use crate::{ + macros::{ + pub_const_fn_new_zeroed, u16_bool_field, u16_enum_field, u16_int_field, + }, + mem::{copy_u32x8_unchecked, set_u32x80_unchecked}, +}; pub mod obj; @@ -134,6 +139,10 @@ impl Color { } } +unsafe impl Zeroable for Color {} +unsafe impl Pod for Color {} +unsafe impl TransparentWrapper for Color {} + /// The video mode controls how each background layer will operate. #[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(u16)] @@ -315,3 +324,51 @@ impl TextEntry { Self(id & 0b11_1111_1111) } } + +#[inline] +pub fn video3_clear_to(c: Color) { + let u = u32::from(c.0) << 16 | u32::from(c.0); + unsafe { + let p = VIDEO3_VRAM.as_usize() as *mut _; + set_u32x80_unchecked(p, u, 240_usize); + } +} + +#[repr(C, align(4))] +pub struct Video3Bitmap(pub [Color; 240 * 160]); +impl Video3Bitmap { + /// Wraps an array of raw color bit data as a Video Mode 3 bitmap. + /// + /// This is intended for generating static values at compile time. You should + /// not attempt to call this function at runtime, because the argument to the + /// function is larger than the GBA's stack space. + #[inline] + #[must_use] + pub const fn new_from_u16(bits: [u16; 240 * 160]) -> Self { + Self(unsafe { core::mem::transmute(bits) }) + } +} + +#[inline] +pub fn video3_set_bitmap(bitmap: &Video3Bitmap) { + let p = VIDEO3_VRAM.as_usize() as *mut _; + unsafe { + copy_u32x8_unchecked(p, bitmap as *const _ as *const _, 2400_usize) + }; +} + +#[repr(C, align(4))] +pub struct Video4Indexmap(pub [u8; 240 * 160]); + +/// Sets the indexmap of the frame requested. +/// +/// ## Panics +/// Only frames 0 and 1 exist, if `frame` is 2 or more this will panic. +#[inline] +pub fn video4_set_indexmap(indexes: &Video4Indexmap, frame: usize) { + let p = VIDEO4_VRAM.get_frame(usize::from(frame)).unwrap().as_usize() + as *mut [u32; 8]; + unsafe { + copy_u32x8_unchecked(p, indexes as *const _ as *const _, 1200_usize) + }; +}
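Taken together, the video additions let the examples avoid raw pointer work entirely: the bitmap or indexmap is declared as a `static` (so the oversized array is built at compile time, per the `new_from_u16` note) and handed to a safe wrapper. A condensed sketch based on the `video3_test` and `video4_test` examples in this patch; the Mode 4 `DISPCNT` write isn't visible in the diff, so `VideoMode::_4` is an assumption here:

```rust
use gba::prelude::*;

static BITMAP: Video3Bitmap = Video3Bitmap::new_from_u16([0x77DE; 240 * 160]);
static INDEXES: Video4Indexmap = Video4Indexmap([0; 240 * 160]);

fn show_mode3() {
  video3_set_bitmap(&BITMAP);
  DISPCNT.write(
    DisplayControl::new().with_video_mode(VideoMode::_3).with_show_bg2(true),
  );
}

fn show_mode4(palette: &[Color]) {
  video4_set_indexmap(&INDEXES, 0);
  BG_PALETTE.iter().zip(palette.iter()).for_each(|(va, c)| va.write(*c));
  DISPCNT.write(
    DisplayControl::new().with_video_mode(VideoMode::_4).with_show_bg2(true),
  );
}
```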