diff --git a/Cargo.toml b/Cargo.toml index 8f58f904..efc4471f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,11 +15,12 @@ fixed = ["dep:fixed"] [dependencies] voladdress = "1.3.0" bitfrob = "1" -bracer = "0.1.2" +bracer = "0.3.1" critical-section = { version = "1.1.2", features = [ "restore-state-bool", ], optional = true } fixed = { version = "1.28.0", optional = true } +bytemuck = "1.17.1" [profile.dev] opt-level = 3 diff --git a/examples/hello.rs b/examples/hello.rs index 2c98e6df..b85c7416 100644 --- a/examples/hello.rs +++ b/examples/hello.rs @@ -36,16 +36,16 @@ extern "C" fn main() -> ! { writeln!(logger, "hello!").ok(); let fx_u: Fixed = - Fixed::::wrapping_from(7) + Fixed::::from_raw(12); + Fixed::::wrapping_from(7) + Fixed::::from_bits(12); writeln!(logger, "fixed unsigned: {fx_u:?}").ok(); let fx_i1: Fixed = - Fixed::::wrapping_from(8) + Fixed::::from_raw(15); + Fixed::::wrapping_from(8) + Fixed::::from_bits(15); writeln!(logger, "fixed signed positive: {fx_i1:?}").ok(); let fx_i2: Fixed = Fixed::::wrapping_from(0) - Fixed::::wrapping_from(3) - - Fixed::::from_raw(17); + - Fixed::::from_bits(17); writeln!(logger, "fixed signed negative: {fx_i2:?}").ok(); } diff --git a/examples/mode3_pong_example_game.rs b/examples/mode3_pong_example_game.rs index 67ec9cb2..82de25ed 100644 --- a/examples/mode3_pong_example_game.rs +++ b/examples/mode3_pong_example_game.rs @@ -1,12 +1,12 @@ -/* -* Made by Evan Goemer -* Discord: @evangoemer -*/ +/* + * Made by Evan Goemer + * Discord: @evangoemer + */ #![no_std] #![no_main] -use gba::{prelude::*, mem_fns::__aeabi_memset}; +use gba::{mem::set_u32x80_unchecked, prelude::*}; const SCREEN_WIDTH: u16 = 240; const SCREEN_HEIGHT: u16 = 160; @@ -16,91 +16,87 @@ const PADDLE_HEIGHT: u16 = 20; const BALL_SIZE: u16 = 2; struct Paddle { - x: u16, - y: u16, + x: u16, + y: u16, } struct Ball { - x: u16, - y: u16, - dx: i16, - dy: i16, + x: u16, + y: u16, + dx: i16, + dy: i16, } impl Paddle { - fn new(x: u16, y: u16) -> Self { - Self { - x, - y, - } + fn new(x: u16, y: u16) -> Self { + Self { x, y } + } + + fn update(&mut self) { + let keys = KEYINPUT.read(); + if keys.up() && self.y > 1 { + self.y -= 1; } - fn update(&mut self) { - let keys = KEYINPUT.read(); - if keys.up() && self.y > 1 { - self.y -= 1; - } - - if keys.down() && self.y + PADDLE_HEIGHT + 1 < SCREEN_HEIGHT { - self.y += 1; - } + if keys.down() && self.y + PADDLE_HEIGHT + 1 < SCREEN_HEIGHT { + self.y += 1; } + } } impl Ball { - fn new(x: u16, y: u16) -> Self { - Self { x, y, dx: 1, dy: 1 } + fn new(x: u16, y: u16) -> Self { + Self { x, y, dx: 1, dy: 1 } + } + + fn update(&mut self, paddle1: &Paddle, paddle2: &Paddle) { + if self.y <= 0 || self.y + BALL_SIZE >= SCREEN_HEIGHT { + self.dy = -self.dy; + } + + if self.x + BALL_SIZE >= paddle1.x + && self.x <= paddle1.x + PADDLE_WIDTH + && self.y + BALL_SIZE >= paddle1.y + && self.y <= paddle1.y + PADDLE_HEIGHT + { + self.dx = -self.dx; + self.dy = self.dy; + } + + if self.x + BALL_SIZE >= paddle2.x + && self.x <= paddle2.x + PADDLE_WIDTH + && self.y + BALL_SIZE >= paddle2.y + && self.y <= paddle2.y + PADDLE_HEIGHT + { + self.dx = -self.dx; + self.dy = self.dy; } - fn update(&mut self, paddle1: &Paddle, paddle2: &Paddle) { - if self.y <= 0 || self.y + BALL_SIZE >= SCREEN_HEIGHT { - self.dy = -self.dy; - } - - if self.x + BALL_SIZE >= paddle1.x - && self.x <= paddle1.x + PADDLE_WIDTH - && self.y + BALL_SIZE >= paddle1.y - && self.y <= paddle1.y + PADDLE_HEIGHT - { - self.dx = -self.dx; - self.dy = -self.dy; - } - - if self.x + BALL_SIZE >= paddle2.x - 
&& self.x <= paddle2.x + PADDLE_WIDTH - && self.y + BALL_SIZE >= paddle2.y - && self.y <= paddle2.y + PADDLE_HEIGHT - { - self.dx = -self.dx; - self.dy = -self.dy; - } - - - if self.x + BALL_SIZE <= 1 + BALL_SIZE { - self.x = SCREEN_WIDTH / 2 - BALL_SIZE / 2; - self.y = SCREEN_HEIGHT / 2 - BALL_SIZE / 2; - self.dx = 1; - self.dy = 1; - } - - if self.x >= SCREEN_WIDTH - BALL_SIZE - 1 { - self.x = SCREEN_WIDTH / 2 - BALL_SIZE / 2; - self.y = SCREEN_HEIGHT / 2 - BALL_SIZE / 2; - self.dx = -1; - self.dy = 1; - } - self.x = (self.x as i16 + self.dx) as u16; - self.y = (self.y as i16 + self.dy) as u16; + if self.x + BALL_SIZE <= 1 + BALL_SIZE { + self.x = SCREEN_WIDTH / 2 - BALL_SIZE / 2; + self.y = SCREEN_HEIGHT / 2 - BALL_SIZE / 2; + self.dx = 1; + self.dy = 1; } + + if self.x >= SCREEN_WIDTH - BALL_SIZE - 1 { + self.x = SCREEN_WIDTH / 2 - BALL_SIZE / 2; + self.y = SCREEN_HEIGHT / 2 - BALL_SIZE / 2; + self.dx = -1; + self.dy = 1; + } + self.x = (self.x as i16 + self.dx) as u16; + self.y = (self.y as i16 + self.dy) as u16; + } } static SPRITE_POSITIONS: [GbaCell; 6] = [ - GbaCell::new(0), - GbaCell::new(0), - GbaCell::new(0), - GbaCell::new(0), - GbaCell::new(0), - GbaCell::new(0), + GbaCell::new(0), + GbaCell::new(0), + GbaCell::new(0), + GbaCell::new(0), + GbaCell::new(0), + GbaCell::new(0), ]; #[panic_handler] @@ -110,50 +106,71 @@ fn panic_handler(_: &core::panic::PanicInfo) -> ! { #[no_mangle] fn main() -> ! { - DISPCNT.write( - DisplayControl::new().with_video_mode(VideoMode::_3).with_show_bg2(true), - ); - - RUST_IRQ_HANDLER.write(Some(draw_sprites)); - DISPSTAT.write(DisplayStatus::new().with_irq_vblank(true)); - IE.write(IrqBits::VBLANK); - IME.write(true); - - let mut left_paddle = Paddle::new(10, SCREEN_HEIGHT as u16 / 2 - PADDLE_HEIGHT / 2); - let mut right_paddle = Paddle::new(SCREEN_WIDTH as u16 - 10 - PADDLE_WIDTH, SCREEN_HEIGHT as u16 / 2 - PADDLE_HEIGHT / 2); - let mut ball = Ball::new(SCREEN_WIDTH as u16 / 2, SCREEN_HEIGHT as u16 / 2); - - loop { - left_paddle.update(); - right_paddle.update(); - ball.update(&left_paddle, &right_paddle); - - SPRITE_POSITIONS[0].write(left_paddle.x); - SPRITE_POSITIONS[1].write(left_paddle.y); - SPRITE_POSITIONS[2].write(right_paddle.x); - SPRITE_POSITIONS[3].write(right_paddle.y); - SPRITE_POSITIONS[4].write(ball.x); - SPRITE_POSITIONS[5].write(ball.y); - - VBlankIntrWait(); - } + DISPCNT.write( + DisplayControl::new().with_video_mode(VideoMode::_3).with_show_bg2(true), + ); + + RUST_IRQ_HANDLER.write(Some(draw_sprites)); + DISPSTAT.write(DisplayStatus::new().with_irq_vblank(true)); + IE.write(IrqBits::VBLANK); + IME.write(true); + + let mut left_paddle = + Paddle::new(10, SCREEN_HEIGHT as u16 / 2 - PADDLE_HEIGHT / 2); + let mut right_paddle = Paddle::new( + SCREEN_WIDTH as u16 - 10 - PADDLE_WIDTH, + SCREEN_HEIGHT as u16 / 2 - PADDLE_HEIGHT / 2, + ); + let mut ball = Ball::new(SCREEN_WIDTH as u16 / 2, SCREEN_HEIGHT as u16 / 2); + + loop { + left_paddle.update(); + right_paddle.update(); + ball.update(&left_paddle, &right_paddle); + + SPRITE_POSITIONS[0].write(left_paddle.x); + SPRITE_POSITIONS[1].write(left_paddle.y); + SPRITE_POSITIONS[2].write(right_paddle.x); + SPRITE_POSITIONS[3].write(right_paddle.y); + SPRITE_POSITIONS[4].write(ball.x); + SPRITE_POSITIONS[5].write(ball.y); + + VBlankIntrWait(); + } } +#[link_section = ".iwram.draw_sprites"] extern "C" fn draw_sprites(_bits: IrqBits) { - unsafe { - let p = VIDEO3_VRAM.as_usize() as *mut u8; - __aeabi_memset(p, 240*160*2, 0) - } - - draw_rect(SPRITE_POSITIONS[0].read(), 
SPRITE_POSITIONS[1].read(), PADDLE_WIDTH, PADDLE_HEIGHT, Color::WHITE); - draw_rect(SPRITE_POSITIONS[2].read(), SPRITE_POSITIONS[3].read(), PADDLE_WIDTH, PADDLE_HEIGHT, Color::WHITE); - draw_rect(SPRITE_POSITIONS[4].read(), SPRITE_POSITIONS[5].read(), BALL_SIZE, BALL_SIZE, Color::WHITE); + video3_clear_to(Color::BLACK); + + draw_rect( + SPRITE_POSITIONS[0].read(), + SPRITE_POSITIONS[1].read(), + PADDLE_WIDTH, + PADDLE_HEIGHT, + Color::WHITE, + ); + draw_rect( + SPRITE_POSITIONS[2].read(), + SPRITE_POSITIONS[3].read(), + PADDLE_WIDTH, + PADDLE_HEIGHT, + Color::WHITE, + ); + draw_rect( + SPRITE_POSITIONS[4].read(), + SPRITE_POSITIONS[5].read(), + BALL_SIZE, + BALL_SIZE, + Color::WHITE, + ); } +#[link_section = ".iwram.draw_rect"] fn draw_rect(x: u16, y: u16, width: u16, height: u16, color: Color) { - for i in 0..width { - for j in 0..height { - VIDEO3_VRAM.index((x + i) as usize, (y + j) as usize).write(color); - } + for i in 0..width { + for j in 0..height { + VIDEO3_VRAM.index((x + i) as usize, (y + j) as usize).write(color); } + } } diff --git a/examples/video3_test.rs b/examples/video3_test.rs index 4aa664db..8da2f456 100644 --- a/examples/video3_test.rs +++ b/examples/video3_test.rs @@ -1,7 +1,7 @@ #![no_std] #![no_main] -use gba::{mem_fns::__aeabi_memcpy, prelude::*}; +use gba::prelude::*; #[panic_handler] fn panic_handler(info: &core::panic::PanicInfo) -> ! { @@ -15,21 +15,14 @@ fn panic_handler(info: &core::panic::PanicInfo) -> ! { #[no_mangle] fn main() -> ! { - let a = TEXT_SCREENBLOCKS.get_frame(0).unwrap().as_usize(); - unsafe { - __aeabi_memcpy( - a as _, - PIXELS.as_ptr().cast(), - core::mem::size_of_val(PIXELS) as _, - ) - }; + video3_set_bitmap(&BITMAP); DISPCNT.write( DisplayControl::new().with_video_mode(VideoMode::_3).with_show_bg2(true), ); loop {} } -pub const PIXELS: &[u16] = &[ +pub static BITMAP: Video3Bitmap = Video3Bitmap::new_from_u16([ 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, @@ -4297,4 +4290,4 @@ pub const PIXELS: &[u16] = &[ 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, 0x77DE, -]; +]); diff --git a/examples/video4_test.rs b/examples/video4_test.rs index 56cbcc41..3800517b 100644 --- a/examples/video4_test.rs +++ b/examples/video4_test.rs @@ -1,7 +1,7 @@ #![no_std] #![no_main] -use gba::{mem_fns::__aeabi_memcpy, prelude::*}; +use gba::{mem::copy_u32x8_unchecked, prelude::*}; #[panic_handler] fn panic_handler(info: &core::panic::PanicInfo) -> ! { @@ -15,14 +15,7 @@ fn panic_handler(info: &core::panic::PanicInfo) -> ! { #[no_mangle] fn main() -> ! { - let a = TEXT_SCREENBLOCKS.get_frame(0).unwrap().as_usize(); - unsafe { - __aeabi_memcpy( - a as _, - INDEXES.as_ptr().cast(), - core::mem::size_of_val(INDEXES) as _, - ) - }; + video4_set_indexmap(&INDEXES, 0); BG_PALETTE.iter().zip(PALETTE.iter()).for_each(|(va, i)| { va.write(Color(*i)); }); @@ -32,7 +25,9 @@ fn main() -> ! 
{ loop {} } -pub const INDEXES: &[u8] = &[ +pub const PALETTE: &[u16] = &[0x77DE, 0x2E06, 0x27BE, 0x61C5, 0x2518]; + +pub static INDEXES: Video4Indexmap = Video4Indexmap([ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -1510,6 +1505,4 @@ pub const INDEXES: &[u8] = &[ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -]; - -pub const PALETTE: &[u16] = &[0x77DE, 0x2E06, 0x27BE, 0x61C5, 0x2518]; +]); diff --git a/src/asm_runtime.rs b/src/asm_runtime.rs index 63f2749b..e384ae2a 100644 --- a/src/asm_runtime.rs +++ b/src/asm_runtime.rs @@ -14,36 +14,55 @@ use crate::{ gba_cell::GbaCell, interrupts::IrqFn, mgba::MGBA_LOGGING_ENABLE_REQUEST, - mmio::{DMA3_SRC, IME, MGBA_LOG_ENABLE}, + mmio::{DMA3_SRC, IME, MGBA_LOG_ENABLE, WAITCNT}, }; -/// The function pointer that the assembly runtime calls when an interrupt -/// occurs. -pub static RUST_IRQ_HANDLER: GbaCell> = GbaCell::new(None); - const DMA_32_BIT_MEMCPY: DmaControl = DmaControl::new().with_transfer_32bit(true).with_enabled(true); const DMA3_OFFSET: usize = DMA3_SRC.as_usize() - 0x0400_0000; -const IME_OFFSET: usize = IME.as_usize() - 0x0400_0000; +const WAITCNT_OFFSET: usize = WAITCNT.as_usize() - 0x0400_0000; + +// Proc-macros can't see the target being built for, so we use this declarative +// macro to determine if we're on a thumb target (and need to force our asm into +// a32 mode) or if we're not on thumb (and our asm can pass through untouched). +#[cfg(target_feature = "thumb-mode")] +macro_rules! force_a32 { + ($($asm_line:expr),+ $(,)?) => { + bracer::t32_with_a32_scope! { + $( concat!($asm_line, "\n") ),+ , + } + } +} +#[cfg(not(target_feature = "thumb-mode"))] +macro_rules! force_a32 { + ($($asm_line:expr),+ $(,)?) => { + concat!( + $( concat!($asm_line, "\n") ),+ , + ) + } +} + +core::arch::global_asm! { + bracer::put_fn_in_section!(".text.gba_rom_header"), + ".global __start", + "__start:", -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".text.gba_rom_header"] -unsafe extern "C" fn __start() -> ! { - core::arch::asm!( + force_a32!{ + // space for the header "b 1f", ".space 0xE0", "1:", /* post header */ + + // set the waitstate control to the GBATEK suggested setting. "mov r12, #{mmio_base}", "add r0, r12, #{waitcnt_offset}", "ldr r1, ={waitcnt_setting}", "strh r1, [r0]", - /* iwram copy */ + // Initialize IWRAM "ldr r4, =__iwram_word_copy_count", - bracer::when!("r4" != "#0" [label_id=1] { + bracer::when!(("r4" != "#0")[1] { "add r3, r12, #{dma3_offset}", "mov r5, #{dma3_setting}", "ldr r0, =__iwram_start", @@ -54,9 +73,9 @@ unsafe extern "C" fn __start() -> ! { "strh r5, [r3, #10]", /* set control bits */ }), - /* ewram copy */ + // Initialize EWRAM "ldr r4, =__ewram_word_copy_count", - bracer::when!("r4" != "#0" [label_id=1] { + bracer::when!(("r4" != "#0")[1] { "add r3, r12, #{dma3_offset}", "mov r5, #{dma3_setting}", "ldr r0, =__ewram_start", @@ -67,9 +86,9 @@ unsafe extern "C" fn __start() -> ! 
{ "strh r5, [r3, #10]", /* set control bits */ }), - /* bss zero */ + // Zero the BSS region "ldr r4, =__bss_word_clear_count", - bracer::when!("r4" != "#0" [label_id=1] { + bracer::when!(("r4" != "#0")[1] { "ldr r0, =__bss_start", "mov r2, #0", "2:", @@ -78,269 +97,70 @@ unsafe extern "C" fn __start() -> ! { "bne 2b", }), - /* assign the runtime irq handler */ - "ldr r1, ={runtime_irq_handler}", + // Tell the BIOS where our runtime's handler is. + "ldr r1, =__runtime_irq_handler", "str r1, [r12, #-4]", - /* ask for mGBA logging to be enabled. This should be harmless if we're not using mgba. */ + // Enable mGBA logging, which is harmless when not in mGBA "ldr r0, ={mgba_log_enable}", "ldr r1, ={mgba_logging_enable_request}", - "str r1, [r0]", + "strh r1, [r0]", - /* call to rust main */ + // Call the `main` function (defined by the user's program) "ldr r0, =main", "bx r0", - // main shouldn't return, but if it does just SoftReset + + // `main` shouldn't return, but if it does just SoftReset "swi #0", - mmio_base = const 0x0400_0000, - waitcnt_offset = const 0x204, - waitcnt_setting = const 0x4317 /*sram8,r0:3.1,r1:4.2,r2:8.2,no_phi,prefetch*/, - dma3_offset = const DMA3_OFFSET, - dma3_setting = const DMA_32_BIT_MEMCPY.to_u16(), - runtime_irq_handler = sym runtime_irq_handler, - mgba_log_enable = const MGBA_LOG_ENABLE.as_usize(), - mgba_logging_enable_request = const MGBA_LOGGING_ENABLE_REQUEST, - options(noreturn) - ) + }, + + // Define Our Constants + mmio_base = const 0x0400_0000, + waitcnt_offset = const WAITCNT_OFFSET, + waitcnt_setting = const 0x4317 /*sram8,r0:3.1,r1:4.2,r2:8.2,no_phi,prefetch*/, + dma3_offset = const DMA3_OFFSET, + dma3_setting = const DMA_32_BIT_MEMCPY.to_u16(), + mgba_log_enable = const MGBA_LOG_ENABLE.as_usize(), + mgba_logging_enable_request = const MGBA_LOGGING_ENABLE_REQUEST, } -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.runtime.irq.handler"] -unsafe extern "C" fn runtime_irq_handler() { +// This handler DOES NOT allow nested interrupts at this time. +core::arch::global_asm! { + bracer::put_fn_in_section!(".text.gba_rom_header"), + ".global __runtime_irq_handler", // On Entry: r0 = 0x0400_0000 (mmio_base) - core::arch::asm!( - /* swap IME off, user can turn it back on if they want */ - "add r12, r0, #{ime_offset}", - "mov r3, #0", - "swp r3, r3, [r12]", - - /* Read/Update IE and IF */ - "ldr r0, [r12, #-8]", - "and r0, r0, r0, LSR #16", - "strh r0, [r12, #-6]", - - /* Read/Update BIOS_IF */ - "sub r2, r12, #(0x208+8)", - "ldrh r1, [r2]", - "orr r1, r1, r0", - "strh r1, [r2]", - - /* Call the Rust fn pointer (if set), using System mode */ - "ldr r1, ={RUST_IRQ_HANDLER}", - "ldr r1, [r1]", - bracer::when!("r1" != "#0" [label_id=9] { - bracer::with_spsr_held_in!("r2", { - bracer::set_cpu_control!(System, irq_masked: false, fiq_masked: false), - - // Note(Lokathor): We are *SKIPPING* the part where we ensure that the - // System stack pointer is aligned to 8 during the call to the rust - // function. This is *technically* against the AAPCS ABI, but the GBA's - // ARMv4T CPU does not even support any instructions that require an - // alignment of 8. By not bothering to align the stack, we save about 5 - // cycles total. Which is neat, but if this were on the DS (which has an - // ARMv5TE CPU) you'd want to ensure the aligned stack. 
- - bracer::with_pushed_registers!("{{r2, r3, r12, lr}}", { - bracer::adr_lr_then_bx_to!(reg="r1", label_id=1) - }), - - bracer::set_cpu_control!(Supervisor, irq_masked: true, fiq_masked: false), - }), + // We're allowed to use the usual C ABI registers. + "__runtime_irq_handler:", + + force_a32!{ + /* A fox wizard told me how to do this one */ + // handle MMIO interrupt system + "mov r12, 0x04000000", // load r12 with a 1 cycle value + "ldr r0, [r12, #0x200]!", // load IE_IF with r12 writeback + "and r0, r0, r0, LSR #16", // bits = IE & IF + "strh r0, [r12, #2]", // write16 to just IF + // handle BIOS IntrWait system + "ldr r1, [r12, #-0x208]!", // load BIOS_IF_?? with r12 writeback + "orr r1, r1, r0", // mark `bits` as `has_occurred` + "strh r1, [r12]", // write16 to just BIOS_IF + + // Get the rust code handler fn pointer, call it if non-null. + "ldr r12, ={RUST_IRQ_HANDLER}", + "ldr r12, [r12]", + bracer::when!(("r12" != "#0")[1] { + bracer::a32_read_spsr_to!("r3"), + "push {{r3, lr}}", + bracer::a32_set_cpu_control!(System, irq_masked = true, fiq_masked = true), + bracer::a32_fake_blx!("r12"), + bracer::a32_set_cpu_control!(IRQ, irq_masked = true, fiq_masked = true), + "pop {{r3, lr}}", + bracer::a32_write_spsr_from!("r3"), }), - /* Restore initial IME setting and return */ - "swp r3, r3, [r12]", + // return to the BIOS "bx lr", - ime_offset = const IME_OFFSET, - RUST_IRQ_HANDLER = sym RUST_IRQ_HANDLER, - options(noreturn) - ) -} + }, -// For now, the division fns can just keep living here. - -/// Returns 0 in `r0`, while placing the `numerator` into `r1`. -/// -/// This is written in that slightly strange way so that `div` function and -/// `divmod` functions can share the same code path. -/// -/// See: [__aeabi_idiv0][aeabi-division-by-zero] -/// -/// [aeabi-division-by-zero]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#division-by-zero -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -// this should literally never get called for real, so we leave it in ROM -extern "C" fn __aeabi_idiv0(numerator: i32) -> i32 { - unsafe { - core::arch::asm!( - // this comment stops rustfmt from making this a one-liner - "mov r1, r0", - "mov r0, #0", - "bx lr", - options(noreturn) - ) - } -} - -/// Returns `u32 / u32` -/// -/// This implementation is *not* the fastest possible division, but it is -/// extremely compact. -/// -/// See: [__aeabi_uidiv][aeabi-integer-32-32-division] -/// -/// [aeabi-integer-32-32-division]: -/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uidiv"] -extern "C" fn __aeabi_uidiv(numerator: u32, denominator: u32) -> u32 { - // Note(Lokathor): Other code in this module relies on being able to call this - // function without affecting r12, so any future implementations of this code - // **must not** destroy r12. 
- unsafe { - core::arch::asm!( - // Check for divide by 0 - "cmp r1, #0", - "beq {__aeabi_idiv0}", - // r3(shifted_denom) = denom - "mov r3, r1", - // while shifted_denom < (num>>1): shifted_denom =<< 1; - "cmp r3, r0, lsr #1", - "2:", - "lslls r3, r3, #1", - "cmp r3, r0, lsr #1", - "bls 2b", - // r0=quot(init 0), r1=denom, r2=num, r3=shifted_denom - "mov r2, r0", - "mov r0, #0", - // subtraction loop - "3:", - "cmp r2, r3", - "subcs r2, r2, r3", - "adc r0, r0, r0", - "mov r3, r3, lsr #1", - "cmp r3, r1", - "bcs 3b", - "bx lr", - __aeabi_idiv0 = sym __aeabi_idiv0, - options(noreturn) - ) - } -} - -/// Returns `i32 / i32` -/// -/// See: [__aeabi_idiv][aeabi-integer-32-32-division] -/// -/// [aeabi-integer-32-32-division]: -/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.idiv"] -extern "C" fn __aeabi_idiv(numerator: i32, denominator: i32) -> u32 { - unsafe { - core::arch::asm!( - // determine if `numerator` and `denominator` are the same sign - "eor r12, r1, r0", - // convert both values to their unsigned absolute value. - "cmp r0, #0", - "rsblt r0, r0, #0", - "cmp r1, #0", - "rsclt r1, r1, #0", - bracer::with_pushed_registers!("{{lr}}", { - // divide them using `u32` division (this will check for divide by 0) - "bl {__aeabi_uidiv}", - }), - // if they started as different signs, flip the output's sign. - "cmp r12, #0", - "rsblt r0, r0, #0", - "bx lr", - __aeabi_uidiv = sym __aeabi_uidiv, - options(noreturn) - ) - } -} - -/// Returns `(u32 / u32, u32 % u32)` in `(r0, r1)`. -/// -/// The `u64` return value is a mild lie that gets Rust to grab up both the `r0` -/// and `r1` values when the function returns. If you transmute the return value -/// into `[u32; 2]` then you can separate the two parts of the return value, and -/// it will have no runtime cost. -/// -/// See: [__aeabi_uidivmod][aeabi-integer-32-32-division] -/// -/// [aeabi-integer-32-32-division]: -/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uidivmod"] -extern "C" fn __aeabi_uidivmod(numerator: u32, denominator: u32) -> u64 { - unsafe { - core::arch::asm!( - // We need to save *both* input args until after the uidiv call. One of - // them can be saved in `r12` because we know our uidiv doesn't actually - // touch `r12`, while the other will be pushed onto the stack along with - // `lr`. Since the function's output will be in `r0`, we push/pop `r1`. - "mov r12, r0", - bracer::with_pushed_registers!("{{r1, lr}}", { - "bl {__aeabi_uidiv}", - }), - // Now r0 holds the `quot`, and we use it along with the input args to - // calculate the `rem`. - "mul r2, r0, r1", - "sub r1, r12, r2", - "bx lr", - __aeabi_uidiv = sym __aeabi_uidiv, - options(noreturn) - ) - } -} - -/// Returns `(i32 / i32, i32 % i32)` in `(r0, r1)`. -/// -/// The `u64` return value is a mild lie that gets Rust to grab up both the `r0` -/// and `r1` values when the function returns. If you transmute the return value -/// into `[i32; 2]` then you can separate the two parts of the return value, and -/// it will have no runtime cost. 
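// A minimal caller-side sketch of the `(r0, r1)` packing trick described in the
// doc comment above, under the assumption that the packed `u64` came from one of
// the divmod routines: on the GBA's little-endian ARM the low half of the value
// is r0 (the quotient) and the high half is r1 (the remainder). `split_divmod`
// is illustrative only and is not part of this patch.
fn split_divmod(packed: u64) -> (u32, u32) {
  // transmute keeps the exact bits, so this compiles down to "use r0 and r1 as-is".
  let [quot, rem]: [u32; 2] = unsafe { core::mem::transmute(packed) };
  (quot, rem)
}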
-/// -/// See: [__aeabi_idivmod][aeabi-integer-32-32-division] -/// -/// [aeabi-integer-32-32-division]: -/// https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#integer-32-32-32-division-functions -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.idivmod"] -extern "C" fn __aeabi_idivmod(numerator: i32, denominator: i32) -> u64 { - unsafe { - core::arch::asm!( - bracer::with_pushed_registers!("{{r4, r5, lr}}", { - // store old numerator then make it the unsigned absolute - "movs r4, r0", - "rsblt r0, r0, #0", - // store old denominator then make it the unsigned absolute - "movs r5, r1", - "rsblt r1, r1, #0", - // divmod using unsigned. - "bl {__aeabi_uidivmod}", - // if signs started opposite, quot becomes negative - "eors r12, r4, r5", - "rsblt r0, r0, #0", - // if numerator started negative, rem is negative - "cmp r4, #0", - "rsblt r1, r1, #0", - }), - "bx lr", - __aeabi_uidivmod = sym __aeabi_uidivmod, - options(noreturn) - ) - } + // Define Our Constants + RUST_IRQ_HANDLER = sym crate::RUST_IRQ_HANDLER, } diff --git a/src/lib.rs b/src/lib.rs index 7388f286..06d77795 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,9 +1,8 @@ #![no_std] -#![feature(naked_functions)] -#![warn(clippy::missing_inline_in_public_items)] +#![allow(unused_imports)] #![allow(clippy::let_and_return)] #![allow(clippy::result_unit_err)] -#![allow(unused_imports)] +#![warn(clippy::missing_inline_in_public_items)] //! A crate for GBA development. //! @@ -88,10 +87,12 @@ //! break any of these assumptions, if you do that some or all of the code //! provided by this crate may become unsound. +use prelude::{GbaCell, IrqFn}; + mod macros; #[cfg(feature = "on_gba")] -pub mod asm_runtime; +mod asm_runtime; #[cfg(feature = "on_gba")] pub mod bios; pub mod builtin_art; @@ -104,8 +105,7 @@ pub mod fixed; pub mod gba_cell; pub mod interrupts; pub mod keys; -#[cfg(feature = "on_gba")] -pub mod mem_fns; +pub mod mem; #[cfg(feature = "on_gba")] pub mod mgba; #[cfg(feature = "on_gba")] @@ -116,6 +116,10 @@ pub mod sound; pub mod timers; pub mod video; +/// The function pointer that the assembly runtime calls when an interrupt +/// occurs. +pub static RUST_IRQ_HANDLER: GbaCell> = GbaCell::new(None); + /// Wraps a value to be aligned to a minimum of 4. /// /// If the size of the value held is already a multiple of 4 then this will be diff --git a/src/macros.rs b/src/macros.rs index fee6a660..46eab24d 100644 --- a/src/macros.rs +++ b/src/macros.rs @@ -1,6 +1,18 @@ #![allow(unused_macros)] #![allow(unused_imports)] +macro_rules! on_gba_or_unimplemented { + ($($token_tree:tt)*) => { + #[cfg(feature="on_gba")] + { + $($token_tree)* + } + #[cfg(not(feature="on_gba"))] + unimplemented!("Called code required to be on the GBA without `on_gba` enabled.") + } +} +pub(crate) use on_gba_or_unimplemented; + macro_rules! pub_const_fn_new_zeroed { () => { #[inline] diff --git a/src/mem.rs b/src/mem.rs new file mode 100644 index 00000000..9c5dbb75 --- /dev/null +++ b/src/mem.rs @@ -0,0 +1,138 @@ +use crate::macros::on_gba_or_unimplemented; + +/// Copies `u8` at a time between exclusive regions. +/// +/// * This will *always* copy one byte at a time, and the code is always stored +/// in IWRAM, making it suitable for use with SRAM memory. +/// +/// ## Safety +/// * As with all copying routines, the source must be readable for the size you +/// specify, and the destination must be writable for the size you specify. +/// * The regions must not overlap. 
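// A hedged usage sketch for the byte-at-a-time copy defined just below, assuming
// `copy_u8_unchecked` is in scope and using the usual 0x0E00_0000 cart SRAM base.
// The helper name `read_save` and the 32-byte buffer are illustrative, not part
// of this patch. Because the loop is strictly 8-bit and runs from IWRAM, it
// satisfies SRAM's byte-access requirement mentioned above.
fn read_save(buf: &mut [u8; 32]) {
  const SRAM: *const u8 = 0x0E00_0000usize as *const u8;
  // Safety: `buf` is writable for its length, SRAM is readable for that length,
  // and the two regions cannot overlap.
  unsafe { copy_u8_unchecked(buf.as_mut_ptr(), SRAM, buf.len()) };
}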
+#[cfg_attr(feature = "on_gba", instruction_set(arm::a32))] +#[cfg_attr(feature = "on_gba", link_section = ".iwram.copy_u8_unchecked")] +pub unsafe extern "C" fn copy_u8_unchecked( + dest: *mut u8, src: *const u8, byte_count: usize, +) { + on_gba_or_unimplemented!(unsafe { + // Note(Lokathor): This loop setup assumes that the `byte_count` is usually + // greater than 0, and so subtracts first and then does a conditional + // load/store pair if the value (after subtracting) is greater than or equal + // to 0 (meaning that the value before the subtract *was* 1 or more). + core::arch::asm! { + "1:", + "subs {count}, {count}, #1", + "ldrbge {temp}, [{src}], #1", + "strbge {temp}, [{dest}], #1", + "bgt 1b", + temp = out(reg) _, + count = inout(reg) byte_count => _, + src = inout(reg) src => _, + dest = inout(reg) dest => _, + options(nostack) + } + }); +} + +/// Copies `[u32; 8]` sized chunks, to `dest` from `src` +/// +/// This will, in general, be slightly faster than a generic `memcpy`, but +/// slightly slower than using DMA. +/// +/// Particularly, this helps with: +/// * [`Tile4`][crate::video::Tile4] (one loop per tile). +/// * [`Tile8`][crate::video::Tile8] (two loops per tile). +/// * A palbank of [`Color`][crate::video::Color] values (one loop per palbank). +/// * A text mode screenblock (64 loops per screenblock). +/// * A Mode 3 bitmap (2400 loops). +/// * A Mode 4 bitmap (1200 loops). +/// +/// ## Safety +/// * As with all copying routines, the source must be readable for the size you +/// specify, and the destination must be writable for the size you specify. +/// * Both pointers must be aligned to 4. +/// * The regions must not overlap. +#[cfg_attr(feature = "on_gba", instruction_set(arm::a32))] +#[cfg_attr(feature = "on_gba", link_section = ".iwram.copy_u32x8_unchecked")] +pub unsafe fn copy_u32x8_unchecked( + dest: *mut [u32; 8], src: *const [u32; 8], count: usize, +) { + on_gba_or_unimplemented!(unsafe { + // Note(Lokathor): Same loop logic as `copy_u8_unchecked`, we're just + // processing bigger chunks of data at a time. + core::arch::asm!( + "1:", + "subs {count}, {count}, #1", + "ldmge {src}!, {{r3,r4,r5,r7, r8,r9,r12,lr}}", + "stmge {dest}!, {{r3,r4,r5,r7, r8,r9,r12,lr}}", + "bgt 1b", + + // Note(Lokathor): LLVM will always put `lr` on the stack as part of the + // push/pop for the function, even if we don't use `lr`, so we might as + // well use `lr`, because if we use a different register (such as `r10`) + // that would only add to the amount of push/pop LLVM does. + count = inout(reg) count => _, + dest = inout(reg) dest => _, + src = inout(reg) src => _, + out("r3") _, + out("r4") _, + out("r5") _, + out("r7") _, + out("r8") _, + out("r9") _, + out("r12") _, + out("lr") _, + options(nostack) + ) + }); +} + +/// Sets `word` in blocks of 80 per loop. +/// +/// This is intended for clearing VRAM to a particular color when using +/// background modes 3, 4, and 5. +/// * To clear the Mode 3 bitmap, pass `240` as the count. +/// * To clear a Mode 4 frame pass `120`. +/// * To clear a Mode 5 frame pass `128`. +#[cfg_attr(feature = "on_gba", instruction_set(arm::a32))] +#[cfg_attr(feature = "on_gba", link_section = ".iwram.set_u32x80_unchecked")] +pub unsafe extern "C" fn set_u32x80_unchecked( + dest: *mut [u32; 80], word: u32, count: usize, +) { + on_gba_or_unimplemented!(unsafe { + core::arch::asm!( + // Note(Lokathor): Same loop logic as `copy_u8_unchecked`, we're just + // processing bigger chunks of data at a time, and also setting rather + // than copying. 
+ "1:", + "subs {count}, {count}, #1", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "stmge {dest}!, {{r1,r3,r4,r5, r7,r8,r12,lr}}", + "bgt 1b", + + // The assembler will give us a warning (that we can't easily disable) + // if the reg_list for `stm` doesn't give the registers in order from + // low to high, so we just manually pick registers. The count register + // and the pointer register can be anything else. + in("r1") word, + in("r3") word, + in("r4") word, + in("r5") word, + in("r7") word, + in("r8") word, + in("r12") word, + in("lr") word, + dest = inout(reg) dest => _, + count = inout(reg) count => _, + options(nostack), + ) + }); +} diff --git a/src/mem_fns.rs b/src/mem_fns.rs deleted file mode 100644 index bd925ad1..00000000 --- a/src/mem_fns.rs +++ /dev/null @@ -1,688 +0,0 @@ -//! Module for direct memory operations. -//! -//! Generally you don't need to call these yourself. Instead, the compiler will -//! insert calls to the functions defined here as necessary. - -use core::ffi::c_void; - -/// Byte copy between exclusive regions. -/// -/// * This will *always* copy one byte at a time, making it suitable for use -/// with SRAM memory. -/// -/// ## Safety -/// * If `byte_count` is zero then the pointers are not used and they can be any value. -/// * If `byte_count` is non-zero then: -/// * Both pointers must be valid for the number of bytes given. -/// * The two regions must either be *entirely* disjoint or *entirely* overlapping. -/// Partial overlap is not allowed. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memcpy1"] -pub unsafe extern "C" fn __aeabi_memcpy1( - dest: *mut u8, src: *const u8, byte_count: usize, -) { - core::arch::asm! { - "1:", - "subs {count}, {count}, #1", - "ldrbge {temp}, [{src}], #1", - "strbge {temp}, [{dest}], #1", - "bgt 1b", - temp = out(reg) _, - count = inout(reg) byte_count => _, - src = inout(reg) src => _, - dest = inout(reg) dest => _, - options(nostack) - } -} - -/// Halfword copy between exclusive regions. -/// -/// * If the `byte_count` is odd then a single byte copy will happen at the end. -/// -/// ## Safety -/// * If `byte_count` is zero then the pointers are not used and they can be any value. -/// * If `byte_count` is non-zero then: -/// * Both pointers must be valid for the span used and aligned to 2. -/// * The two regions must either be *entirely* disjoint or *entirely* overlapping. -/// Partial overlap is not allowed. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memcpy2"] -pub unsafe extern "C" fn __aeabi_memcpy2( - mut dest: *mut u16, mut src: *const u16, mut byte_count: usize, -) { - core::arch::asm! { - "1:", - "subs {count}, {count}, #2", - "ldrhge {temp}, [{src}], #2", - "strhge {temp}, [{dest}], #2", - "bgt 1b", - temp = out(reg) _, - count = inout(reg) byte_count, - src = inout(reg) src, - dest = inout(reg) dest, - options(nostack) - } - if byte_count != 0 { - let dest = dest.cast::(); - let src = src.cast::(); - dest.write_volatile(src.read_volatile()); - } -} - -/// Word copy between exclusive regions. 
-/// -/// * If `byte_count` is not a multiple of 4 then a halfword and/or byte copy -/// will happen at the end. -/// -/// ## Safety -/// * If `byte_count` is zero then the pointers are not used and they can be any value. -/// * If `byte_count` is non-zero then: -/// * Both pointers must be valid for the span used and aligned to 4. -/// * The two regions must either be *entirely* disjoint or *entirely* overlapping. -/// Partial overlap is not allowed. -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memcpy4"] -pub unsafe extern "C" fn __aeabi_memcpy4( - dest: *mut u32, src: *const u32, byte_count: usize, -) { - core::arch::asm! { - bracer::when!( "r2" >=u "#32" [label_id=2] { - bracer::with_pushed_registers!("{{r4-r9}}", { - "1:", - "subs r2, r2, #32", - "ldmge r1!, {{r3-r9, r12}}", - "stmge r0!, {{r3-r9, r12}}", - "bgt 1b", - }), - "bxeq lr", - }), - - // copy 4 words, two at a time - "tst r2, #0b10000", - "ldmne r1!, {{r3, r12}}", - "stmne r0!, {{r3, r12}}", - "ldmne r1!, {{r3, r12}}", - "stmne r0!, {{r3, r12}}", - "bics r2, r2, #0b10000", - "bxeq lr", - - // copy 2 and/or 1 words - "lsls r3, r2, #29", - "ldmcs r1!, {{r3, r12}}", - "stmcs r0!, {{r3, r12}}", - "ldrmi r3, [r1], #4", - "strmi r3, [r0], #4", - "bics r2, r2, #0b1100", - "bxeq lr", - - // copy halfword and/or byte - "lsls r3, r2, #31", - "ldrhcs r3, [r1], #2", - "strhcs r3, [r0], #2", - "ldrbmi r3, [r1], #1", - "strbmi r3, [r0], #1", - "bx lr", - options(noreturn), - } -} - -/// Just call [`__aeabi_memcpy4`] instead. -/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memcpy8"] -pub unsafe extern "C" fn __aeabi_memcpy8( - dest: *mut u32, src: *const u32, byte_count: usize, -) { - __aeabi_memcpy4(dest, src, byte_count); -} - -/// Arbitrary-width copy between exclusive regions. -/// -/// ## Safety -/// * If `byte_count` is zero then the pointers are not used and they can be any value. -/// * If `byte_count` is non-zero then: -/// * Both pointers must be valid for the span used (no required alignment). -/// * The two regions must either be *entirely* disjoint or *entirely* overlapping. -/// Partial overlap is not allowed. -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memcpy"] -pub unsafe extern "C" fn __aeabi_memcpy( - dest: *mut u8, src: *const u8, byte_count: usize, -) { - core::arch::asm! { - "cmp r2, #7", // if count <= (fix+word): just byte copy - "ble {__aeabi_memcpy1}", - - // check max coalign - "eor r3, r0, r1", - "lsls r3, r3, #31", - "bmi {__aeabi_memcpy1}", - "bcs 2f", - - // max coalign4, possible fixup and jump - "lsls r3, r0, #31", - "submi r2, r2, #1", - "ldrbmi r3, [r1], #1", - "strbmi r3, [r0], #1", - "subcs r2, r2, #2", - "ldrhcs r3, [r1], #2", - "strhcs r3, [r0], #2", - "b {__aeabi_memcpy4}", - - // max coalign2, possible fixup and jump - "2:", - "lsls r3, r0, #31", - "submi r2, r2, #1", - "ldrbmi r3, [r1], #1", - "strbmi r3, [r0], #1", - "b {__aeabi_memcpy2}", - - // - __aeabi_memcpy4 = sym __aeabi_memcpy4, - __aeabi_memcpy2 = sym __aeabi_memcpy2, - __aeabi_memcpy1 = sym __aeabi_memcpy1, - options(noreturn) - } -} - -/// Copy between exclusive regions, prefer [`__aeabi_memcpy`] if possible. -/// -/// This is the libc version of a memory copy. 
It's required to return the -/// `dest` pointer at the end of the call, which makes it need an extra -/// push/pop compared to a direct call to `__aeabi_memcpy`. -/// -/// * **Returns:** The `dest` pointer. -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.memcpy"] -pub unsafe extern "C" fn memcpy( - dest: *mut u8, src: *const u8, byte_count: usize, -) -> *mut u8 { - // I've seen a standard call to `__aeabi_memcpy` give weird codegen, - // so we (currently) do the call manually. - core::arch::asm! { - bracer::with_pushed_registers!("{{r0, lr}}", { - "bl {__aeabi_memcpy}", - }), - "bx lr", - __aeabi_memcpy = sym __aeabi_memcpy, - options(noreturn) - } -} - -// MOVE - -// used by `__aeabi_memmove` in some cases -#[inline] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.reverse_copy_u8"] -unsafe extern "C" fn reverse_copy_u8( - dest: *mut u8, src: *const u8, byte_count: usize, -) { - core::arch::asm! { - "1:", - "subs {count}, {count}, #1", - "ldrbge {temp}, [{src}, #-1]!", - "strbge {temp}, [{dest}, #-1]!", - "bgt 1b", - temp = out(reg) _, - count = inout(reg) byte_count => _, - src = inout(reg) src => _, - dest = inout(reg) dest => _, - options(nostack) - } -} - -// used by `__aeabi_memmove` in some cases -#[inline] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.reverse_copy_u16"] -unsafe extern "C" fn reverse_copy_u16( - mut dest: *mut u16, mut src: *const u16, mut byte_count: usize, -) { - core::arch::asm! { - "1:", - "subs {count}, {count}, #2", - "ldrhge {temp}, [{src}, #-2]!", - "strhge {temp}, [{dest}, #-2]!", - "bgt 1b", - temp = out(reg) _, - count = inout(reg) byte_count, - src = inout(reg) src, - dest = inout(reg) dest, - options(nostack) - } - if byte_count != 0 { - let dest = dest.cast::().sub(1); - let src = src.cast::().sub(1); - dest.write_volatile(src.read_volatile()); - } -} - -// used by `__aeabi_memmove` in some cases -#[naked] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.reverse_copy_u32"] -unsafe extern "C" fn reverse_copy_u32( - dest: *mut u32, src: *const u32, byte_count: usize, -) { - core::arch::asm! { - bracer::when!( "r2" >=u "#32" [label_id=2] { - bracer::with_pushed_registers!("{{r4-r9}}", { - "1:", - "subs r2, r2, #32", - "ldmdbcs r1!, {{r3-r9, r12}}", - "stmdbcs r0!, {{r3-r9, r12}}", - "bgt 1b", - }), - "bxeq lr", - }), - - // copy 4 words, two at a time - "tst r2, #0b10000", - "ldmdbne r1!, {{r3, r12}}", - "stmdbne r0!, {{r3, r12}}", - "ldmdbne r1!, {{r3, r12}}", - "stmdbne r0!, {{r3, r12}}", - "bics r2, r2, #0b10000", - "bxeq lr", - - // copy 2 and/or 1 words - "lsls r3, r2, #29", - "ldmdbcs r1!, {{r3, r12}}", - "stmdbcs r0!, {{r3, r12}}", - "ldrmi r3, [r1, #-4]!", - "strmi r3, [r0, #-4]!", - "bxeq lr", - - // copy halfword and/or byte - "lsls r2, r2, #31", - "ldrhcs r3, [r1, #-2]!", - "strhcs r3, [r0, #-2]!", - "ldrbmi r3, [r1, #-1]!", - "strbmi r3, [r0, #-1]!", - "bx lr", - options(noreturn), - } -} - -/// Copy between non-exclusive regions, prefer [`__aeabi_memmove`] if possible. -/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memmove4"] -pub unsafe extern "C" fn __aeabi_memmove4( - dest: *mut u32, src: *const u32, byte_count: usize, -) { - __aeabi_memmove(dest.cast(), src.cast(), byte_count) -} - -/// Copy between non-exclusive regions, prefer [`__aeabi_memmove`] if possible. 
-/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memmove8"] -pub unsafe extern "C" fn __aeabi_memmove8( - dest: *mut u32, src: *const u32, byte_count: usize, -) { - __aeabi_memmove(dest.cast(), src.cast(), byte_count) -} - -/// Copy between non-exclusive regions. -/// -/// * The pointers do not have a minimum alignment. The function will -/// automatically detect the best type of copy to perform. -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memmove"] -pub unsafe extern "C" fn __aeabi_memmove( - dest: *mut u8, src: *const u8, byte_count: usize, -) { - core::arch::asm! { - // when d > s we need to copy back-to-front - bracer::when!("r0" >=u "r1" [label_id=1] { - "add r0, r0, r2", - "add r1, r1, r2", - "eor r3, r0, r1", - "lsls r3, r3, #31", - "bmi {reverse_copy_u8}", - "bcs 2f", - - // max coalign4, possible fixup and jump - "lsls r3, r0, #31", - "submi r2, r2, #1", - "ldrbmi r3, [r1, #-1]!", - "strbmi r3, [r0, #-1]!", - "subcs r2, r2, #2", - "ldrhcs r3, [r1, #-2]!", - "strhcs r3, [r0, #-2]!", - "b {reverse_copy_u32}", - - // max coalign2, possible fixup and jump - "2:", - "tst r0, #1", - "sub r2, r2, #1", - "ldrb r3, [r1, #-1]!", - "strb r3, [r0, #-1]!", - "b {reverse_copy_u16}", - }), - // forward copy is a normal memcpy - "b {__aeabi_memcpy}", - __aeabi_memcpy = sym __aeabi_memcpy, - reverse_copy_u8 = sym reverse_copy_u8, - reverse_copy_u16 = sym reverse_copy_u16, - reverse_copy_u32 = sym reverse_copy_u32, - options(noreturn), - } -} - -/// Copy between non-exclusive regions, prefer [`__aeabi_memmove`] if possible. -/// -/// This is the libc version of a memory move. It's required to return the -/// `dest` pointer at the end of the call, which makes it need an extra -/// push/pop compared to a direct call to `__aeabi_memmove`. -/// -/// * **Returns:** The `dest` pointer. -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.memmove"] -pub unsafe extern "C" fn memmove( - dest: *mut u8, src: *const u8, byte_count: usize, -) -> *mut u8 { - core::arch::asm! { - bracer::with_pushed_registers!("{{r0, lr}}", { - "bl {__aeabi_memmove}", - }), - "bx lr", - __aeabi_memmove = sym __aeabi_memmove, - options(noreturn) - } -} - -// SET - -/// Copy between non-exclusive regions, prefer [`__aeabi_memset`] if possible. -/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memset4"] -pub unsafe extern "C" fn __aeabi_memset4( - dest: *mut u32, byte_count: usize, byte: i32, -) { - __aeabi_memset(dest.cast(), byte_count, byte) -} - -/// Copy between non-exclusive regions, prefer [`__aeabi_memset`] if possible. -/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memset8"] -pub unsafe extern "C" fn __aeabi_memset8( - dest: *mut u32, byte_count: usize, byte: i32, -) { - __aeabi_memset(dest.cast(), byte_count, byte) -} - -/// Sets all bytes in the region to the `byte` given. 
-/// -/// Because of historical reasons, the byte is passed in as an `i32`, but only -/// the lowest 8 bits are used. -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memset"] -pub unsafe extern "C" fn __aeabi_memset( - dest: *mut u8, byte_count: usize, byte: i32, -) { - core::arch::asm! { - bracer::when!("r1" >=u "#8" [label_id=7] { - // duplicate the byte across all of r2 and r3 - "and r2, r2, #0xFF", - "orr r2, r2, r2, lsl #8", - "orr r2, r2, r2, lsl #16", - "mov r3, r2", - - // align the pointer for word ops - "tst r0, #0b1", - "subne r1, r1, #1", - "strbne r2, [r0], #1", - "tst r0, #0b10", - "subne r1, r1, #2", - "strhne r2, [r0], #2", - - bracer::when!("r1" >=u "#32" [label_id=8] { - bracer::with_pushed_registers!("{{r4-r9}}", { - "mov r4, r2", - "mov r5, r2", - "mov r6, r2", - "mov r7, r2", - "mov r8, r2", - "mov r9, r2", - "1:", - "subs r1, r1, #32", - "stmge r0!, {{r2-r9}}", - "bgt 1b", - }), - "bxeq lr", - }), - - // set 4 words - "tst r1, #0b10000", - "stmne r0!, {{r2, r3}}", - "stmne r0!, {{r2, r3}}", - - // set 2 and/or 1 words - "lsls r12, r1, #29", - "stmcs r0!, {{r2, r3}}", - "strmi r2, [r0], #4", - - // set halfword and/or byte - "lsls r12, r1, #31", - "strhcs r2, [r0], #2", - "strbmi r2, [r0], #1", - "bx lr", - }), - // byte loop - "9:", - "subs r1, r1, #1", - "strbcs r2, [r0], #1", - "bgt 9b", - "bx lr", - options(noreturn) - } -} - -/// Write a value to all bytes in the region, prefer [`__aeabi_memset`] if -/// possible. -/// -/// This is the libc version of a memory set. It's required to return the `dest` -/// pointer at the end of the call, which makes it need an extra push/pop -/// compared to a direct call to `__aeabi_memset`. Also, the argument ordering -/// is swapped, so shuffling registers costs a few cycles. -/// -/// * **Returns:** The `dest` pointer. -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.memset"] -pub unsafe extern "C" fn memset( - dest: *mut u8, byte: i32, byte_count: usize, -) -> *mut u8 { - core::arch::asm! { - bracer::with_pushed_registers!("{{r0, lr}}", { - "mov r3, r2", - "mov r2, r1", - "mov r1, r3", - "bl {__aeabi_memset}", - }), - "bx lr", - __aeabi_memset = sym __aeabi_memset, - options(noreturn) - } -} - -// CLEAR - -/// Just call [`__aeabi_memset`] with 0 as the `byte` instead. -/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memclr4"] -pub unsafe extern "C" fn __aeabi_memclr4(dest: *mut u32, byte_count: usize) { - __aeabi_memset(dest.cast(), byte_count, 0) -} - -/// Just call [`__aeabi_memset`] with 0 as the `byte` instead. -/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. -#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memclr8"] -pub unsafe extern "C" fn __aeabi_memclr8(dest: *mut u32, byte_count: usize) { - __aeabi_memset(dest.cast(), byte_count, 0) -} - -/// Just call [`__aeabi_memset`] with 0 as the `byte` instead. -/// -/// This function is provided only for API completeness, because in some cases -/// the compiler might automatically generate a call to this function. 
-#[inline] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.__aeabi_memclr"] -pub unsafe extern "C" fn __aeabi_memclr(dest: *mut u8, byte_count: usize) { - __aeabi_memset(dest, byte_count, 0) -} - -/// Reads 4 bytes, starting at the address given. -/// -/// See [__aeabi_uread4] -/// -/// [__aeabi_uread4]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uread4"] -unsafe extern "C" fn __aeabi_uread4(address: *const c_void) -> u32 { - core::arch::asm!( - "ldrb r2, [r0]", - "ldrb r3, [r0, #1]", - "orr r2, r2, r3, lsl #8", - "ldrb r3, [r0, #2]", - "orr r2, r2, r3, lsl #16", - "ldrb r3, [r0, #3]", - "orr r2, r2, r3, lsl #24", - "mov r0, r2", - "bx lr", - options(noreturn), - ) -} - -/// Writes 4 bytes, starting at the address given. -/// -/// See [__aeabi_uwrite4] -/// -/// [__aeabi_uwrite4]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uwrite4"] -unsafe extern "C" fn __aeabi_uwrite4(value: u32, address: *mut c_void) { - core::arch::asm!( - "strb r0, [r1]", - "lsr r2, r0, #8", - "strb r2, [r1, #1]", - "lsr r2, r2, #8", - "strb r2, [r1, #2]", - "lsr r2, r2, #8", - "strb r2, [r1, #3]", - "bx lr", - options(noreturn), - ) -} - -/// Reads 8 bytes, starting at the address given. -/// -/// See [__aeabi_uread8] -/// -/// [__aeabi_uread8]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uread8"] -unsafe extern "C" fn __aeabi_uread8(address: *const c_void) -> u64 { - core::arch::asm!( - "ldrb r1, [r0, #4]", - "ldrb r2, [r0, #5]", - "orr r1, r1, r2, lsl #8", - "ldrb r2, [r0, #6]", - "orr r1, r1, r2, lsl #16", - "ldrb r2, [r0, #7]", - "orr r1, r1, r2, lsl #24", - "b {__aeabi_uread4}", - __aeabi_uread4 = sym __aeabi_uread4, - options(noreturn), - ) -} - -/// Writes 8 bytes, starting at the address given. -/// -/// See [__aeabi_uwrite8] -/// -/// [__aeabi_uwrite8]: https://github.com/ARM-software/abi-aa/blob/main/rtabi32/rtabi32.rst#unaligned-memory-access -#[naked] -#[no_mangle] -#[instruction_set(arm::a32)] -#[link_section = ".iwram.aeabi.uwrite8"] -unsafe extern "C" fn __aeabi_uwrite8(value: u64, address: *mut c_void) { - core::arch::asm!( - "strb r0, [r2]", - "lsr r3, r0, #8", - "strb r3, [r2, #1]", - "lsr r3, r3, #8", - "strb r3, [r2, #2]", - "lsr r3, r3, #8", - "strb r3, [r2, #3]", - "strb r1, [r2, #4]", - "lsr r3, r1, #8", - "strb r3, [r2, #5]", - "lsr r3, r3, #8", - "strb r3, [r2, #6]", - "lsr r3, r3, #8", - "strb r3, [r2, #7]", - "bx lr", - options(noreturn), - ) -} diff --git a/src/prelude.rs b/src/prelude.rs index 45b30dc5..a2d9a6a8 100644 --- a/src/prelude.rs +++ b/src/prelude.rs @@ -3,6 +3,7 @@ #[cfg(feature = "on_gba")] pub use crate::{ asm_runtime::*, bios::*, dma::*, gba_cell::*, mgba::*, mmio::*, + RUST_IRQ_HANDLER, }; pub use crate::{ diff --git a/src/video/mod.rs b/src/video/mod.rs index 866b22a1..b876afb1 100644 --- a/src/video/mod.rs +++ b/src/video/mod.rs @@ -97,11 +97,16 @@ //! sort your object entries so that any lower priority objects are also the //! lower index objects. 
-use crate::macros::{ - pub_const_fn_new_zeroed, u16_bool_field, u16_enum_field, u16_int_field, -}; +use bytemuck::{Pod, TransparentWrapper, Zeroable}; + #[allow(unused_imports)] use crate::prelude::*; +use crate::{ + macros::{ + pub_const_fn_new_zeroed, u16_bool_field, u16_enum_field, u16_int_field, + }, + mem::{copy_u32x8_unchecked, set_u32x80_unchecked}, +}; pub mod obj; @@ -134,6 +139,10 @@ impl Color { } } +unsafe impl Zeroable for Color {} +unsafe impl Pod for Color {} +unsafe impl TransparentWrapper for Color {} + /// The video mode controls how each background layer will operate. #[derive(Debug, Clone, Copy, Default, PartialEq, Eq, PartialOrd, Ord, Hash)] #[repr(u16)] @@ -315,3 +324,51 @@ impl TextEntry { Self(id & 0b11_1111_1111) } } + +#[inline] +pub fn video3_clear_to(c: Color) { + let u = u32::from(c.0) << 16 | u32::from(c.0); + unsafe { + let p = VIDEO3_VRAM.as_usize() as *mut _; + set_u32x80_unchecked(p, u, 240_usize); + } +} + +#[repr(C, align(4))] +pub struct Video3Bitmap(pub [Color; 240 * 160]); +impl Video3Bitmap { + /// Wraps an array of raw color bit data as a Video Mode 3 bitmap. + /// + /// This is intended for generating static values at compile time. You should + /// not attempt to call this function at runtime, because the argument to the + /// function is larger than the GBA's stack space. + #[inline] + #[must_use] + pub const fn new_from_u16(bits: [u16; 240 * 160]) -> Self { + Self(unsafe { core::mem::transmute(bits) }) + } +} + +#[inline] +pub fn video3_set_bitmap(bitmap: &Video3Bitmap) { + let p = VIDEO3_VRAM.as_usize() as *mut _; + unsafe { + copy_u32x8_unchecked(p, bitmap as *const _ as *const _, 2400_usize) + }; +} + +#[repr(C, align(4))] +pub struct Video4Indexmap(pub [u8; 240 * 160]); + +/// Sets the indexmap of the frame requested. +/// +/// ## Panics +/// Only frames 0 and 1 exist, if `frame` is 2 or more this will panic. +#[inline] +pub fn video4_set_indexmap(indexes: &Video4Indexmap, frame: usize) { + let p = VIDEO4_VRAM.get_frame(usize::from(frame)).unwrap().as_usize() + as *mut [u32; 8]; + unsafe { + copy_u32x8_unchecked(p, indexes as *const _ as *const _, 1200_usize) + }; +}
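A closing note on the new `mem` helpers: the loop counts hard-coded into `video3_clear_to`, `video3_set_bitmap`, and `video4_set_indexmap` follow directly from the chunk sizes the helpers process per iteration. A minimal compile-time sketch of that arithmetic (the constant names are illustrative and not part of the patch):

const MODE3_BYTES: usize = 240 * 160 * 2; // one u16 color per pixel
const MODE4_FRAME_BYTES: usize = 240 * 160; // one u8 palette index per pixel
const MODE5_FRAME_BYTES: usize = 160 * 128 * 2;

// `set_u32x80_unchecked` stores 320 bytes ([u32; 80]) per loop iteration:
const _: () = assert!(MODE3_BYTES / (4 * 80) == 240); // count used by `video3_clear_to`
const _: () = assert!(MODE4_FRAME_BYTES / (4 * 80) == 120);
const _: () = assert!(MODE5_FRAME_BYTES / (4 * 80) == 128);

// `copy_u32x8_unchecked` moves 32 bytes ([u32; 8]) per loop iteration:
const _: () = assert!(MODE3_BYTES / (4 * 8) == 2400); // count used by `video3_set_bitmap`
const _: () = assert!(MODE4_FRAME_BYTES / (4 * 8) == 1200); // count used by `video4_set_indexmap`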