diff --git a/Cargo.lock b/Cargo.lock index 5c4f057..ba8af6f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -246,6 +246,27 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6184e33543162437515c2e2b48714794e37845ec9851711914eec9d308f6ebe8" +[[package]] +name = "dirs-next" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1" +dependencies = [ + "cfg-if", + "dirs-sys-next", +] + +[[package]] +name = "dirs-sys-next" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d" +dependencies = [ + "libc", + "redox_users", + "winapi", +] + [[package]] name = "doc-comment" version = "0.3.3" @@ -322,6 +343,17 @@ dependencies = [ "byteorder", ] +[[package]] +name = "getrandom" +version = "0.2.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "gimli" version = "0.31.0" @@ -405,6 +437,7 @@ dependencies = [ "clap", "colored", "console", + "dirs-next", "fxhash", "hotwatch", "lazy_static", @@ -664,6 +697,17 @@ dependencies = [ "bitflags 2.6.0", ] +[[package]] +name = "redox_users" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" +dependencies = [ + "getrandom", + "libredox", + "thiserror", +] + [[package]] name = "regex" version = "1.10.6" @@ -906,6 +950,22 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "winapi" +version = "0.3.9" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + [[package]] name = "winapi-util" version = "0.1.9" @@ -915,6 +975,12 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + [[package]] name = "windows-sys" version = "0.48.0" diff --git a/Cargo.toml b/Cargo.toml index 2560e2b..583815a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,6 +15,7 @@ miette = { version = "7.2.0", features = ["fancy"] } fxhash = "0.2.1" hotwatch = "0.5.0" console = "0.15.8" +dirs-next = "2.0.0" [dev-dependencies] assert_cmd = "2.0.14" diff --git a/src/air.rs b/src/air.rs index 91f4e43..3de4fb2 100644 --- a/src/air.rs +++ b/src/air.rs @@ -2,21 +2,31 @@ use std::{i16, u16, u32}; use miette::{bail, Result, Severity}; -use crate::symbol::{Flag, Label, Register}; +use crate::{ + debugger::Breakpoints, + symbol::{Flag, Label, Register, Span}, +}; /// Assembly intermediate representation, contains starting address and list of instructions +#[derive(Clone)] pub struct Air { /// Memory address to start program at orig: Option, /// AIR - ast: Vec, + pub ast: Vec, + + pub breakpoints: Breakpoints, + + pub src: &'static str, } impl Air { - pub fn new() -> Self { + pub fn new(src: &'static str) -> Self { Air { orig: None, ast: Vec::new(), + breakpoints: Breakpoints::new(), + src, } } @@ -34,9 +44,9 @@ impl Air { self.orig } - pub fn add_stmt(&mut self, stmt: AirStmt) { + pub fn add_stmt(&mut self, stmt: AirStmt, span: 
Span) { self.ast - .push(AsmLine::new((self.ast.len() + 1) as u16, stmt)) + .push(AsmLine::new((self.ast.len() + 1) as u16, stmt, span)) } pub fn get(&self, idx: usize) -> &AsmLine { @@ -55,17 +65,17 @@ impl Air { } } -impl IntoIterator for Air { - type Item = AsmLine; - type IntoIter = std::vec::IntoIter; +impl<'a> IntoIterator for &'a Air { + type Item = &'a AsmLine; + type IntoIter = std::slice::Iter<'a, AsmLine>; fn into_iter(self) -> Self::IntoIter { - self.ast.into_iter() + (&self.ast).into_iter() } } /// Single LC3 statement. Has optional labels. -#[derive(PartialEq, Eq, Debug)] +#[derive(PartialEq, Eq, Debug, Clone)] pub enum AirStmt { /// Add src_reg with src_reg_imm and store in dest Add { @@ -122,17 +132,11 @@ pub enum AirStmt { offset: u8, }, /// Push onto stack (extended dialect) - Push { - src_reg: Register, - }, + Push { src_reg: Register }, /// Pop from stack (extended dialect) - Pop { - dest_reg: Register, - }, + Pop { dest_reg: Register }, /// Jump to subroutine and push onto stack (extended dialect) - Call { - dest_label: Label, - }, + Call { dest_label: Label }, /// Return from subroutine using stack (extended dialect) Rets, /// A raw value created during preprocessing @@ -143,7 +147,7 @@ pub enum AirStmt { /// Used for ADD and AND commands as they support either 5-bit immediate values or registers as the /// last operand. -#[derive(PartialEq, Eq, Debug)] +#[derive(PartialEq, Eq, Debug, Clone, Copy)] pub enum ImmediateOrReg { Reg(Register), Imm5(u8), @@ -164,15 +168,16 @@ impl ImmediateOrReg { pub struct RawWord(pub u16); /// A line (16 bits) of assembly. 
-#[derive(PartialEq, Eq, Debug)] +#[derive(PartialEq, Eq, Debug, Clone)] pub struct AsmLine { pub line: u16, pub stmt: AirStmt, + pub span: Span, } impl AsmLine { - pub fn new(line: u16, stmt: AirStmt) -> Self { - AsmLine { line, stmt } + pub fn new(line: u16, stmt: AirStmt, span: Span) -> Self { + AsmLine { line, stmt, span } } /// Fill label references using values from symbol table @@ -340,7 +345,7 @@ impl AsmLine { // 6. Continued offset when call // // There are 10 bits of offset precision when using a call instruction. - // There also isn't really a way to work around this setup if other instructions + // There also isn't really a way to work around this setup if other instructions // are to be left untouched. AirStmt::Push { src_reg } => { let mut raw = 0xD000; @@ -394,7 +399,11 @@ impl AsmLine { #[cfg(test)] mod test { use super::*; - use crate::{air::AirStmt, parser::AsmParser, symbol::Flag}; + use crate::{ + air::AirStmt, + parser::AsmParser, + symbol::{Flag, SrcOffset}, + }; // Backpatching tests #[test] @@ -418,7 +427,15 @@ mod test { stmt: AirStmt::Branch { flag: Flag::Nzp, dest_label: Label::Ref(2) - } + }, + span: Span::new( + SrcOffset( + r#" + "# + .len() + ), + "br label".len() + ) } ); } @@ -439,6 +456,7 @@ mod test { src_reg: Register::R2, src_reg_imm: ImmediateOrReg::Reg(Register::R3), }, + span: Span::dummy(), }; assert_eq!(asm.emit().unwrap(), 0x1283) } @@ -452,6 +470,7 @@ mod test { src_reg: Register::R2, src_reg_imm: ImmediateOrReg::Imm5(0b01111), }, + span: Span::dummy(), }; assert_eq!(asm.emit().unwrap(), 0x12AF) } @@ -464,6 +483,7 @@ mod test { flag: Flag::Nzp, dest_label: Label::Ref(4), }, + span: Span::dummy(), }; assert_eq!(asm.emit().unwrap(), 0b0000111000000010) } @@ -476,6 +496,7 @@ mod test { flag: Flag::Nzp, dest_label: Label::Ref(1), }, + span: Span::dummy(), }; assert_eq!(asm.emit().unwrap(), 0b0000111111111100) } @@ -488,6 +509,7 @@ mod test { flag: Flag::Nzp, dest_label: Label::Ref(258), }, + span: Span::dummy(), }; 
assert!(asm.emit().is_err()); let asm = AsmLine { @@ -496,6 +518,7 @@ mod test { flag: Flag::Nzp, dest_label: Label::Ref(1), }, + span: Span::dummy(), }; assert!(asm.emit().is_err()) } @@ -510,6 +533,7 @@ mod test { src_reg: Register::R4, src_reg_imm: ImmediateOrReg::Imm5((-1i8) as u8), }, + span: Span::dummy(), }; assert_eq!(asm.emit().unwrap(), 0x193f); } diff --git a/src/debugger/breakpoint.rs b/src/debugger/breakpoint.rs new file mode 100644 index 0000000..15dff65 --- /dev/null +++ b/src/debugger/breakpoint.rs @@ -0,0 +1,96 @@ +/// Wrapper for list of [`Breakpoint`]s. +/// +/// Could be another collection, but [`Vec`] was used for simplicity. +/// +/// List must remain sorted, and 2 breakpoints cannot have the same address. +#[derive(Clone, Debug)] +pub struct Breakpoints(Vec); + +/// A [`Breakpoint`] is just an address, and a flag for whether it was 'predefined'. +/// +/// Predefined here meaning it was registered in the assembly code, with the `.BREAK` directive, +/// as opposed to being registered with a debugger command (`break add`). +#[derive(Clone, Copy, Debug)] +pub struct Breakpoint { + pub address: u16, + pub is_predefined: bool, +} + +impl Breakpoints { + pub fn new() -> Self { + Self(Vec::new()) + } + + /// Get the [`Breakpoint`] with the given address. + /// + /// Returns `None` if no breakpoint exists. + pub fn get(&self, address: u16) -> Option { + for breakpoint in &self.0 { + if breakpoint.address == address { + return Some(*breakpoint); + } + } + None + } + + /// Insert a new breakpoint, keeping list sorted. + /// + /// Returns `true` if breakpoint already exists with that address (new breakpoint will not be + /// inserted). 
+ pub fn insert(&mut self, breakpoint: Breakpoint) -> bool { + let mut index = self.len(); + for (i, other) in self.iter().enumerate() { + if other.address == breakpoint.address { + return true; + } + if other.address >= breakpoint.address { + index = i; + break; + } + } + self.0.insert(index, breakpoint); + return false; + } + + /// Removes every breakpoint with given address, keeping list sorted. + /// + /// Returns whether any breakpoint was found with given address. + pub fn remove(&mut self, address: u16) -> bool { + let initial_len = self.0.len(); + self.0.retain(|breakpoint| breakpoint.address != address); + initial_len != self.0.len() + } + + /// Add the `orig` address to each [`Breakpoint`] item. + /// + /// Should only be called once per program run. + pub fn with_orig(mut self, orig: u16) -> Self { + for breakpoint in &mut self.0 { + breakpoint.address += orig; + } + self + } + + pub fn len(&self) -> usize { + self.0.len() + } + pub fn is_empty(&self) -> bool { + self.len() == 0 + } + + pub fn iter(&self) -> impl Iterator { + self.0.iter() + } + pub fn iter_mut(&mut self) -> impl Iterator { + self.0.iter_mut() + } +} + +impl<'a> IntoIterator for &'a Breakpoints { + type Item = &'a Breakpoint; + type IntoIter = std::slice::Iter<'a, Breakpoint>; + + fn into_iter(self) -> Self::IntoIter { + self.0.iter() + } +} diff --git a/src/debugger/command.rs b/src/debugger/command.rs new file mode 100644 index 0000000..371518c --- /dev/null +++ b/src/debugger/command.rs @@ -0,0 +1,199 @@ +use std::fmt; + +use super::{ + error::{ArgumentError, CommandError}, + parse::CommandIter, +}; +use crate::symbol::Register; + +#[allow(dead_code)] +#[derive(Debug)] +pub enum Command { + Help, + Step { count: u16 }, + Next, + Continue, + Finish, + Quit, + Exit, + BreakList, + BreakAdd { location: MemoryLocation }, + BreakRemove { location: MemoryLocation }, + Get { location: Location }, + Set { location: Location, value: u16 }, + Jump { location: MemoryLocation }, + Registers, + 
Reset, + Source { location: MemoryLocation }, + // This can be `String` bc it will be allocated later regardless to get a &'static str + // Unless parsing code is changed, and can accept a non-static string + Eval { instruction: String }, +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub(super) enum CommandName { + Help, + Step, + Next, + Continue, + Finish, + Quit, + Exit, + BreakList, + BreakAdd, + BreakRemove, + Get, + Set, + Jump, + Registers, + Reset, + Source, + Eval, +} + +impl fmt::Display for CommandName { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Help => write!(f, "help"), + Self::Step => write!(f, "step"), + Self::Next => write!(f, "next"), + Self::Continue => write!(f, "continue"), + Self::Finish => write!(f, "finish"), + Self::Quit => write!(f, "quit"), + Self::Exit => write!(f, "exit"), + Self::BreakList => write!(f, "break list"), + Self::BreakAdd => write!(f, "break add"), + Self::BreakRemove => write!(f, "break remove"), + Self::Get => write!(f, "get"), + Self::Set => write!(f, "set"), + Self::Jump => write!(f, "jump"), + Self::Registers => write!(f, "registers"), + Self::Reset => write!(f, "reset"), + Self::Source => write!(f, "source"), + Self::Eval => write!(f, "eval"), + } + } +} + +/// Register or memory location. +#[derive(Debug)] +pub enum Location { + Register(Register), + Memory(MemoryLocation), +} + +#[derive(Debug)] +pub enum MemoryLocation { + PCOffset(i16), + Address(u16), + Label(Label), +} + +/// Label with word offset. +// TODO(opt): could use `&'static str` which refers to a key of the static symbol table... +// ...this means it would need to be resolved inside some parsing function +#[derive(Debug, PartialEq)] +pub struct Label { + pub name: String, + pub offset: i16, +} + +impl TryFrom<&str> for Command { + type Error = CommandError; + + /// Assumes line is non-empty. 
+ fn try_from(line: &str) -> std::result::Result { + let mut iter = CommandIter::from(line); + + let command_name = iter.get_command_name()?; + Command::parse_arguments(command_name, iter).map_err(|error| { + CommandError::InvalidArgument { + command_name, + error, + } + }) + } +} + +impl Command { + fn parse_arguments( + name: CommandName, + mut iter: CommandIter<'_>, + ) -> Result { + let mut expected_args = 0; + + let command = match name { + // Allow trailing arguments + CommandName::Help => return Ok(Self::Help), + + CommandName::Continue => Self::Continue, + CommandName::Finish => Self::Finish, + CommandName::Exit => Self::Exit, + CommandName::Quit => Self::Quit, + CommandName::Registers => Self::Registers, + CommandName::Reset => Self::Reset, + + CommandName::Step => { + expected_args = 1; + let count = iter.next_positive_integer_or_default("count")?; + Self::Step { count } + } + CommandName::Next => Self::Next, + + CommandName::Get => { + expected_args = 1; + let location = iter.next_location("location", expected_args)?; + Self::Get { location } + } + CommandName::Set => { + expected_args = 2; + let location = iter.next_location("location", expected_args)?; + let value = iter.next_integer("value", expected_args)?; + Self::Set { location, value } + } + + CommandName::Jump => { + expected_args = 1; + let location = iter.next_memory_location("location", expected_args)?; + Self::Jump { location } + } + + CommandName::BreakList => Self::BreakList, + CommandName::BreakAdd => { + expected_args = 1; + let location = iter.next_memory_location_or_default("location")?; + Self::BreakAdd { location } + } + CommandName::BreakRemove => { + expected_args = 1; + let location = iter.next_memory_location_or_default("location")?; + Self::BreakRemove { location } + } + + CommandName::Source => { + expected_args = 1; + let location = iter.next_memory_location_or_default("location")?; + Self::Source { location } + } + + CommandName::Eval => { + let instruction = 
iter.collect_rest(); + if instruction.is_empty() { + return Err(ArgumentError::MissingArgumentList { + argument_name: "instruction", + }); + } + // Don't return `Err` for invalid argument count, as this shouldn't happen + debug_assert!( + iter.expect_end_of_command(0, 0).is_ok(), + "no more arguments should exist", + ); + return Ok(Self::Eval { instruction }); + } + }; + + iter.expect_end_of_command(expected_args, iter.arg_count() + 1)?; + + Ok(command) + } +} diff --git a/src/debugger/error.rs b/src/debugger/error.rs new file mode 100644 index 0000000..8e034a7 --- /dev/null +++ b/src/debugger/error.rs @@ -0,0 +1,146 @@ +use std::fmt; + +use super::command::CommandName; + +/// Error parsing a command. +// TODO(opt): Most `String` fields could be `&str` (with difficulty, no doubt) +#[derive(Debug, PartialEq)] +pub enum CommandError { + InvalidCommand { + command_name: String, + }, + MissingSubcommand { + command_name: &'static str, + }, + InvalidSubcommand { + command_name: &'static str, + subcommand_name: String, + }, + InvalidArgument { + command_name: CommandName, + error: ArgumentError, + }, +} + +// TODO(rename): Type names and variants +#[derive(Debug, PartialEq)] +pub enum ArgumentError { + /// For `eval`. 
+ MissingArgumentList { argument_name: &'static str }, + MissingArgument { + argument_name: &'static str, + expected_count: u8, + actual_count: u8, + }, + TooManyArguments { + expected_count: u8, + actual_count: u8, + }, + InvalidValue { + argument_name: &'static str, + error: ValueError, + }, +} + +#[derive(Debug, PartialEq)] +pub enum ValueError { + MismatchedType { + expected_type: &'static str, + actual_type: &'static str, + }, + MalformedValue {}, + MalformedInteger {}, + MalformedLabel {}, + IntegerTooLarge {}, +} + +impl std::error::Error for CommandError {} + +impl fmt::Display for CommandError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::InvalidCommand { command_name } => { + write!(f, "Not a command: `{}`.", command_name) + } + Self::InvalidSubcommand { + command_name, + subcommand_name, + } => write!( + f, + "Invalid subcommand `{}` for command `{}`.", + subcommand_name, command_name + ), + Self::MissingSubcommand { command_name } => { + write!(f, "Missing subcommand for `{}`.", command_name) + } + + Self::InvalidArgument { + command_name, + error, + } => { + write!(f, "In command `{}`: ", command_name)?; + + match error { + ArgumentError::MissingArgumentList { argument_name } => { + write!(f, "Missing argument list `{}`", argument_name)?; + } + ArgumentError::MissingArgument { + argument_name, + expected_count, + actual_count, + } => { + write!( + f, + "Missing argument `{}` (expected {}, found {})", + argument_name, expected_count, actual_count + )?; + } + ArgumentError::TooManyArguments { + expected_count, + actual_count, + } => { + write!( + f, + "Too many arguments (expected {}, found {})", + expected_count, actual_count + )?; + } + + ArgumentError::InvalidValue { + argument_name, + error, + } => { + write!(f, "For argument `{}`: ", argument_name)?; + + match error { + ValueError::MismatchedType { + expected_type, + actual_type, + } => { + write!( + f, + "Incorrect value type (expected {}, found {})", + 
expected_type, actual_type + )?; + } + ValueError::MalformedValue {} => { + write!(f, "Invalid value")?; + } + ValueError::MalformedInteger {} => { + write!(f, "Malformed integer")?; + } + ValueError::MalformedLabel {} => { + write!(f, "Malformed label")?; + } + ValueError::IntegerTooLarge {} => { + write!(f, "Integer too large")?; + } + } + } + } + + write!(f, ".") + } + } + } +} diff --git a/src/debugger/eval.rs b/src/debugger/eval.rs new file mode 100644 index 0000000..eb37da1 --- /dev/null +++ b/src/debugger/eval.rs @@ -0,0 +1,77 @@ +use miette::Result; + +use crate::air::{AirStmt, AsmLine}; +use crate::runtime::RunState; +use crate::symbol::Span; +use crate::{dprintln, AsmParser}; + +pub fn eval(state: &mut RunState, line: String) { + // Required to make temporarily 'static + // Automatically dropped at end of scope + let line = StaticStr::from(line); + + // Note that error cannot be returned from this function, without the caller + // being responsible for dropping `line` + if let Err(err) = eval_inner(state, unsafe { line.as_str() }) { + eprintln!("{:?}", err); + } +} + +// Wrapper to group errors into one location +fn eval_inner(state: &mut RunState, line: &'static str) -> Result<()> { + // Parse + let stmt = AsmParser::new_simple(line)?.parse_simple()?; + + // Don't allow any branch instructions + // Since CC is set to 0b000 at start, this could lead to confusion when `BR` instructions are + // not executed + if let AirStmt::Branch { .. } = stmt { + dprintln!( + Always, + Error, + "Evaluation of `BR*` instructions is not supported." 
+ ); + dprintln!(Always, Error, "Consider using `jump` command instead."); + return Ok(()); + } + + // Check labels + let mut asm = AsmLine::new(0, stmt, Span::dummy()); + asm.backpatch()?; + + // Emit + let instr = asm.emit()?; + // Execute + RunState::OP_TABLE[(instr >> 12) as usize](state, instr); + + Ok(()) +} + +/// Get an unsafe `&'static str` from a `Box`, for temporary use +struct StaticStr { + ptr: *mut str, +} + +impl StaticStr { + pub unsafe fn as_str(&self) -> &'static str { + &*self.ptr + } +} + +impl From for StaticStr +where + T: Into>, +{ + fn from(string: T) -> Self { + Self { + ptr: Box::into_raw(string.into()), + } + } +} + +impl Drop for StaticStr { + fn drop(&mut self) { + let boxed = unsafe { Box::from_raw(self.ptr) }; + drop(boxed); + } +} diff --git a/src/debugger/help.txt b/src/debugger/help.txt new file mode 100644 index 0000000..d9a3f19 --- /dev/null +++ b/src/debugger/help.txt @@ -0,0 +1,68 @@ +-------{1} LACE DEBUGGER {0}------- +Available Commands: + + {35;1}help{0;35;2}({0;35}h{2}){0} + Show this message. + + {35;1}progress{0;35;2}({0;35}p{2}){0} {32;3}COUNT{2}?{0} + Execute next instruction or jump into subroutine. + {2}-{0} {3}COUNT{0}: {36}Integer{0} {3}(default: 1){0} + + {35;1}next{0;35;2}({0;35}n{2}){0} + Execute next instruction or whole subroutine (including any nested + subroutine calls). + + {35;1}continue{0;35;2}({0;35}c{2}){0} + Continue execution until breakpoint or {1}HALT{0}. + + {35;1}finish{0;35;2}({0;35}f{2}){0} + Continue execution until end of subroutine, breakpoint, or {1}HALT{0}. + + {35;1}get{0;35;2}({0;35}g{2}){0} {32;3}LOCATION{2}?{0} + Print the value at a register or address. + {2}-{0} {3}LOCATION{0}: {36}Register{0} {2}|{0} {36}Address{2}+{0} + + {35;1}set{0;35;2}({0;35}s{2}){0} {32;3}LOCATION VALUE{0} + Set the value at a register or address. 
+ {2}-{0} {3}LOCATION{0}: {36}Register{0} {2}|{0} {36}Address{2}+{0} + {2}-{0} {3}VALUE{0}: Integer + + {35;1}registers{0;35;2}({0;35}r{2}){0} + Print the value of all registers. + + {35;1}jump{0;35;2}({0;35}j{2}){0} {32;3}LOCATION{0} + Unconditionally jump to an address. + {2}-{0} {3}LOCATION{0}: {36}Address{2}+{0} + + {35;1}break add{0;35;2}({0;35}ba{2}){0} {32;3}LOCATION{2}?{0} + Add breakpoint at an address. + {2}-{0} {3}LOCATION{0}: {36}Address{2}+{0} {2}|{0} {3}(default: PC){0} + + {35;1}break remove{0;35;2}({0;35}br{2}){0} {32;3}LOCATION{2}?{0} + Remove breakpoint at an address. + {2}-{0} {3}LOCATION{0}: {36}Address{2}+{0} {2}|{0} {3}(default: PC){0} + + {35;1}break list{0;35;2}({0;35}bl{2}){0} + List all breakpoints. + + {35;1}assembly{0;35;2}({0;35}a{2}){0} {32;3}LOCATION{2}?{0} + Print corresponding line of assembly source code from address. + {2}-{0} {3}LOCATION{0}: {36}Address{2}+{0} {2}|{0} {3}(default: PC){0} + + {35;1}eval{0;35;2}({0;35}e{2}){0} + Simulate an instruction. Note that labels cannot be created or modified. + {2}-{0} Instruction: {36}OPCODE OPERANDS{2}...{0} + + {35;1}reset{0} + Reset all memory and registers to original state. + + {35;1}quit{0;35;2}({0;35}q{2}){0} + Stop debugger and continue execution as normal. + + {35;1}exit{0} + Exit debugger and simulator. + +{3}Note:{0} An {36}Address{2}+{0} argument can be one of the following: + {1}1.{0} An absolute address value. {36}x3010{0} + {1}2.{0} A label with an optional offset. {36}Foo{0;2}, {0;36}Hello+4{0} + {1}3.{0} An offset from the program counter. 
{36}^3{0;2}, {0;36}^-x10{0} diff --git a/src/debugger/mod.rs b/src/debugger/mod.rs new file mode 100644 index 0000000..6214dbb --- /dev/null +++ b/src/debugger/mod.rs @@ -0,0 +1,608 @@ +mod breakpoint; +mod command; +mod error; +mod eval; +mod parse; +mod source; + +pub use self::breakpoint::{Breakpoint, Breakpoints}; +use self::command::{Command, Label, Location, MemoryLocation}; +use self::source::{Source, SourceRead}; +use crate::air::AsmLine; +use crate::output::{Condition, Output}; +use crate::runtime::{RunState, HALT_ADDRESS, USER_MEMORY_END}; +use crate::symbol::with_symbol_table; +use crate::{dprint, dprintln}; + +// TODO(fix): Decide which messages should be `Sometimes` + +/// Leave this as a struct, in case more options are added in the future. Plus it is more explicit. +#[derive(Debug)] +pub struct DebuggerOptions { + pub command: Option, +} + +pub struct Debugger { + /// Must not be mutated. + initial_state: RunState, + asm_source: AsmSource, + + status: Status, + command_source: Source, + + breakpoints: Breakpoints, + /// Used to allow breakpoint to be passed on second attempt. + /// + /// Compare this with a `HALT` instruction, which is NEVER passed with basic commands + /// (`progress`, `next`, `continue`, `finish`). + current_breakpoint: Option, + + /// Amount of instructions executed since last command. + instruction_count: u32, + /// Whether PC should be displayed on next command prompt. + should_echo_pc: bool, +} + +/// Reference to assembly source code. +/// +/// Used by `assembly` and `break list` commands. +struct AsmSource { + orig: u16, + ast: Vec, + src: &'static str, +} + +/// The current status of the debugger execution loop. +#[derive(Debug, Default)] +enum Status { + /// Keep executing user commands, until one changes the debugger status. + #[default] + WaitForAction, + /// Execute `count` instructions. + /// + /// Stop execution early if breakpoint or `HALT` is reached. 
+ /// + /// Subroutines are not treated specially; `JSR`/`JSRR` and `RET` instructions are treated as + /// any other instruction is. + Step { count: u16 }, + /// Execute the next instruction, or the whole subroutine if next instruction is `JSR`/`JSRR`, + /// until a breakpoint or `HALT`. + /// + /// Stop execution early if breakpoint or `HALT` is reached. + Next { return_addr: u16 }, + /// Execute all instructions until breakpoint or `HALT` is reached. + Continue, + /// Execute all instructions until `RET` instruction, breakpoint, or `HALT` is reached. + /// + /// Used to 'finish' a subroutine. + Finish, +} + +/// An action, which the debugger passes to the runtime loop. +#[derive(Debug)] +pub enum Action { + /// Keep executing as normal (with the debugger active). + Proceed, + /// Disable the debugger, keep executing. + StopDebugger, + /// Exit the entire program, simulating an uninterrupted `HALT`. + ExitProgram, +} + +/// An instruction, which is relevant to the debugger, specifically the `finish` and `continue` +/// commands. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum SignificantInstr { + /// Return from a subroutine. + /// Used by `finish`. + Ret, + /// Halt. + /// Used by `continue` and `finish`. + TrapHalt, +} + +impl TryFrom for SignificantInstr { + type Error = (); + fn try_from(instr: u16) -> Result { + let opcode = instr >> 12; + match opcode { + // `RET` is `JMP R7` + 0xC if (instr >> 6) & 0b111 == 7 => Ok(SignificantInstr::Ret), + // `HALT` is `TRAP 0x25` + 0xF if instr & 0xFF == 0x25 => Ok(SignificantInstr::TrapHalt), + _ => Err(()), + } + } +} + +impl Debugger { + /// Must only be called *once* per process. 
+ pub(super) fn new( + opts: DebuggerOptions, + initial_state: RunState, + breakpoints: impl Into, + ast: Vec, + src: &'static str, + ) -> Self { + let orig = initial_state.pc(); + Self { + initial_state, + asm_source: AsmSource { orig, ast, src }, + + status: Status::default(), + command_source: Source::from(opts.command), + + breakpoints: breakpoints.into(), + current_breakpoint: None, + + instruction_count: 0, + should_echo_pc: true, + } + } + + /// Read and execute user commands, until an [`Action`] is raised. + pub(super) fn next_action(&mut self, state: &mut RunState) -> Action { + if state.pc() < self.orig() { + // This shouldn't occur anyway + dprintln!( + Always, + Error, + // TODO(feat): Better message + "Out of bounds of user program memory. Pausing execution." + ); + self.status = Status::WaitForAction; + } + if state.pc() >= USER_MEMORY_END && state.pc() != HALT_ADDRESS { + dprintln!( + Always, + Error, + "Reached end of user program memory. Pausing execution." + ); + self.status = Status::WaitForAction; + } + + let instr = SignificantInstr::try_from(state.mem(state.pc())).ok(); + self.check_interrupts(state.pc(), instr); + + // `HALT` and breakpoints should be already handled by caller + loop { + match &mut self.status { + Status::WaitForAction => { + // Continue loop until action is given + if let Some(action) = self.run_command(state) { + return action; + } + } + + Status::Step { count } => { + if *count > 0 { + *count -= 1; + } else { + self.status = Status::WaitForAction; + } + return Action::Proceed; + } + + Status::Next { return_addr } => { + if state.pc() == *return_addr { + // If subroutine was executed (for `JSR`/`JSRR` + `RET`) + // As opposed to a single instruction + if self.instruction_count > 1 { + dprintln!( + Always, + Warning, + "Reached end of subroutine. Pausing execution." 
+ ); + } + self.status = Status::WaitForAction; + continue; + } + return Action::Proceed; + } + + Status::Continue => { + return Action::Proceed; + } + + Status::Finish => { + if instr == Some(SignificantInstr::Ret) { + dprintln!( + Always, + Warning, + "Reached end of subroutine. Pausing execution." + ); + // Execute `RET` before prompting command again + self.status = Status::Step { count: 0 }; + } + return Action::Proceed; + } + } + } + } + + /// An 'interrupt' here is a breakpoint or `HALT` trap. + fn check_interrupts(&mut self, pc: u16, instr: Option) { + // Always break from `continue|finish|step|next` on a breakpoint or `HALT` + // Breaking on `RET` (for `finish`), and end of `step` and `next` is handled later + + // Remember if previous cycle paused on the same breakpoint + // If so, don't break now + if let Some(breakpoint) = self + .breakpoints + .get(pc) + .filter(|_| self.current_breakpoint != Some(pc)) + { + if breakpoint.is_predefined { + dprintln!( + Always, + Warning, + "Reached predefined breakpoint. Pausing execution." + ); + } else { + dprintln!(Always, Warning, "Reached breakpoint. Pausing execution."); + } + self.current_breakpoint = Some(pc); + self.status = Status::WaitForAction; + return; + } + + // Always break on `HALT` (unlike breakpoints) + if instr == Some(SignificantInstr::TrapHalt) { + dprintln!(Always, Warning, "Reached HALT. Pausing execution."); + self.status = Status::WaitForAction; + return; + } + + // Only reset current breakpoint if not interrupted. + self.current_breakpoint = None; + } + + /// Read and execute the next [`Command`], returning an [`Action`] if it is raised. 
+ fn run_command(&mut self, state: &mut RunState) -> Option { + debug_assert!( + matches!(self.status, Status::WaitForAction), + "`run_command` must only be called if `status == WaitForAction`", + ); + + Output::Debugger(Condition::Always, Default::default()).start_new_line(); + + if self.should_echo_pc { + dprintln!(Sometimes, Info, "Program counter at: 0x{:04x}.", state.pc()); + self.should_echo_pc = false; + } + if self.instruction_count > 0 { + dprintln!( + Always, + Info, + "Executed {} instruction{}.", + self.instruction_count, + if self.instruction_count == 1 { "" } else { "s" }, + ); + self.instruction_count = 0; + } + + // Convert `EOF` to `quit` command + let command = self.next_command().unwrap_or(Command::Quit); + + match command { + Command::Quit => return Some(Action::StopDebugger), + Command::Exit => return Some(Action::ExitProgram), + + Command::Reset => { + *state = self.initial_state.clone(); + self.should_echo_pc = true; + // Other fields either: + // - Shouldn't be mutated/reset: `initial_state`, `asm_source`, `command_source`, `breakpoints` + // - Or would be redundant to do so: `status`, `current_breakpoint`, `instruction_count` + dprintln!(Always, Warning, "Reset program to initial state."); + } + + Command::Help => { + dprintln!(Always, Special, "\n{}", include_str!("./help.txt")); + } + + Command::Continue => { + self.status = Status::Continue; + self.should_echo_pc = true; + dprintln!(Always, Info, "Continuing..."); + } + + Command::Finish => { + self.status = Status::Finish; + self.should_echo_pc = true; + dprintln!(Always, Info, "Finishing subroutine..."); + } + + Command::Step { count } => { + self.status = Status::Step { count: count - 1 }; + self.should_echo_pc = true; + } + + Command::Next => { + self.status = Status::Next { + return_addr: state.pc() + 1, + }; + self.should_echo_pc = true; + } + + Command::Get { location } => match location { + Location::Register(register) => { + dprintln!(Sometimes, Info, "Register R{}:", register 
as u16); + Output::Debugger(Condition::Always, Default::default()) + .print_integer(state.reg(register as u16)); + } + Location::Memory(location) => { + let address = self.resolve_location_address(state, &location)?; + dprintln!(Sometimes, Info, "Memory at address 0x{:04x}:", address); + Output::Debugger(Condition::Always, Default::default()) + .print_integer(state.mem(address)); + } + }, + + Command::Set { location, value } => match location { + Location::Register(register) => { + *state.reg_mut(register as u16) = value; + dprintln!( + Always, + Warning, + "Updated register R{} to 0x{:04x}.", + register as u16, + value, + ); + } + Location::Memory(location) => { + let address = self.resolve_location_address(state, &location)?; + *state.mem_mut(address) = value; + dprintln!( + Always, + Warning, + "Updated memory at address 0x{:04x} to 0x{:04x}.", + address, + value, + ); + } + }, + + Command::Registers => { + dprintln!(Sometimes, Info, "Registers:"); + Output::Debugger(Condition::Always, Default::default()).print_registers(state); + } + + Command::Jump { location } => { + let address = self.resolve_location_address(state, &location)?; + if !(self.orig()..USER_MEMORY_END).contains(&address) { + dprintln!( + Always, + Error, + "Address is not in user address space. Must be in range [0x{:04x}, 0x{:04x}).", + self.orig(), + USER_MEMORY_END, + ); + return None; + } + *state.pc_mut() = address; + self.should_echo_pc = true; + dprintln!(Always, Warning, "Set program counter to 0x{:04x}", address); + } + + Command::Eval { instruction } => { + self.should_echo_pc = true; + eval::eval(state, instruction); + } + + Command::Source { location } => { + // TODO(feat): Only check memory in context range + if !state.memory_equals(&self.initial_state) { + dprintln!( + Always, + Warning, + "Note: Program memory may have been modified." 
+ ); + } + if let Some(address) = self.resolve_location_address(state, &location) { + self.asm_source.show_line_context(address); + } + } + + Command::BreakAdd { location } => { + let address = self.resolve_location_address(state, &location)?; + if self.breakpoints.insert(Breakpoint { + address, + is_predefined: false, + }) { + dprintln!( + Always, + Error, + "Breakpoint already exists at 0x{:04x}.", + address + ); + } else { + dprintln!(Always, Warning, "Added breakpoint at 0x{:04x}.", address); + } + } + + Command::BreakRemove { location } => { + let address = self.resolve_location_address(state, &location)?; + if self.breakpoints.remove(address) { + dprintln!(Always, Warning, "Removed breakpoint at 0x{:04x}.", address); + } else { + dprintln!(Always, Error, "No breakpoint exists at 0x{:04x}.", address); + } + } + + Command::BreakList => { + if self.breakpoints.is_empty() { + dprintln!(Always, Info, "No breakpoints exist."); + } else { + dprintln!(Always, Info, "Breakpoints:"); + for (i, breakpoint) in self.breakpoints.iter().enumerate() { + if Output::is_minimal() { + dprintln!(Always, Info, "0x{:04x}", breakpoint.address); + continue; + } + dprint!( + Always, + Info, + "\x1b[2m{}\x1b[0m 0x{:04x} \x1b[2m──\x1b[0m ", + if i + 1 == self.breakpoints.len() { + "╰─" + } else { + "├─" + }, + breakpoint.address + ); + self.asm_source.show_single_line(breakpoint.address); + } + } + } + } + + None + } + + /// Returns `None` on EOF. 
+ fn next_command(&mut self) -> Option { + // Loop until valid command or EOF + loop { + let line = self.command_source.read()?.trim(); + // Necessary, since `Command::try_from` assumes non-empty line + if line.is_empty() { + continue; + } + + let command = match Command::try_from(line) { + Ok(command) => command, + Err(error) => { + dprintln!(Always, Error, "{}", error); + dprintln!(Always, Error, "Type `help` for a list of commands."); + continue; + } + }; + + return Some(command); + } + } + + /// Returns `None` if `location` is out of bounds or an invalid label. + fn resolve_location_address(&self, state: &RunState, location: &MemoryLocation) -> Option { + match location { + MemoryLocation::Address(address) => Some(*address), + MemoryLocation::PCOffset(offset) => self.resolve_pc_offset(state.pc(), *offset), + MemoryLocation::Label(label) => self.resolve_label_address(label), + } + } + + /// Returns `None` if `pc + offset` is out of bounds. + fn resolve_pc_offset(&self, pc: u16, offset: i16) -> Option { + let Some(address) = self.add_address_offset(pc, offset) else { + dprintln!( + Always, + Error, + "Program counter + offset is out of bounds of memory." + ); + return None; + }; + Some(address) + } + + /// Returns `None` if `label` is out of bounds or an invalid label. + fn resolve_label_address(&self, label: &Label) -> Option { + let Some(address) = get_label_address(&label.name) else { + dprintln!(Always, Error, "Label not found named `{}`.", label.name); + return None; + }; + + let Some(address) = self.add_address_offset(address + self.orig(), label.offset) else { + dprintln!( + Always, + Error, + "Label address + offset is out of bounds of memory." + ); + return None; + }; + + dprintln!( + Always, + Info, + "Label `{}` is at address 0x{:04x}.", + label.name, + address + ); + Some(address) + } + + /// Returns `None` if `pc + offset` is out of bounds. 
+ fn add_address_offset(&self, address: u16, offset: i16) -> Option { + let address = address as i16 + offset; + // Check address in user program area + if address >= self.orig() as i16 && (address as u16) < USER_MEMORY_END { + Some(address as u16) + } else { + None + } + } + + pub(super) fn orig(&self) -> u16 { + self.initial_state.pc() + } + + pub(super) fn increment_instruction_count(&mut self) { + self.instruction_count += 1; + } +} + +fn get_label_address(name: &str) -> Option { + with_symbol_table(|sym| sym.get(name).copied()) + // Account for PC being incremented before instruction is executed + .map(|addr| addr - 1) +} + +impl AsmSource { + /// Show lines surrounding instruction/directive corresponding to `address`. + pub fn show_line_context(&self, address: u16) { + let Some(stmt) = self.get_source_statement(address) else { + return; + }; + let report = miette::miette!( + severity = miette::Severity::Advice, + labels = vec![miette::LabeledSpan::at( + stmt.span, + format!("At address 0x{:04x}", address), + )], + "", // TODO(feat): Maybe add a message here? + ) + .with_source_code(self.src); + eprintln!("{:?}", report); + } + + /// Show instruction/directive corresponding to `address`, with no context. + pub fn show_single_line(&self, address: u16) { + let Some(stmt) = self.get_source_statement(address) else { + return; + }; + let start = stmt.span.offs(); + let end = start + stmt.span.len(); + let line = &self.src[start..end]; + dprintln!(Always, Normal, "{}", line); + } + + /// Get [`AsmLine`] corresponding to `address`. + /// + /// Used to access source code span. 
+ fn get_source_statement(&self, address: u16) -> Option<&AsmLine> { + if address < self.orig || (address - self.orig) as usize >= self.ast.len() { + dprintln!( + Always, + Info, + "Address 0x{:04x} does not correspond to an instruction", + address + ); + return None; + }; + let stmt = self + .ast + .get((address - self.orig) as usize) + .expect("index was checked to be within bounds above"); + Some(stmt) + } +} diff --git a/src/debugger/parse.rs b/src/debugger/parse.rs new file mode 100644 index 0000000..99b5e36 --- /dev/null +++ b/src/debugger/parse.rs @@ -0,0 +1,1131 @@ +use super::command::{CommandName, Label, Location, MemoryLocation}; +use super::error::{ArgumentError, CommandError, ValueError}; +use crate::symbol::Register; + +#[derive(Debug, PartialEq)] +enum Argument { + Register(Register), + Integer(i32), + Label(Label), + PCOffset(i16), +} + +#[derive(Clone, Copy, Debug)] +enum Sign { + Positive = 1, + Negative = -1, +} + +#[derive(Clone, Copy, Debug)] +enum Radix { + Binary = 2, + Octal = 8, + Decimal = 10, + Hex = 16, +} + +impl Radix { + /// Parse a single digit in a given radix. + pub fn parse_digit(&self, ch: char) -> Option { + Some(match self { + Self::Binary => match ch { + '0' => 0, + '1' => 1, + _ => return None, + }, + Self::Octal => match ch { + '0'..='7' => ch as u8 - b'0', + _ => return None, + }, + Self::Decimal => match ch { + '0'..='9' => ch as u8 - b'0', + _ => return None, + }, + Self::Hex => match ch { + '0'..='9' => ch as u8 - b'0', + 'a'..='f' => ch as u8 - b'a' + 10, + 'A'..='F' => ch as u8 - b'A' + 10, + _ => return None, + }, + }) + } +} + +/// Try to convert an `i32` into another integer type. +fn resize_int>(integer: i32) -> Result { + integer + .try_into() + .map_err(|_| ValueError::IntegerTooLarge {}) +} + +/// Returns `true` if `name` matchs any item of `candidates` (case insensitive). 
fn matches(name: &str, candidates: &[&str]) -> bool {
    candidates
        .iter()
        .any(|candidate| name.eq_ignore_ascii_case(candidate))
}

/// Returns the first [`CommandName`] with an alias matching `name` (case insensitive).
///
/// Returns `None` if no match was found.
fn find_match(name: &str, commands: &[(CommandName, &[&str])]) -> Option<CommandName> {
    commands
        .iter()
        .find_map(|(command, candidates)| matches(name, candidates).then_some(*command))
}

impl Argument {
    /// Human-readable name of the argument's type, for error messages.
    pub fn kind(&self) -> &'static str {
        match self {
            Argument::PCOffset(_) => "program counter offset",
            Argument::Label(_) => "label",
            Argument::Integer(_) => "integer",
            Argument::Register(_) => "register",
        }
    }
}

/// Cursor over one command line, handing out the command name and then its arguments.
pub struct CommandIter<'a> {
    buffer: &'a str,
    /// Characters before this index have been successfully parsed.
    base: usize,
    /// Characters between base..head are currently being parsed.
    head: usize,
    /// Amount of arguments requested (successfully or not).
    ///
    /// Must only be incremented by [`Self::next_argument`]
    arg_count: u8,
}

impl<'a> CommandIter<'a> {
    /// Start parsing `buffer` from its first character.
    pub fn from(buffer: &'a str) -> Self {
        Self {
            buffer,
            arg_count: 0,
            base: 0,
            head: 0,
        }
    }

    /// Amount of arguments requested so far (successfully or not).
    pub fn arg_count(&self) -> u8 {
        self.arg_count
    }

    /// Parse and consume command name.
    ///
    /// Considers multi-word command names (i.e. subcommands) as one name. Eg. `break add`.
    ///
    /// Assumes line is non-empty.
    pub fn get_command_name(&mut self) -> Result<CommandName, CommandError> {
        let first_part = self.next_command_name_part();
        // The command source is expected to only hand over lines containing non-whitespace
        // characters, so the initial name part should always be present.
        debug_assert!(first_part.is_some(), "missing command name");
        let command_name = first_part.unwrap_or("");

        // TODO(feat): Add more aliases (such as undocumented typo aliases)
        #[rustfmt::skip]
        let commands: &[(_, &[_])] = &[
            (CommandName::Help,        &["help", "--help", "h", "-h"]),
            (CommandName::Continue,    &["continue", "cont", "c"]), // proceed
            (CommandName::Finish,      &["finish", "fin", "f"]),
            (CommandName::Exit,        &["exit"]),
            (CommandName::Quit,        &["quit", "q"]),
            (CommandName::Registers,   &["registers", "reg", "r"]),
            (CommandName::Reset,       &["reset"]),
            (CommandName::Step,        &["progress", "p"]), // advance
            (CommandName::Next,        &["next", "n"]),
            (CommandName::Get,         &["get", "g"]),
            (CommandName::Set,         &["set", "s"]),
            (CommandName::Jump,        &["jump", "j"]),
            (CommandName::Source,      &["assembly", "asm", "a"]), // source
            (CommandName::Eval,        &["eval", "e"]),
            (CommandName::BreakList,   &["breaklist", "bl"]),
            (CommandName::BreakAdd,    &["breakadd", "ba"]),
            (CommandName::BreakRemove, &["breakremove", "br"]),
        ];
        let break_command = &["break", "b"];
        #[rustfmt::skip]
        let break_subcommands: &[(_, &[_])] = &[
            (CommandName::BreakList,   &["list", "l"]),
            (CommandName::BreakAdd,    &["add", "a"]),
            (CommandName::BreakRemove, &["remove", "r"]),
        ];

        // Single-word commands take priority
        if let Some(command) = find_match(command_name, commands) {
            return Ok(command);
        }

        // Two-word `break <subcommand>` form. Every path through this branch returns.
        if matches(command_name, break_command) {
            let command_name = break_command[0]; // Normalize name and get as `'static`

            return match self.next_command_name_part() {
                None => Err(CommandError::MissingSubcommand { command_name }),
                Some(subname) => find_match(subname, break_subcommands).ok_or_else(|| {
                    CommandError::InvalidSubcommand {
                        command_name,
                        subcommand_name: subname.to_string(),
                    }
                }),
            };
        }

        Err(CommandError::InvalidCommand {
            command_name: command_name.to_string(),
        })
    }

    /// Parse and consume next integer argument.
    ///
    /// A missing argument is an error, reported with `expected_count` and the amount of
    /// arguments requested before this call.
    pub fn next_integer(
        &mut self,
        argument_name: &'static str,
        expected_count: u8,
    ) -> Result<u16, ArgumentError> {
        // Captured before parsing, since requesting an argument increments the count
        let missing = ArgumentError::MissingArgument {
            argument_name,
            expected_count,
            actual_count: self.arg_count,
        };
        self.next_integer_inner(argument_name, Err(missing))
    }

    /// Parse and consume next positive integer argument, defaulting to `1`.
    ///
    /// Values less than `1` are replaced with `1`.
    pub fn next_positive_integer_or_default(
        &mut self,
        argument_name: &'static str,
    ) -> Result<u16, ArgumentError> {
        let value = self.next_integer_inner(argument_name, Ok(1))?;
        Ok(value.max(1))
    }

    /// Parse and consume next integer argument. Use default result value if argument is `None`.
    fn next_integer_inner(
        &mut self,
        argument_name: &'static str,
        default: Result<u16, ArgumentError>,
    ) -> Result<u16, ArgumentError> {
        let argument = self.next_argument(argument_name)?;
        match argument {
            None => default,

            Some(Argument::Integer(count)) => {
                resize_int(count).map_err(|error| ArgumentError::InvalidValue {
                    argument_name,
                    error,
                })
            }

            // Any other kind of argument is a type mismatch
            Some(other) => Err(ArgumentError::InvalidValue {
                argument_name,
                error: ValueError::MismatchedType {
                    expected_type: "integer",
                    actual_type: other.kind(),
                },
            }),
        }
    }

    /// Parse and consume next [`Location`] argument: a register or [`MemoryLocation`].
+ pub fn next_location( + &mut self, + argument_name: &'static str, + expected_count: u8, + ) -> Result { + let actual_count = self.arg_count; + match self.next_argument(argument_name)? { + Some(Argument::Register(register)) => Ok(Location::Register(register)), + + Some(Argument::Integer(address)) => Ok(Location::Memory(MemoryLocation::Address( + resize_int(address).map_err(|error| ArgumentError::InvalidValue { + argument_name, + error, + })?, + ))), + + Some(Argument::Label(label)) => Ok(Location::Memory(MemoryLocation::Label(label))), + + Some(Argument::PCOffset(offset)) => { + Ok(Location::Memory(MemoryLocation::PCOffset(offset))) + } + + None => Err(ArgumentError::MissingArgument { + argument_name, + expected_count, + actual_count, + }), + } + } + + /// Parse and consume next [`MemoryLocation`] argument. + pub fn next_memory_location( + &mut self, + argument_name: &'static str, + expected_count: u8, + ) -> Result { + let actual_count = self.arg_count; + self.next_memory_location_inner( + argument_name, + Err(ArgumentError::MissingArgument { + argument_name, + expected_count, + actual_count, + }), + ) + } + + /// Parse and consume next [`MemoryLocation`] argument, defaulting to program counter + /// ([`MemoryLocation::PCOffset`]). + pub fn next_memory_location_or_default( + &mut self, + argument_name: &'static str, + ) -> Result { + self.next_memory_location_inner(argument_name, Ok(MemoryLocation::PCOffset(0))) + } + + /// Parse and consume next [`MemoryLocation`] argument. Use default result value if argument is `None`. + fn next_memory_location_inner( + &mut self, + argument_name: &'static str, + default: Result, + ) -> Result { + match self.next_argument(argument_name)? 
{ + Some(Argument::Integer(address)) => Ok(MemoryLocation::Address( + resize_int(address).map_err(|error| ArgumentError::InvalidValue { + argument_name, + error, + })?, + )), + + Some(Argument::Label(label)) => Ok(MemoryLocation::Label(label)), + + Some(Argument::PCOffset(offset)) => Ok(MemoryLocation::PCOffset(offset)), + + Some(value) => Err(ArgumentError::InvalidValue { + argument_name, + error: ValueError::MismatchedType { + expected_type: "address, label, or program counter offset", + actual_type: value.kind(), + }, + }), + + None => default, + } + } + + /// Returns an error if the command contains any arguments which haven't been consumed. + pub fn expect_end_of_command(&mut self, expected: u8, actual: u8) -> Result<(), ArgumentError> { + self.skip_whitespace(); + let ch = self.peek(); + debug_assert!( + !matches!(ch, Some(';' | '\n')), + "semicolons/newlines should have been handled already" + ); + if !matches!(ch, None | Some(';' | '\n')) { + return Err(ArgumentError::TooManyArguments { + expected_count: expected, + actual_count: actual, + }); + } + Ok(()) + } + + /// Consume the rest of the command as one string. + /// + /// Leading/trailing whitespace is trimmed. + /// + /// Used for `eval` command. + /// + /// This can be `String` bc it will be allocated later regardless for [`Command::Eval`]. + pub fn collect_rest(&mut self) -> String { + let rest = self.buffer[self.head..].trim().to_string(); + self.head = self.buffer.len(); + rest + } + + /// Get next character at head, WITHOUT incrementing head. + fn peek(&self) -> Option { + if self.head >= self.buffer.len() { + return None; + } + let next = self.buffer[self.head..].chars().next()?; + Some(next) + } + /// Get next character at head, incrementing head. + fn next(&mut self) -> Option { + let next = self.peek()?; + self.head += next.len_utf8(); + Some(next) + } + + /// Get characters between base..head, WITHOUT updating base. 
+ fn get(&self) -> &str { + assert!(self.base <= self.head, "base exceeded head"); + &self.buffer[self.base..self.head] + } + /// Get characters between base..head, updating base. + fn take(&mut self) -> &str { + assert!(self.base <= self.head, "base exceeded head"); + let slice = &self.buffer[self.base..self.head]; + self.set_base(); + slice + } + + /// Update base to head. + fn set_base(&mut self) { + self.base = self.head; + } + /// Backtrack head to base. + fn reset_head(&mut self) { + self.head = self.base; + } + + fn is_end_of_argument(&self) -> bool { + let ch = self.peek(); + debug_assert!( + !matches!(ch, Some(';' | '\n')), + "semicolons/newlines should have been handled already" + ); + matches!(ch, None | Some(' ' | ';' | '\n')) + } + + /// Consume all whitespace before next non-whitespace character. + fn skip_whitespace(&mut self) { + while self.peek().is_some_and(|ch| ch.is_whitespace()) { + self.next(); + } + self.set_base(); + } + + /// Used for both main command and subcommand (eg. `break add`). + fn next_command_name_part(&mut self) -> Option<&str> { + self.skip_whitespace(); + self.reset_head(); + + while self.peek().is_some_and(|ch| !ch.is_whitespace()) { + self.next(); + } + + if self.get().is_empty() { + return None; + } + Some(self.take()) + } + + /// Parse and consume the next [`Argument`]. + fn next_argument( + &mut self, + argument_name: &'static str, + ) -> Result, ArgumentError> { + self.next_argument_inner() + .map_err(|error| ArgumentError::InvalidValue { + argument_name, + error, + }) + } + + fn next_argument_inner(&mut self) -> Result, ValueError> { + debug_assert!( + self.head == self.base, + "should have been called with head==base" + ); + self.reset_head(); + self.skip_whitespace(); + + self.arg_count += 1; + + if self.is_end_of_argument() { + return Ok(None); + } + if let Some(offset) = self.next_pc_offset()? 
{ + return Ok(Some(Argument::PCOffset(offset))); + } + if let Some(register) = self.next_register() { + return Ok(Some(Argument::Register(register))); + } + if let Some(integer) = self.next_integer_token(false)? { + return Ok(Some(Argument::Integer(integer))); + } + if let Some(label) = self.next_label_token()? { + return Ok(Some(Argument::Label(label))); + } + Err(ValueError::MalformedValue {}) + } + + /// Parse and consume the next [`Register`] argument. + fn next_register(&mut self) -> Option { + self.reset_head(); + // Don't skip whitespace + + if !self.next().is_some_and(|ch| ch == 'r' || ch == 'R') { + return None; + } + let register = match self.next()? { + '0' => Register::R0, + '1' => Register::R1, + '2' => Register::R2, + '3' => Register::R3, + '4' => Register::R4, + '5' => Register::R5, + '6' => Register::R6, + '7' => Register::R7, + _ => return None, + }; + + // Possibly the start of a label + if !self.is_end_of_argument() { + return None; + } + self.set_base(); + Some(register) + } + + // TODO(fix): Should `x1h` be skipped as an integer, and be parsed as a label? + + /// Parse and consume the next integer argument. + /// + /// Extremely liberal in accepted syntax. + /// + /// Accepts: + /// - Decimal (optional `#`), hex (`x`/`X`), octal (`o`/`O`), and binary (`b`/`B`) + /// - Optional single zero before non-decimal radix prefix. Eg. `0x4` + /// - Leading zeros after prefix and sign. Eg. `0x0004`, `#-03` + /// - Sign character before xor after radix prefix. Eg. `-#2`, `x+4` + /// + /// Returns `Ok(None)` (not an integer) for: + /// - Empty token + /// - Non-decimal radix prefix, with no zero before it, and non-digits after it. Eg. `xLabel`, `o` + /// + /// Returns `Err` (invalid integer and invalid token) for: + /// - Invalid digits for the given radix + /// - Decimal radix prefix `#` with zeros before it. Eg. `0#2` + /// - Decimal radix prefix `#` with no digits after it. Eg. 
`#` + /// - Multiple sign characters (before or after prefix) + /// - Missing sign character '-' or '+', if `require_sign == true` + /// - Multiple zeros before radix prefix. Eg. `00x4` + /// - Absolute value out of bounds for `i32`. (Does *NOT* check if integer fits in specific bit size) + fn next_integer_token(&mut self, require_sign: bool) -> Result, ValueError> { + self.reset_head(); + // Don't skip whitespace + + // Take sign BEFORE prefix + let first_sign: Option = self.next_integer_sign(); + + // Take optional prefix + let Some((radix, has_leading_zeros, prefix_is_symbol)) = self.next_integer_prefix()? else { + // Sign was already given, so it must be an invalid token + if first_sign.is_some() { + return Err(ValueError::MalformedInteger {}); + } + return Ok(None); + }; + + // Take sign AFTER prefix + let second_sign = self.next_integer_sign(); + let sign = match (first_sign, second_sign) { + (Some(sign), None) => Some(sign), + (None, Some(sign)) => Some(sign), + (None, None) => { + if require_sign { + return Err(ValueError::MalformedInteger {}); + } + None + } + // Disallow multiple sign characters: '-x-...', '++...', etc + (Some(_), Some(_)) => return Err(ValueError::MalformedInteger {}), + }; + + // Check next character is digit + if self.peek().is_none_or(|ch| radix.parse_digit(ch).is_none()) { + // Sign, '#', or pre-prefix zeros were given, so it must be an invalid integer token + if sign.is_some() || has_leading_zeros || prefix_is_symbol { + return Err(ValueError::MalformedInteger {}); + } + return Ok(None); + }; + + // Take digits until non-digit character + // Note that this loop handles post-prefix leading zeros like any other digit + let mut integer: i32 = 0; + while let Some(ch) = self.peek() { + if self.is_end_of_argument() { + break; + } + let Some(digit) = radix.parse_digit(ch) else { + return Err(ValueError::MalformedInteger {}); + }; + self.next(); + + // Re-checked later on convert to smaller int types + if integer > i32::MAX / radix as i32 { 
+ return Err(ValueError::IntegerTooLarge {}); + } + + integer *= radix as i32; + integer += digit as i32; + } + if let Some(sign) = sign { + integer *= sign as i32; + } + + if !self.is_end_of_argument() { + return Err(ValueError::MalformedInteger {}); + } + self.set_base(); + Ok(Some(integer)) + } + + /// Consume the sign character for an integer. + /// Must only be called by `next_token_integer`. + fn next_integer_sign(&mut self) -> Option { + // Don't reset head + // Don't skip whitespace + + let sign = match self.peek() { + Some('-') => Sign::Negative, + Some('+') => Sign::Positive, + _ => return None, + }; + + self.next(); + self.set_base(); + Some(sign) + } + + /// Get radix from integer prefix. + /// Must only be called by `next_token_integer`. + /// + /// Returns radix, whether leading zeros are included, and whether radix prefix is a + /// non-alphabetic symbol (i.e. `#`). + // TODO(refactor): Possibly return a named struct type + fn next_integer_prefix(&mut self) -> Result, ValueError> { + // Don't reset head + // Don't skip whitespace + + // Take single leading zero before prefix + let has_leading_zeros = self.peek().is_some_and(|ch| ch == '0'); + if has_leading_zeros { + self.next(); + } + + // Number is all zeroes (no radix prefix) + // Zeroes were taken as leading zeros + if has_leading_zeros && self.is_end_of_argument() { + self.reset_head(); + return Ok(Some((Radix::Decimal, true, false))); + } + + let mut next_char = true; // Whether to increment head for prefix character + let (radix, prefix_is_symbol) = match self.peek() { + Some('#') => { + // Disallow '0#...' + if has_leading_zeros { + return Err(ValueError::MalformedInteger {}); + } + (Radix::Decimal, true) + } + // Allow 'b...' or 'x...' 
+ // Caller must check next characters are valid digits in the radix, so as to not parse + // non-integer tokens like 'xLabel' as integers (and fail) + Some('b' | 'B') => (Radix::Binary, false), + Some('o' | 'O') => (Radix::Octal, false), + Some('x' | 'X') => (Radix::Hex, false), + // No prefix. Don't skip character + Some('0'..='9') => { + next_char = false; + (Radix::Decimal, false) + } + Some('-' | '+') => { + // Disallow '0-...' and '0+...' + // Disallow '--...', '-+...', etc + return Err(ValueError::MalformedInteger {}); + } + // Not an integer + _ => return Ok(None), + }; + if next_char { + self.next(); // Skip prefix character + } + + // Don't set base; might not be an integer yet + Ok(Some((radix, has_leading_zeros, prefix_is_symbol))) + } + + /// Returns `true` if the given character can appear at the start of a label. + fn label_can_start_with(ch: char) -> bool { + matches!(ch, 'a'..='z' | 'A'..='Z' | '_') + } + /// Returns `true` if the given character can appear as a subsequent character of a label. + fn label_can_contain(ch: char) -> bool { + matches!(ch, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_') + } + + /// Consume the next [`Label`] argument. + fn next_label_token(&mut self) -> Result, ValueError> { + self.reset_head(); + // Don't skip whitespace + + // Check first character can begin a label + if !self.next().is_some_and(Self::label_can_start_with) { + return Ok(None); + }; + // Take characters until non-alphanumeric + while self.peek().is_some_and(Self::label_can_contain) { + self.next(); + } + + let label = self.take().to_string(); + let offset = resize_int(self.next_integer_token(true)?.unwrap_or(0))?; + + if !self.is_end_of_argument() { + return Err(ValueError::MalformedLabel {}); + } + self.set_base(); + Ok(Some(Label { + name: label, + offset, + })) + } + + /// Parse and consume the next PC offset argument. 
+ fn next_pc_offset(&mut self) -> Result, ValueError> { + self.reset_head(); + // Don't skip whitespace + + if !self.next().is_some_and(|ch| ch == '^') { + return Ok(None); + } + + self.set_base(); + let offset = resize_int(self.next_integer_token(false)?.unwrap_or(0))?; + + debug_assert!( + self.is_end_of_argument(), + "should have consumed characters until end of argument, whether integer succesfully parsed or not"); + self.set_base(); + Ok(Some(offset)) + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn many_arguments_works() { + let line = " name -54 r3 0x5812 Foo name2 Bar+0x04 4209"; + let mut iter = CommandIter::from(line); + + let argument_name = "dummy"; + + assert_eq!(iter.next_command_name_part(), Some("name")); + assert_eq!( + iter.next_argument(argument_name), + Ok(Some(Argument::Integer(-54))) + ); + assert_eq!( + iter.next_argument(argument_name), + Ok(Some(Argument::Register(Register::R3))) + ); + assert_eq!( + iter.next_argument(argument_name), + Ok(Some(Argument::Integer(0x5812))) + ); + assert_eq!( + iter.next_argument(argument_name), + Ok(Some(Argument::Label(Label { + name: "Foo".into(), + offset: 0, + }))) + ); + assert_eq!(iter.next_command_name_part(), Some("name2")); + assert_eq!( + iter.next_argument(argument_name), + Ok(Some(Argument::Label(Label { + name: "Bar".into(), + offset: 0x04, + }))) + ); + assert_eq!( + iter.next_argument(argument_name), + Ok(Some(Argument::Integer(4209))) + ); + assert_eq!(iter.next_argument(argument_name), Ok(None)); + assert_eq!(iter.next_argument(argument_name), Ok(None)); + } + + macro_rules! 
expect_tokens { + ( $method:ident ($($args:tt)*), $input:expr, $($expected:tt)* ) => {{ + eprintln!("Test input: <{}>", $input); + let mut iter = CommandIter::from($input); + let result = iter.$method($($args)*); + expect_tokens!(@expected result, $($expected)*); + }}; + (@expected $result:expr, Err(_)) => { + assert!($result.is_err()); + }; + (@expected $result:expr, $expected:expr) => { + assert_eq!($result, $expected, stringify!($expected)); + }; + } + + macro_rules! label { + ( $name:expr $(, $offset:expr )? $(,)? ) => { + Label { + name: ($name).into(), + offset: label!(@offset $($offset)?), + } + }; + (@offset $offset:expr) => { $offset }; + (@offset) => { 0 }; + } + + #[test] + fn next_argument_works() { + let argument_name = "dummy"; + macro_rules! expect_argument { ( $($x:tt)* ) => { + expect_tokens!(next_argument(argument_name), $($x)*); + }} + expect_argument!("", Ok(None)); + expect_argument!(" ", Ok(None)); + expect_argument!("r0", Ok(Some(Argument::Register(Register::R0)))); + expect_argument!(" R3 Foo", Ok(Some(Argument::Register(Register::R3)))); + expect_argument!("123", Ok(Some(Argument::Integer(123)))); + expect_argument!(" 123 ", Ok(Some(Argument::Integer(123)))); + expect_argument!("123 Foo", Ok(Some(Argument::Integer(123)))); + expect_argument!("0x-853", Ok(Some(Argument::Integer(-0x853)))); + expect_argument!("Foo ", Ok(Some(Argument::Label(label!("Foo"))))); + expect_argument!("Foo-23", Ok(Some(Argument::Label(label!("Foo", -23))))); + expect_argument!(" Foo 23", Ok(Some(Argument::Label(label!("Foo"))))); + } + + #[test] + #[should_panic] + fn semicolon_panics() { + let argument_name = "dummy"; + expect_tokens!(next_argument(argument_name), " ; ", Err(_)); + } + + #[test] + fn next_register_works() { + macro_rules! 
expect_register { ( $($x:tt)* ) => { + expect_tokens!(next_register(), $($x)*); + }} + + expect_register!("", None); + expect_register!("a", None); + expect_register!("rn", None); + expect_register!("r8", None); + expect_register!("R0n", None); + expect_register!("r0n", None); + expect_register!("r0", Some(Register::R0)); + expect_register!("R7", Some(Register::R7)); + } + + #[test] + fn next_integer_token_works() { + macro_rules! expect_integer { ( $require_sign:expr, $($x:tt)* ) => { + expect_tokens!(next_integer_token($require_sign), $($x)*); + }} + + // These tests cover all edge cases which I can think of + // Invalid or non-integers + expect_integer!(false, "", Ok(None)); // Non-integer + expect_integer!(false, "a", Ok(None)); + expect_integer!(false, "z", Ok(None)); + expect_integer!(false, "&", Ok(None)); + expect_integer!(false, ",", Ok(None)); + expect_integer!(false, "b2", Ok(None)); + expect_integer!(false, "o8", Ok(None)); + expect_integer!(false, "xg", Ok(None)); + expect_integer!(false, "b", Ok(None)); + expect_integer!(false, "o", Ok(None)); + expect_integer!(false, "x", Ok(None)); + expect_integer!(false, "-", Err(_)); // Invalid integers + expect_integer!(false, "+", Err(_)); + expect_integer!(false, "#", Err(_)); + expect_integer!(false, "#-", Err(_)); + expect_integer!(false, "-#", Err(_)); + expect_integer!(false, "-#-", Err(_)); + expect_integer!(false, "-#-24", Err(_)); + expect_integer!(false, "0#0", Err(_)); + expect_integer!(false, "0#24", Err(_)); + expect_integer!(false, "-0#24", Err(_)); + expect_integer!(false, "0#-24", Err(_)); + expect_integer!(false, "-0#-24", Err(_)); + expect_integer!(false, "x-", Err(_)); + expect_integer!(false, "-x", Err(_)); + expect_integer!(false, "-x-", Err(_)); + expect_integer!(false, "-x-24", Err(_)); + expect_integer!(false, "0x", Err(_)); + expect_integer!(false, "0x-", Err(_)); + expect_integer!(false, "-0x", Err(_)); + expect_integer!(false, "-0x-", Err(_)); + expect_integer!(false, "-0x-24", 
Err(_)); + expect_integer!(false, "0-x24", Err(_)); + expect_integer!(false, "00x4", Err(_)); + expect_integer!(false, "##", Err(_)); // Invalid digit for decimal base + expect_integer!(false, "-##", Err(_)); + expect_integer!(false, "#b", Err(_)); + expect_integer!(false, "#-b", Err(_)); + expect_integer!(false, "-#b", Err(_)); + expect_integer!(false, "0b2", Err(_)); // Invalid digit for base + expect_integer!(false, "0o8", Err(_)); + expect_integer!(false, "0xg", Err(_)); + expect_integer!(false, "-b2", Err(_)); + expect_integer!(false, "-o8", Err(_)); + expect_integer!(false, "-xg", Err(_)); + expect_integer!(false, "b-2", Err(_)); + expect_integer!(false, "o-8", Err(_)); + expect_integer!(false, "x-g", Err(_)); + expect_integer!(false, "--4", Err(_)); // Multiple sign characters + expect_integer!(false, "-+4", Err(_)); + expect_integer!(false, "++4", Err(_)); + expect_integer!(false, "+-4", Err(_)); + expect_integer!(false, "#--4", Err(_)); + expect_integer!(false, "#-+4", Err(_)); + expect_integer!(false, "#++4", Err(_)); + expect_integer!(false, "#+-4", Err(_)); + expect_integer!(false, "-#-4", Err(_)); + expect_integer!(false, "-#+4", Err(_)); + expect_integer!(false, "+#+4", Err(_)); + expect_integer!(false, "+#-4", Err(_)); + expect_integer!(false, "--#4", Err(_)); + expect_integer!(false, "-+#4", Err(_)); + expect_integer!(false, "++#4", Err(_)); + expect_integer!(false, "+-#4", Err(_)); + expect_integer!(true, "--4", Err(_)); + expect_integer!(true, "#--4", Err(_)); + expect_integer!(true, "+#-4", Err(_)); + expect_integer!(true, "+-#4", Err(_)); + expect_integer!(true, "#4", Err(_)); // Missing sign character + expect_integer!(true, "x4", Err(_)); + // Simple bounds check (it is not supposed to be super accurate) + expect_integer!(false, "x80000000", Err(_)); + expect_integer!(false, "x7fffffff", Ok(Some(0x7fffffff))); + expect_integer!(false, "x-7fffffff", Ok(Some(-0x7fffffff))); + expect_integer!(false, "x-80000000", Err(_)); + // Decimal + 
expect_integer!(false, "0", Ok(Some(0))); + expect_integer!(false, "00", Ok(Some(0))); + expect_integer!(false, "#0", Ok(Some(0))); + expect_integer!(false, "#00", Ok(Some(0))); + expect_integer!(false, "-#0", Ok(Some(0))); + expect_integer!(false, "+#0", Ok(Some(0))); + expect_integer!(false, "-#00", Ok(Some(0))); + expect_integer!(false, "#-0", Ok(Some(0))); + expect_integer!(false, "#+0", Ok(Some(0))); + expect_integer!(false, "#-00", Ok(Some(0))); + expect_integer!(false, "4", Ok(Some(4))); + expect_integer!(false, "+4", Ok(Some(4))); + expect_integer!(false, "4284", Ok(Some(4284))); + expect_integer!(false, "004284", Ok(Some(4284))); + expect_integer!(false, "#4", Ok(Some(4))); + expect_integer!(false, "#4284", Ok(Some(4284))); + expect_integer!(false, "#004284", Ok(Some(4284))); + expect_integer!(false, "-4", Ok(Some(-4))); + expect_integer!(false, "+4", Ok(Some(4))); + expect_integer!(false, "-4284", Ok(Some(-4284))); + expect_integer!(false, "-004284", Ok(Some(-4284))); + expect_integer!(false, "-#4", Ok(Some(-4))); + expect_integer!(false, "+#4", Ok(Some(4))); + expect_integer!(false, "-#4284", Ok(Some(-4284))); + expect_integer!(false, "-#004284", Ok(Some(-4284))); + expect_integer!(false, "#-4", Ok(Some(-4))); + expect_integer!(false, "#+4", Ok(Some(4))); + expect_integer!(false, "#-4284", Ok(Some(-4284))); + expect_integer!(false, "#-004284", Ok(Some(-4284))); + expect_integer!(true, "-4", Ok(Some(-4))); + expect_integer!(true, "+4", Ok(Some(4))); + expect_integer!(true, "-4284", Ok(Some(-4284))); + expect_integer!(true, "-004284", Ok(Some(-4284))); + expect_integer!(true, "-#4", Ok(Some(-4))); + expect_integer!(true, "+#4", Ok(Some(4))); + expect_integer!(true, "-#4284", Ok(Some(-4284))); + expect_integer!(true, "-#004284", Ok(Some(-4284))); + expect_integer!(true, "#-4", Ok(Some(-4))); + expect_integer!(true, "#+4", Ok(Some(4))); + expect_integer!(true, "#-4284", Ok(Some(-4284))); + expect_integer!(true, "#-004284", Ok(Some(-4284))); + 
expect_integer!(true, "4", Err(_)); + expect_integer!(true, "4284", Err(_)); + expect_integer!(true, "004284", Err(_)); + expect_integer!(true, "#4", Err(_)); + expect_integer!(true, "#4284", Err(_)); + expect_integer!(true, "#004284", Err(_)); + expect_integer!(true, "#4", Err(_)); + // Hex + expect_integer!(false, "x0", Ok(Some(0x0))); + expect_integer!(false, "x00", Ok(Some(0x0))); + expect_integer!(false, "0x0", Ok(Some(0x0))); + expect_integer!(false, "0x00", Ok(Some(0x0))); + expect_integer!(false, "-x0", Ok(Some(0x0))); + expect_integer!(false, "+x0", Ok(Some(0x0))); + expect_integer!(false, "-x00", Ok(Some(0x0))); + expect_integer!(false, "0x-0", Ok(Some(0x0))); + expect_integer!(false, "0x-00", Ok(Some(0x0))); + expect_integer!(false, "-0x0", Ok(Some(0x0))); + expect_integer!(false, "-0x00", Ok(Some(0x0))); + expect_integer!(false, "x4", Ok(Some(0x4))); + expect_integer!(false, "x004", Ok(Some(0x4))); + expect_integer!(false, "x429", Ok(Some(0x429))); + expect_integer!(false, "0x4", Ok(Some(0x4))); + expect_integer!(false, "0x004", Ok(Some(0x4))); + expect_integer!(false, "0x429", Ok(Some(0x429))); + expect_integer!(false, "-x4", Ok(Some(-0x4))); + expect_integer!(false, "+x4", Ok(Some(0x4))); + expect_integer!(false, "-x004", Ok(Some(-0x4))); + expect_integer!(false, "-x429", Ok(Some(-0x429))); + expect_integer!(false, "-0x4", Ok(Some(-0x4))); + expect_integer!(false, "+0x4", Ok(Some(0x4))); + expect_integer!(false, "-0x004", Ok(Some(-0x4))); + expect_integer!(false, "-0x429", Ok(Some(-0x429))); + expect_integer!(false, "x-4", Ok(Some(-0x4))); + expect_integer!(false, "x-004", Ok(Some(-0x4))); + expect_integer!(false, "x+004", Ok(Some(0x4))); + expect_integer!(false, "x-429", Ok(Some(-0x429))); + expect_integer!(false, "-0x4", Ok(Some(-0x4))); + expect_integer!(false, "-0x004", Ok(Some(-0x4))); + expect_integer!(false, "-0x429", Ok(Some(-0x429))); + expect_integer!(false, "+0x429", Ok(Some(0x429))); + expect_integer!(true, "-x4", Ok(Some(-0x4))); + 
expect_integer!(true, "+x4", Ok(Some(0x4))); + expect_integer!(true, "-x004", Ok(Some(-0x4))); + expect_integer!(true, "-x429", Ok(Some(-0x429))); + expect_integer!(true, "-0x4", Ok(Some(-0x4))); + expect_integer!(true, "+0x4", Ok(Some(0x4))); + expect_integer!(true, "-0x004", Ok(Some(-0x4))); + expect_integer!(true, "-0x429", Ok(Some(-0x429))); + expect_integer!(true, "x-4", Ok(Some(-0x4))); + expect_integer!(true, "x-004", Ok(Some(-0x4))); + expect_integer!(true, "x+004", Ok(Some(0x4))); + expect_integer!(true, "x-429", Ok(Some(-0x429))); + expect_integer!(true, "-0x4", Ok(Some(-0x4))); + expect_integer!(true, "-0x004", Ok(Some(-0x4))); + expect_integer!(true, "-0x429", Ok(Some(-0x429))); + expect_integer!(true, "+0x429", Ok(Some(0x429))); + expect_integer!(true, "x4", Err(_)); + expect_integer!(true, "x004", Err(_)); + expect_integer!(true, "x429", Err(_)); + expect_integer!(true, "0x4", Err(_)); + expect_integer!(true, "0x004", Err(_)); + expect_integer!(true, "0x429", Err(_)); + expect_integer!(true, "x4", Err(_)); + expect_integer!(true, "x004", Err(_)); + expect_integer!(true, "x429", Err(_)); + expect_integer!(true, "0x4", Err(_)); + expect_integer!(true, "0x004", Err(_)); + expect_integer!(true, "0x429", Err(_)); + expect_integer!(true, "0x429", Err(_)); + // Octal (0o427==0x117) + expect_integer!(false, "o0", Ok(Some(0x0))); + expect_integer!(false, "o00", Ok(Some(0x0))); + expect_integer!(false, "0o0", Ok(Some(0x0))); + expect_integer!(false, "0o00", Ok(Some(0x0))); + expect_integer!(false, "-o0", Ok(Some(0x0))); + expect_integer!(false, "-o00", Ok(Some(0x0))); + expect_integer!(false, "o-0", Ok(Some(0x0))); + expect_integer!(false, "o-00", Ok(Some(0x0))); + expect_integer!(false, "-0o0", Ok(Some(0x0))); + expect_integer!(false, "-0o00", Ok(Some(0x0))); + expect_integer!(false, "0o-0", Ok(Some(0x0))); + expect_integer!(false, "0o-00", Ok(Some(0x0))); + expect_integer!(false, "o4", Ok(Some(0x4))); + expect_integer!(false, "o004", Ok(Some(0x4))); + 
expect_integer!(false, "o427", Ok(Some(0x117))); + expect_integer!(false, "0o4", Ok(Some(0x4))); + expect_integer!(false, "0o004", Ok(Some(0x4))); + expect_integer!(false, "0o427", Ok(Some(0x117))); + expect_integer!(false, "-o4", Ok(Some(-0x4))); + expect_integer!(false, "-o004", Ok(Some(-0x4))); + expect_integer!(false, "-o427", Ok(Some(-0x117))); + expect_integer!(false, "-0o4", Ok(Some(-0x4))); + expect_integer!(false, "-0o004", Ok(Some(-0x4))); + expect_integer!(false, "-0o427", Ok(Some(-0x117))); + expect_integer!(false, "o-4", Ok(Some(-0x4))); + expect_integer!(false, "o-004", Ok(Some(-0x4))); + expect_integer!(false, "o-427", Ok(Some(-0x117))); + expect_integer!(false, "0o-4", Ok(Some(-0x4))); + expect_integer!(false, "0o-004", Ok(Some(-0x4))); + expect_integer!(false, "0o-427", Ok(Some(-0x117))); + // Binary + expect_integer!(false, "b0", Ok(Some(0b0))); + expect_integer!(false, "b00", Ok(Some(0b0))); + expect_integer!(false, "0b0", Ok(Some(0b0))); + expect_integer!(false, "0b00", Ok(Some(0b0))); + expect_integer!(false, "-b0", Ok(Some(0b0))); + expect_integer!(false, "-b00", Ok(Some(0b0))); + expect_integer!(false, "b-0", Ok(Some(0b0))); + expect_integer!(false, "b-00", Ok(Some(0b0))); + expect_integer!(false, "-0b0", Ok(Some(0b0))); + expect_integer!(false, "-0b00", Ok(Some(0b0))); + expect_integer!(false, "0b-0", Ok(Some(0b0))); + expect_integer!(false, "0b-00", Ok(Some(0b0))); + expect_integer!(false, "b1", Ok(Some(0b1))); + expect_integer!(false, "b101", Ok(Some(0b101))); + expect_integer!(false, "b00101", Ok(Some(0b101))); + expect_integer!(false, "0b1", Ok(Some(0b1))); + expect_integer!(false, "0b101", Ok(Some(0b101))); + expect_integer!(false, "0b00101", Ok(Some(0b101))); + expect_integer!(false, "-b1", Ok(Some(-0b1))); + expect_integer!(false, "-b101", Ok(Some(-0b101))); + expect_integer!(false, "-b00101", Ok(Some(-0b101))); + expect_integer!(false, "b-1", Ok(Some(-0b1))); + expect_integer!(false, "b-101", Ok(Some(-0b101))); + 
expect_integer!(false, "b-00101", Ok(Some(-0b101))); + expect_integer!(false, "-0b1", Ok(Some(-0b1))); + expect_integer!(false, "-0b101", Ok(Some(-0b101))); + expect_integer!(false, "-0b00101", Ok(Some(-0b101))); + expect_integer!(false, "0b-1", Ok(Some(-0b1))); + expect_integer!(false, "0b-101", Ok(Some(-0b101))); + expect_integer!(false, "0b-00101", Ok(Some(-0b101))); + } + + #[test] + fn next_label_token_works() { + macro_rules! expect_label { ( $($x:tt)* ) => { + expect_tokens!(next_label_token(), $($x)*); + }} + + expect_label!("", Ok(None)); + expect_label!("0x1283", Ok(None)); + expect_label!("!@*)#", Ok(None)); + expect_label!("0Foo", Ok(None)); + expect_label!("Foo!", Err(_)); + expect_label!("F", Ok(Some(label!("F")))); + expect_label!("Foo", Ok(Some(label!("Foo")))); + expect_label!("_Foo", Ok(Some(label!("_Foo")))); + expect_label!("F_oo12", Ok(Some(label!("F_oo12")))); + expect_label!("Foo12_", Ok(Some(label!("Foo12_")))); + expect_label!("Foo+0", Ok(Some(label!("Foo", 0)))); + expect_label!("Foo-0", Ok(Some(label!("Foo", 0)))); + expect_label!("Foo+4", Ok(Some(label!("Foo", 4)))); + expect_label!("Foo-43", Ok(Some(label!("Foo", -43)))); + expect_label!("Foo+", Err(_)); + expect_label!("Foo-", Err(_)); + expect_label!("Foo ", Ok(Some(label!("Foo")))); + expect_label!("Foo+4 ", Ok(Some(label!("Foo", 4)))); + expect_label!("Foo-4 !!", Ok(Some(label!("Foo", -4)))); + expect_label!("Foo+ ", Err(_)); + expect_label!("Foo- ", Err(_)); + expect_label!("Foo -4", Ok(Some(label!("Foo")))); + expect_label!("Foo +4", Ok(Some(label!("Foo")))); + expect_label!("Foo+0x034", Ok(Some(label!("Foo", 0x34)))); + expect_label!("Foo-0o4", Ok(Some(label!("Foo", -4)))); + expect_label!("Foo-#24", Ok(Some(label!("Foo", -24)))); + expect_label!("Foo+#024", Ok(Some(label!("Foo", 24)))); + } +} diff --git a/src/debugger/source.rs b/src/debugger/source.rs new file mode 100644 index 0000000..0ec83fc --- /dev/null +++ b/src/debugger/source.rs @@ -0,0 +1,554 @@ +use std::fmt; +use 
std::fs::{self, File}; +use std::io::{self, BufRead, BufReader, IsTerminal, Read, Write}; + +use console::Key; + +use crate::output::DEBUGGER_PRIMARY_COLOR; +use crate::{dprint, dprintln, output::Output}; + +/// Read from argument first, if `Some`. Then read from stream. +// TODO(rename): to `CommandSource` +#[allow(private_interfaces)] // Perhaps a bad practice +#[derive(Debug)] +pub struct Source { + argument: Option, + stream: Stream, +} + +/// Stdin or interactive terminal. +#[derive(Debug)] +enum Stream { + Stdin(Stdin), + Terminal(Terminal), +} + +/// Command-line argument. +#[derive(Debug)] +struct Argument { + buffer: String, + /// Byte index. + cursor: usize, +} + +/// Stdin which is not attached to a terminal, i.e. piped. +#[derive(Debug)] +struct Stdin { + stdin: io::Stdin, + /// Command must be stored somewhere to be referenced. + buffer: String, +} + +/// Interactive unbuffered terminal. +// TODO(feat): Support CTRL+Arrow keybinds +#[derive(Debug)] +struct Terminal { + term: console::Term, + buffer: String, + /// Byte index. + cursor: usize, + /// Visible line cursor in terminal (char index, not byte index). + visible_cursor: usize, + /// History list and file. + history: TerminalHistory, +} + +/// All history information for `Terminal`. +#[derive(Debug)] +struct TerminalHistory { + list: Vec, + /// Focused item in history, or new entry if index==length. + index: usize, + /// `None` indicates failure to open file. + file: Option, +} + +const PROMPT: &str = "DEBUGGER> "; + +/// Print prompt and command. +fn echo_command(command: Option<&str>) { + // Echo prompt and command for non-terminal source + // Equivalent code found in terminal source + if !Output::is_minimal() || command.is_some() { + dprint!(Always, Normal, "\x1b[1m{}", PROMPT); + dprintln!( + Always, + Normal, + "{}", + command.unwrap_or("\x1b[3m(end of input)").trim() + ); + } +} + +/// A trait for objects which can yield a command, by iterating a string or reading a file. 
+pub trait SourceRead { + /// `None` indicates EOF. + /// Returned string slice MAY include leading or trailing whitespace. + fn read(&mut self) -> Option<&str>; +} + +impl Source { + pub fn from(argument: Option) -> Self { + Self { + argument: argument.map(Argument::from), + stream: Stream::new(), + } + } +} + +impl SourceRead for Source { + fn read(&mut self) -> Option<&str> { + // Always try to read from argument first + // If argument is `None`, or if read from argument returns `None`, then read from stream + // Note that `self.argument` cannot then be set to `None`, due to lifetime of returned value + if let Some(argument) = &mut self.argument { + if let Some(command) = argument.read() { + echo_command(Some(command)); + return Some(command); + } + } + self.stream.read() + } +} + +impl Stream { + pub fn new() -> Self { + let stdin = io::stdin(); + if stdin.is_terminal() { + return Self::Terminal(Terminal::new()); + } + Self::Stdin(Stdin::from(stdin)) + } +} + +impl SourceRead for Stream { + fn read(&mut self) -> Option<&str> { + match self { + Self::Stdin(stdin) => { + let command = stdin.read(); + echo_command(command); + command + } + Self::Terminal(terminal) => terminal.read(), + } + } +} + +impl Argument { + pub fn from(source: String) -> Self { + Self { + buffer: source, + cursor: 0, + } + } +} + +impl SourceRead for Argument { + fn read(&mut self) -> Option<&str> { + // EOF + if self.cursor >= self.buffer.len() { + return None; + } + + // Take characters until delimiter + let start = self.cursor; + let mut chars = self.buffer[self.cursor..].chars(); + while let Some(ch) = chars.next().filter(|ch| *ch != '\n' && *ch != ';') { + self.cursor += ch.len_utf8(); + } + + let end = self.cursor; + self.cursor += 1; // sizeof('\n' or ';') + + let command = self + .buffer + .get(start..end) + .expect("calculated incorrect character indexes"); + Some(command) + } +} + +impl Stdin { + pub fn from(stdin: io::Stdin) -> Self { + Self { + stdin, + buffer: String::new(), + 
} + } + + /// `None` indicates EOF. + fn read_char(&mut self) -> Option { + let mut buffer = [0; 1]; + if self.stdin.read(&mut buffer).unwrap() == 0 { + return None; + } + Some(buffer[0] as char) + } +} + +impl SourceRead for Stdin { + fn read(&mut self) -> Option<&str> { + self.buffer.clear(); + + // Take characters until delimiter + loop { + let Some(ch) = self.read_char() else { + if self.buffer.is_empty() { + return None; // First character is EOF + } + break; + }; + if ch == '\n' || ch == ';' { + break; + } + self.buffer.push(ch); + } + + Some(&self.buffer) + } +} + +impl Terminal { + pub fn new() -> Self { + Self { + term: console::Term::stdout(), + buffer: String::new(), + cursor: 0, + visible_cursor: 0, + history: TerminalHistory::new(), + } + } + + /// Returns `true` if current command is a new command, rather than a focused history item. + fn is_next(&self) -> bool { + debug_assert!( + self.history.index <= self.history.list.len(), + "index went past history" + ); + self.history.index >= self.history.list.len() + } + + /// Run before modifying `next`. + /// If focused on a historic item, clone it to `next` and update index. + fn update_next(&mut self) { + if self.is_next() { + return; + } + self.buffer = self + .history + .list + .get(self.history.index) + .expect("checked above") + .clone(); + self.history.index = self.history.list.len(); + } + + /// Get next or historic command, from index. 
+ fn get_current(&self) -> &str { + if self.is_next() { + &self.buffer + } else { + self.history + .list + .get(self.history.index) + .expect("checked above") + } + } + + fn print_prompt(&mut self) { + // Clear line, print prompt, set cursor position + self.term.clear_line().unwrap(); + + // Print prompt and current input + // Equivalent code found in non-terminal source + if Output::is_minimal() { + write!(&mut self.term, "{}", PROMPT).unwrap(); + } else { + write!( + &mut self.term, + "\x1b[1;{}m{}\x1b[0m", + DEBUGGER_PRIMARY_COLOR, PROMPT, + ) + .unwrap(); + } + + // Inline `self.get_current()` due to borrowing issues + let current = if self.is_next() { + &self.buffer + } else { + self.history + .list + .get(self.history.index) + .expect("checked above") + }; + write!(self.term, "{}", current).unwrap(); + + self.term + .move_cursor_left( + self.get_current() + .chars() + .count() + .saturating_sub(self.visible_cursor), + ) + .unwrap(); + + self.term.flush().unwrap(); + } + + // Return of `true` indicates to break loop. 
+ fn read_key(&mut self) -> bool { + let key = self.term.read_key().unwrap(); + match key { + Key::Enter | Key::Char('\n') => { + if self.is_next() && self.buffer.trim().is_empty() { + self.buffer.clear(); + self.visible_cursor = 0; + println!(); + } else { + self.update_next(); + return true; + } + } + + Key::Char(ch) => match ch { + // Ignore ASCII control characters + '\x00'..='\x1f' | '\x7f' => (), + + // Pasting should be automatically supported, since terminals simulate typing each + // character + _ => { + self.update_next(); + self.buffer.insert_char_index(self.visible_cursor, ch); + self.visible_cursor += 1; + } + }, + + Key::Backspace => { + self.update_next(); + if self.visible_cursor > 0 + && self.visible_cursor <= self.get_current().chars().count() + { + self.visible_cursor -= 1; + self.buffer.remove_char_index(self.visible_cursor); + } + } + Key::Del => { + self.update_next(); + if self.visible_cursor < self.get_current().chars().count() { + self.buffer.remove_char_index(self.visible_cursor); + } + } + + // Left/right in current input + Key::ArrowLeft => { + if self.visible_cursor > 0 { + self.visible_cursor -= 1; + } + } + Key::ArrowRight => { + if self.visible_cursor < self.get_current().chars().count() { + self.visible_cursor += 1; + } + } + + // Back/forth through history + Key::ArrowUp => { + if self.history.index > 0 { + self.history.index -= 1; + self.visible_cursor = self.get_current().chars().count(); + } + } + Key::ArrowDown => { + if self.history.index < self.history.list.len() { + self.history.index += 1; + self.visible_cursor = self.get_current().chars().count(); + } + } + + _ => (), + } + false + } + + /// Read entire (multi-command) line from terminal. 
+ fn read_line(&mut self) { + self.buffer.clear(); + self.visible_cursor = 0; + + // Read keys until newline + loop { + self.print_prompt(); + if self.read_key() { + break; + } + } + println!(); + + debug_assert!( + !self.buffer.trim().is_empty(), + "should have looped until non-empty" + ); + + // Push to history if different to last command + if self + .history + .list + .last() + .is_none_or(|previous| previous != &self.buffer) + { + self.history.push(self.buffer.clone()); + } + // Always reset index to next command + self.history.index = self.history.list.len(); + } + + /// Returns next command from line buffer. + fn get_next_command(&mut self) -> &str { + let rest = &self.buffer[self.cursor..]; + match rest.find(';') { + // Multiple commands in buffer + // Take first command and update head index + Some(index) => { + self.cursor += index + 1; + &rest[..index] + } + // Rest of buffer is 1 command + // Take rest of buffer and reset head index + None => { + self.cursor = 0; + rest + } + } + } +} + +impl SourceRead for Terminal { + fn read(&mut self) -> Option<&str> { + // Reached end of line buffer: read new line + if self.cursor == 0 { + self.read_line(); + } + Some(self.get_next_command()) + } +} + +/// Extension trait for character-indexed string operations. +trait CharIndexed { + /// Insert a character at a character index. + fn insert_char_index(&mut self, char_index: usize, ch: char); + /// Remove a character at a character index. + fn remove_char_index(&mut self, char_index: usize) -> char; + + /// Returns the byte index from a character index, and the total character count. 
+ fn count_chars_bytes(string: &str, char_index: usize) -> (usize, usize) { + let mut byte_index = string.len(); + let mut char_count = 0; + for (i, (j, _)) in string.char_indices().enumerate() { + if i == char_index { + byte_index = j; + } + char_count += 1; + } + (byte_index, char_count) + } +} + +impl CharIndexed for String { + fn insert_char_index(&mut self, char_index: usize, ch: char) { + let (byte_index, char_count) = Self::count_chars_bytes(self, char_index); + assert!(char_index <= char_count, "out-of-bounds char index"); + self.insert(byte_index, ch) + } + fn remove_char_index(&mut self, char_index: usize) -> char { + let (byte_index, char_count) = Self::count_chars_bytes(self, char_index); + assert!(char_index < char_count, "out-of-bounds char index"); + self.remove(byte_index) + } +} + +impl TerminalHistory { + const FILE_NAME: &str = "lace-debugger-history"; + + pub fn new() -> Self { + let mut file = Self::get_file(); + let list = Self::read_file(file.as_mut()); + let index = list.len(); + Self { list, index, file } + } + + /// Push command into list and write to file + pub fn push(&mut self, command: String) { + if let Some(file) = &mut self.file { + if writeln!(file, "{}", command).is_err() { + Self::report_error("Failed to write to file"); + } + } + self.list.push(command); + } + + /// Returns empty vector if failed to read. + fn read_file(file: Option<&mut File>) -> Vec { + let Some(file) = file else { + return Vec::new(); + }; + let mut history = Vec::new(); + for line in BufReader::new(file).lines() { + let Ok(line) = line else { + Self::report_error("Failed to read from file"); + break; + }; + history.push(line); + } + history + } + + /// Get file path and open file. + /// + /// Returns `None` if anything fails. + fn get_file() -> Option { + let Some(parent_dir) = dirs_next::cache_dir() else { + Self::report_error(format_args!( + "Cannot retrieve user cache directory. Eg. 
$XDG_CACHE_HOME" + )); + return None; + }; + if !parent_dir.is_dir() { + Self::report_error(format_args!( + "Parent directory is not a directory: {}", + parent_dir.display(), + )); + return None; + } + + let file_path = parent_dir.join(Self::FILE_NAME); + if file_path.exists() && !file_path.is_file() { + Self::report_error(format_args!( + "File exists but is not a file: {}", + file_path.display(), + )); + return None; + } + + match fs::OpenOptions::new() + .create(true) + .read(true) + .append(true) + .open(&file_path) + { + Ok(file) => Some(file), + Err(_error) => { + Self::report_error(format_args!("Failed to open file: {}", file_path.display(),)); + None + } + } + } + + fn report_error(message: impl fmt::Display) { + dprintln!( + Always, + Error, + "Error with debugger history file: {}", + message, + ); + } +} diff --git a/src/error.rs b/src/error.rs index fbf5b70..456c677 100644 --- a/src/error.rs +++ b/src/error.rs @@ -143,11 +143,12 @@ pub fn parse_generic_unexpected(src: &'static str, expected: &str, found: Token) } pub fn parse_eof(src: &'static str) -> Report { + let offset = src.len().checked_sub(1).unwrap_or(0); miette!( severity = Severity::Error, code = "parse::unexpected_eof", help = "you may be missing operands in your last statement", - labels = vec![LabeledSpan::at_offset(src.len() - 1, "here")], + labels = vec![LabeledSpan::at_offset(offset, "here")], "Unexpected end of file", ) .with_source_code(src) diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs index bf07ff7..27edafd 100644 --- a/src/lexer/mod.rs +++ b/src/lexer/mod.rs @@ -25,15 +25,22 @@ impl Token { Token { kind, span } } - pub fn byte(val: u16) -> Self { + pub fn byte(val: u16, span: Span) -> Self { Token { kind: TokenKind::Byte(val), - span: Span::dummy(), + span, } } - pub fn nullbyte() -> Self { - Token::byte(0) + pub fn nullbyte(span: Span) -> Self { + Token::byte(0, span) + } + + pub fn breakpoint(span: Span) -> Self { + Token { + kind: TokenKind::Breakpoint, + span, + } } } @@ -57,6 
+64,7 @@ pub enum TokenKind { Reg(Register), /// Preprocessor raw values Byte(u16), + Breakpoint, Whitespace, Comment, Eof, @@ -71,31 +79,18 @@ impl Display for TokenKind { TokenKind::Lit(_) => "literal", TokenKind::Dir(_) => "preprocessor directive", TokenKind::Reg(_) => "register", - TokenKind::Whitespace | TokenKind::Comment | TokenKind::Eof | TokenKind::Byte(_) => { - unreachable!("whitespace, comment, eof, byte attempted to be displayed") + TokenKind::Whitespace + | TokenKind::Comment + | TokenKind::Eof + | TokenKind::Byte(_) + | TokenKind::Breakpoint => { + unreachable!("whitespace, comment, eof, byte, breakpoitn attempted to be displayed") } }; f.write_str(lit) } } -#[allow(dead_code)] -pub fn tokenize(input: &'static str) -> impl Iterator> + '_ { - let mut cursor = Cursor::new(input); - std::iter::from_fn(move || loop { - let token = cursor.advance_token(); - if let Ok(inner) = &token { - if inner.kind == TokenKind::Whitespace { - continue; - } - if inner.kind == TokenKind::Eof { - return None; - } - } - return Some(token); - }) -} - /// Test if a character is considered to be whitespace, including commas. 
pub(crate) fn is_whitespace(c: char) -> bool { char::is_ascii_whitespace(&c) || matches!(c, ',' | ':') @@ -306,6 +301,7 @@ impl Cursor<'_> { ".stringz" => Some(Dir(Stringz)), ".blkw" => Some(Dir(Blkw)), ".fill" => Some(Dir(Fill)), + ".break" => Some(Dir(Break)), _ => None, } } diff --git a/src/lib.rs b/src/lib.rs index 35c0da9..734ad84 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,7 +6,11 @@ pub use air::Air; // Running mod runtime; -pub use runtime::RunState; +pub use runtime::RunEnvironment; +#[macro_use] +mod debugger; +pub use debugger::DebuggerOptions; +mod output; // Reset global state for watch mod symbol; diff --git a/src/main.rs b/src/main.rs index fd1cb7c..d417263 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,8 +13,8 @@ use hotwatch::{ }; use miette::{bail, IntoDiagnostic, Result}; -use lace::reset_state; -use lace::{Air, RunState, StaticSource}; +use lace::{reset_state, DebuggerOptions}; +use lace::{Air, RunEnvironment, StaticSource}; /// Lace is a complete & convenient assembler toolchain for the LC3 assembly language. 
#[derive(Parser)] @@ -31,8 +31,22 @@ struct Args { enum Command { /// Run text `.asm` or binary `.lc3` file directly and output to terminal Run { - /// .asm file to run + /// `.asm` or `.lc3` file to run name: PathBuf, + /// Produce minimal output, suited for blackbox tests + #[arg(short, long)] + minimal: bool, + }, + /// Run text `.asm` file directly and with debugger + Debug { + /// `.asm` file to run + name: PathBuf, + /// Read debugger commands from argument + #[arg(short, long)] + command: Option, + /// Produce minimal output, suited for blackbox tests + #[arg(short, long)] + minimal: bool, }, /// Create binary `.lc3` file to run later or view compiled data Compile { @@ -68,10 +82,26 @@ fn main() -> miette::Result<()> { let args = Args::parse(); lace::env::init(); + miette::set_hook(Box::new(|_| { + Box::new( + miette::MietteHandlerOpts::new() // + .context_lines(6) + .build(), + ) + }))?; + if let Some(command) = args.command { match command { - Command::Run { name } => { - run(&name)?; + Command::Run { name, minimal } => { + run(&name, None, minimal)?; + Ok(()) + } + Command::Debug { + name, + command, + minimal, + } => { + run(&name, Some(DebuggerOptions { command }), minimal)?; Ok(()) } Command::Compile { name, dest } => { @@ -91,7 +121,7 @@ fn main() -> miette::Result<()> { } // Write lines - for stmt in air { + for stmt in &air { let _ = file.write(&stmt.emit()?.to_be_bytes()); } @@ -169,7 +199,7 @@ fn main() -> miette::Result<()> { } } else { if let Some(path) = args.path { - run(&path)?; + run(&path, None, false)?; Ok(()) } else { println!("\n~ lace v{VERSION} - Copyright (c) 2024 Artemis Rosman ~"); @@ -204,11 +234,15 @@ where println!("{left:>12} {right}"); } -fn run(name: &PathBuf) -> Result<()> { +fn run(name: &PathBuf, debugger_opts: Option, minimal: bool) -> Result<()> { file_message(MsgColor::Green, "Assembling", &name); let mut program = if let Some(ext) = name.extension() { match ext.to_str().unwrap() { "lc3" | "obj" => { + if 
debugger_opts.is_some() { + bail!("Cannot use debugger on non-assembly file"); + } + // Read to byte buffer let mut file = File::open(&name).into_diagnostic()?; let f_size = file.metadata().unwrap().len(); @@ -223,12 +257,15 @@ fn run(name: &PathBuf) -> Result<()> { .chunks_exact(2) .map(|word| u16::from_be_bytes([word[0], word[1]])) .collect(); - RunState::from_raw(&u16_buf)? + RunEnvironment::from_raw(&u16_buf)? } "asm" => { let contents = StaticSource::new(fs::read_to_string(&name).into_diagnostic()?); let air = assemble(&contents)?; - RunState::try_from(air)? + match debugger_opts { + None => RunEnvironment::try_from(&air)?, + Some(opts) => RunEnvironment::try_from_with_debugger(air, opts)?, + } } _ => { bail!("File has unknown extension. Exiting...") @@ -238,6 +275,8 @@ fn run(name: &PathBuf) -> Result<()> { bail!("File has no extension. Exiting..."); }; + program.set_minimal(minimal); + message(MsgColor::Green, "Running", "emitted binary"); program.run(); diff --git a/src/output.rs b/src/output.rs new file mode 100644 index 0000000..185d796 --- /dev/null +++ b/src/output.rs @@ -0,0 +1,475 @@ +use std::cell::RefCell; +use std::fmt::{self, Write as _}; + +use crate::runtime::RunState; + +/// Main color used by [`Output::Debugger`]. +/// +/// Note that color depends on the [`Category`] used, and can be overridden. +pub const DEBUGGER_PRIMARY_COLOR: &str = "34"; + +/// Print to [`Output::Debugger`]. +#[macro_export] +macro_rules! dprint { + ( $condition:expr, $category:expr, $fmt:expr $(, $($tt:tt)* )? ) => {{ + // This is not very hygienic. But makes macro more ergonomic to use. + #[allow(unused_imports)] + use $crate::output::{Condition::*, Category::*}; + + $crate::output::Output::Debugger($crate::output::Condition::Sometimes, $category) + .print_category($category); + $crate::output::Output::Debugger($condition, $category) + .print(format_args!($fmt $(, $($tt)* )?) 
+ ); + eprint!("\x1b[0m"); // This is not ideal here + }}; + + // Trigger type error if missing condition/kind + ( $fmt:literal $($tt:tt)* ) => {{ + $crate::output::Output::Debugger($fmt); + }}; + + // TODO(refactor): Match more patterns for helpful compile errors +} + +/// Print to [`Output::Debugger`], with a newline. +#[macro_export] +macro_rules! dprintln { + ( $condition:expr ) => {{ + $crate::dprint!( + $condition, + $crate::output::Category::Normal, + "\n" + ); + }}; + + ( $condition:expr, $category:expr, $fmt:expr $(, $($tt:tt)* )? ) => {{ + $crate::dprint!( + $condition, + $category, + concat!($fmt, "\n") + $(, $($tt)* )? + ); + }}; + + ( $condition:expr, $category:expr ) => {{ + compile_error!("Either remove the category or include a format string"); + }}; + + // Let `dprint` issue any other compiler errors + ( $($tt:tt)* ) => {{ + $crate::dprint!($($tt)*); + }}; +} + +/// Output channel. +#[derive(Clone, Copy, Debug)] +pub enum Output { + /// For program output. + /// Writes to `stdout`. + /// No ANSI color/style attributes are applied. + Normal, + /// For debugger output. + /// Writes to `stderr`. + /// ANSI color/style attributes, and/or line decorations, will be applied depending on [`Category`]. + /// Whether or not anything is printed depends on the [`Condition`]. + Debugger(Condition, Category), +} + +/// A condition of `Sometimes` will not print anything if `Output::is_minimal() == true`. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Condition { + /// Always print. + Always, + /// Only print if `Output::is_minimal() == false`. + Sometimes, +} + +#[derive(Clone, Copy, Debug, Default, PartialEq)] +pub enum Category { + /// No decoration. + #[default] + Normal, + /// Implies that NO change was made to memory/registers/breakpoints/etc. + Info, + /// Implies that a change was made to memory/registers/breakpoints/etc. + Warning, + /// An error occurred while parsing or executing a command. 
+ Error, + /// Use `{` and `}` to delimit ANSI color/style attributes (instead of `\x1b[` and `m`) + Special, +} + +impl Output { + thread_local! { + /// Only access using [`Output::is_minimal`] and [`Output::set_minimal`]. + static IS_MINIMAL: RefCell = const { RefCell::new(false) }; + } + /// Whether output will be printed 'minimally'. + /// + /// Should return `true` iff `--minimal` argument was given. + pub fn is_minimal() -> bool { + Self::IS_MINIMAL.with(|value| *value.borrow()) + } + /// Set whether output will be printed 'minimally'. + /// + /// Use this method to handle `--minimal` argument. + pub fn set_minimal(new_value: bool) -> bool { + Self::IS_MINIMAL.with(|value| value.replace(new_value)) + } + + /// If cursor is NOT at the start of a line, then start a new line (ie. print '\n'). + /// + /// Relies on previously-printed strings to keep track of cursor position. This is done + /// automatically within the [`Output`] enum, but free [`print!`], [`eprint!`], etc. calls will + /// not track the state. + pub fn start_new_line(&self) { + if !LineTracker::is_line_start() { + self.print('\n'); + } + } + + /// Print a value, which implements [`fmt::Display`]. + pub fn print(&self, value: impl fmt::Display) { + self.print_fmt(format_args!("{}", value)); + } + + /// Print an integer, as a *signed* decimal. + pub fn print_decimal(&self, value: u16) { + self.print(format_args!("{}", value as i16)); + } + + /// Print a decoration symbol, to indicate the purpose of the next message printed. + /// + /// Only works for [`Output::Debugger`]. 
+ pub fn print_category(&self, category: Category) { + debug_assert!( + matches!(self, Self::Debugger(..)), + "`Output::print_category()` called on `Output::Normal`" + ); + if !matches!(self, Self::Debugger(..)) { + return; + } + + match category { + Category::Normal => (), + Category::Info => self.print(" · "), + Category::Warning => self.print(" ➔ "), + Category::Error => self.print(" ⨯ "), + Category::Special => (), + } + } + + /// Print all registers (R0-7, PC, and CC) in a fancy table. + /// + /// Prints values as hex, signed decimal, unsigned decimal, and character. + /// + /// PC and CC will be only displayed as hex and 3-bit binary respectively. + pub fn print_registers(&self, state: &RunState) { + if Self::is_minimal() { + for i in 0..8 { + self.print(format_args!("R{} {}\n", i, state.reg(i))); + } + self.print(format_args!("PC {}\n", state.pc())); + self.print(format_args!("CC {:03b}\n", state.flag() as u8)); + return; + } + + self.print("\x1b[2m┌────────────────────────────────────┐\x1b[0m\n"); + self.print("\x1b[2m│ \x1b[3mhex int uint char\x1b[0m\x1b[2m │\x1b[0m\n"); + + // R0-7 + for i in 0..8 { + self.print("\x1b[2m│\x1b[0m"); + self.print(format_args!(" \x1b[1mR\x1b[1m{}\x1b[0m ", i)); + self.print_integer_inner(state.reg(i)); + self.print(" \x1b[2m│\x1b[0m\n"); + } + + // PC, CC + self.print("\x1b[2m│\x1b[0m"); + self.print(" \x1b[1mPC\x1b[0m"); + self.print(format_args!(" 0x{:04x}", state.pc())); + self.print(" "); + self.print(" \x1b[1mCC\x1b[0m"); + self.print(format_args!(" {:03b}", state.flag() as u8)); + self.print(" \x1b[2m│\x1b[0m\n"); + + self.print("\x1b[2m└────────────────────────────────────┘\x1b[0m\n"); + } + + /// Prints a register as hex, signed decimal, unsigned decimal, and character, in a fancy + /// table. 
+ pub fn print_integer(&self, value: u16) { + if Self::is_minimal() { + self.print_decimal(value); + self.print('\n'); + return; + } + self.print("\x1b[2m┌────────────────────────────────┐\x1b[0m\n"); + self.print("\x1b[2m│ \x1b[3mhex int uint char\x1b[0m\x1b[2m │\x1b[0m\n"); + self.print("\x1b[2m│\x1b[0m "); + self.print_integer_inner(value); + self.print(" \x1b[2m│\x1b[0m\n"); + self.print("\x1b[2m└────────────────────────────────┘\x1b[0m\n"); + } + + /// Prints a register as hex, signed decimal, unsigned decimal, and character. + fn print_integer_inner(&self, value: u16) { + if Self::is_minimal() { + self.print_decimal(value); + return; + } + self.print(format_args!("0x{:04x}", value)); + self.print(format_args!(" {:-6}", value)); + self.print(format_args!(" {:-6}", value as i16)); + self.print(" "); + self.print_char_display(value); + } + + /// Print a character in a descriptive way: + /// + /// - 'Significant' control characters display their abbreviated names. + /// - ASCII space is displayed as `[_]`. + /// - Printable ASCII characters are displayed normally. 
+ /// - Any other ASCII character is printed as `───` + /// - Any non-ASCII (UTF-16) character is displayed as `┄┄┄` + fn print_char_display(&self, value: u16) { + debug_assert!( + !Self::is_minimal(), + "`print_display` should not be called if `--minimal`" + ); + if Self::is_minimal() { + return; + } + + // Print 3 characters + match value { + // ASCII control characters which are arbitrarily considered significant + 0x00 => self.print("NUL"), + 0x08 => self.print("BS "), + 0x09 => self.print("HT "), + 0x0a => self.print("LF "), + 0x0b => self.print("VT "), + 0x0c => self.print("FF "), + 0x0d => self.print("CR "), + 0x1b => self.print("ESC"), + 0x7f => self.print("DEL"), + + // Space + 0x20 => self.print("[_]"), + + // Printable ASCII characters + 0x21..=0x7e => self.print(format_args!("{:-6}", value as u8 as char)), + + // Any ASCII character not already matched (unimportant control characters) + 0x00..=0x7f => self.print("\x1b[2m───\x1b[0m"), + // Any non-ASCII character + 0x0080.. => self.print("\x1b[2m┄┄┄\x1b[0m"), + } + } + + /// Print a value returned by the [`format_args!`] macro. + /// + /// Use [`Output::print`] wrapper method. + fn print_fmt(&self, args: fmt::Arguments) { + let minimal = Self::is_minimal(); + match self { + Self::Normal => { + NormalWriter { minimal }.write_fmt(args).unwrap(); + } + Self::Debugger(condition, category) => { + if minimal && condition == &Condition::Sometimes { + return; + } + DebuggerWriter { + minimal, + category: *category, + } + .write_fmt(args) + .unwrap(); + } + } + } +} + +/// Writer for [`Output::Normal`] +struct NormalWriter { + minimal: bool, +} +impl fmt::Write for NormalWriter { + fn write_str(&mut self, string: &str) -> fmt::Result { + if self.minimal { + print!("{}", Decolored::new(string)); + } else { + print!("{}", string); + } + LineTracker.write_str(string).unwrap(); + Ok(()) + } +} + +/// Writer for [`Output::Debugger`] +/// +/// [`Condition`] must be checked by caller. 
+struct DebuggerWriter { + minimal: bool, + category: Category, +} +impl fmt::Write for DebuggerWriter { + fn write_str(&mut self, string: &str) -> fmt::Result { + if self.minimal { + eprint!("{}", Decolored::new(string)); + return Ok(()); + } + + let color = match self.category { + Category::Normal => DEBUGGER_PRIMARY_COLOR, + Category::Info => DEBUGGER_PRIMARY_COLOR, + Category::Warning => "33", + Category::Error => "31", + + Category::Special => { + // Acts similar to `Colored::fmt` + eprint!("\x1b[{}m", DEBUGGER_PRIMARY_COLOR); + + let mut chars = string.chars(); + while let Some(ch) = chars.next() { + if ch != '{' { + eprint!("{}", ch); + continue; + } + + eprint!("\x1b["); + for ch in chars.by_ref() { + if ch == '}' { + break; + } + eprint!("{}", ch); + // Re-apply color when reset + if ch == '0' { + eprint!(";{}", DEBUGGER_PRIMARY_COLOR); + } + } + eprint!("m"); + } + + LineTracker.write_str(string).unwrap(); + return Ok(()); + } + }; + + eprint!("{}", Colored::new(color, string)); + LineTracker.write_str(string).unwrap(); + Ok(()) + } +} + +/// Tracks whether the cursor is at the start of a line (at column 1). +/// +/// Uses [`fmt::Write`], in order to handle [`fmt::Arguments`], and therefore anything which implements +/// [`fmt::Display`]. +/// +/// If the last printable character is a newline character ('\n' or '\r'), then the state will be +/// set to `true`. +/// Otherwise it will be set to false. +/// If the string does not contain any printable characters, then the state will not change. +struct LineTracker; +impl LineTracker { + thread_local! 
{ + static IS_LINE_START: RefCell = const { RefCell::new(true) }; + } + pub fn is_line_start() -> bool { + Self::IS_LINE_START.with(|value| *value.borrow()) + } + fn set_line_start(new_value: bool) -> bool { + Self::IS_LINE_START.with(|value| value.replace(new_value)) + } +} +impl fmt::Write for LineTracker { + fn write_str(&mut self, string: &str) -> fmt::Result { + // TODO(fix): This will break with "\x1b[0m" for example. String should be scanned forwards + for ch in string.chars().rev() { + let is_line_start = match ch { + '\n' | '\r' => true, + '\x00'..='\x1f' | '\x7f' => continue, + _ => false, + }; + Self::set_line_start(is_line_start); + break; + } + + Ok(()) + } +} + +/// Applies an ANSI color/style attribute to a string, when displayed. +/// +/// Given attributes are re-applied after any 'reset' code (`\x1b[0m`) is encountered. +struct Colored<'a> { + /// Note: This is only `'static` for convenience. If needed, another lifetime parameter could + /// be created. + color: &'static str, + string: &'a str, +} +impl<'a> Colored<'a> { + pub fn new(color: &'static str, string: &'a str) -> Self { + Self { color, string } + } +} +impl fmt::Display for Colored<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_str("\x1b[")?; + f.write_str(self.color)?; + f.write_str("m")?; + + let mut chars = self.string.chars(); + while let Some(ch) = chars.next() { + // Print color code in string -- everything between '\x1b' and 'm' (inclusive) + // Re-apply global color + if ch == '\x1b' { + f.write_char('\x1b')?; + for ch in chars.by_ref() { + f.write_char(ch)?; + if ch == 'm' { + break; + } + } + f.write_str("\x1b[")?; + f.write_str(self.color)?; + f.write_str("m")?; + continue; + } + + f.write_char(ch)?; + } + + // Do not reset color. This should be done by caller (Eg. `dprint!`) + + Ok(()) + } +} + +/// Removes all ANSI escape codes (color codes) from a string, when displayed. 
+struct Decolored<'a> { + string: &'a str, +} +impl<'a> Decolored<'a> { + pub fn new(string: &'a str) -> Self { + Self { string } + } +} +impl fmt::Display for Decolored<'_> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let mut chars = self.string.chars(); + while let Some(ch) = chars.next() { + // Skip everything between '\x1b' and 'm' (inclusive) + if ch == '\x1b' { + while chars.next().is_some_and(|ch| ch != 'm') {} + continue; + } + f.write_char(ch)?; + } + Ok(()) + } +} diff --git a/src/parser.rs b/src/parser.rs index c0e4d2d..0b6dab4 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -4,9 +4,10 @@ use miette::Result; use crate::{ air::{Air, AirStmt, ImmediateOrReg, RawWord}, + debugger::Breakpoint, error, lexer::{cursor::Cursor, LiteralKind, Token, TokenKind}, - symbol::{DirKind, InstrKind, Label, Register, Span, TrapKind}, + symbol::{DirKind, InstrKind, Label, Register, Span, SrcOffset, TrapKind}, }; /// Replaces raw value directives .fill, .blkw, .stringz with equivalent raw bytes @@ -22,12 +23,14 @@ pub fn preprocess(src: &'static str) -> Result> { // Into raw word with the next literal as value TokenKind::Dir(DirKind::Fill) => { let val = cur.advance_real()?; + // Span entire directive name and integer literal + let span = dir.span.join(val.span); match val.kind { TokenKind::Lit(LiteralKind::Hex(lit)) => { - res.push(Token::byte(lit)); + res.push(Token::byte(lit, span)); } TokenKind::Lit(LiteralKind::Dec(lit)) => { - res.push(Token::byte(lit as u16)); + res.push(Token::byte(lit as u16, span)); } _ => return Err(error::preproc_bad_lit(val.span, src, false)), } @@ -35,10 +38,11 @@ pub fn preprocess(src: &'static str) -> Result> { // Into a series of raw null words TokenKind::Dir(DirKind::Blkw) => { let val = cur.advance_real()?; + let span = dir.span.join(val.span); match val.kind { TokenKind::Lit(LiteralKind::Hex(lit)) => { for _ in 0..lit { - res.push(Token::nullbyte()); + res.push(Token::nullbyte(span)); } } 
TokenKind::Lit(LiteralKind::Dec(lit)) => { @@ -46,7 +50,7 @@ pub fn preprocess(src: &'static str) -> Result> { println!("{:?}", error::preproc_bad_lit(val.span, src, true)); } for _ in 0..lit as u16 { - res.push(Token::nullbyte()); + res.push(Token::nullbyte(span)); } } _ => return Err(error::preproc_bad_lit(val.span, src, false)), @@ -58,15 +62,21 @@ pub fn preprocess(src: &'static str) -> Result> { match val.kind { TokenKind::Lit(LiteralKind::Str) => { let str_raw = cur.get_range(val.span.into()); + let span = dir.span.join(val.span); // Get rid of quotation marks for c in unescape(&str_raw[1..str_raw.len() - 1]).chars() { - res.push(Token::byte(c as u16)); + res.push(Token::byte(c as u16, span)); } - res.push(Token::nullbyte()); + res.push(Token::nullbyte(span)); } _ => return Err(error::preproc_no_str(val.span, src)), } } + TokenKind::Dir(DirKind::Break) => { + // Note that this span will never be used + // Since breakpoints don't push bytes + res.push(Token::breakpoint(dir.span)); + } // Eliminated during preprocessing TokenKind::Comment | TokenKind::Whitespace => continue, TokenKind::Eof | TokenKind::Dir(DirKind::End) => break, @@ -76,6 +86,25 @@ pub fn preprocess(src: &'static str) -> Result> { Ok(res) } +fn preprocess_simple(src: &'static str) -> Result> { + let mut res: Vec = Vec::new(); + let mut cur = Cursor::new(src); + + loop { + let token = cur.advance_real()?; + match token.kind { + TokenKind::Byte(_) => unreachable!("Found byte in stream"), + TokenKind::Breakpoint => unreachable!("Found breakpoint in stream"), + TokenKind::Comment | TokenKind::Whitespace => continue, + TokenKind::Eof => break, + + _ => res.push(token), + } + } + + Ok(res) +} + fn unescape(s: &str) -> Cow { if s.find('\\').is_none() { return Cow::Borrowed(s); @@ -117,6 +146,8 @@ pub struct AsmParser { air: Air, /// Tracker for current line line: u16, + + tok_end: usize, } impl AsmParser { @@ -127,8 +158,20 @@ impl AsmParser { Ok(AsmParser { src, toks: toks.into_iter().peekable(), - 
air: Air::new(), + air: Air::new(src), + line: 1, + tok_end: 0, + }) + } + + pub fn new_simple(src: &'static str) -> Result { + let toks = preprocess_simple(src)?; + Ok(AsmParser { + src, + toks: toks.into_iter().peekable(), + air: Air::new(src), line: 1, + tok_end: 0, }) } @@ -166,6 +209,14 @@ impl AsmParser { self.air.set_orig(orig)?; continue; } + TokenKind::Breakpoint => { + let addr = self.air.len() as u16; + self.air.breakpoints.insert(Breakpoint { + address: addr, + is_predefined: true, + }); + continue; + } TokenKind::Instr(instr_kind) => self.parse_instr(instr_kind)?, TokenKind::Trap(trap_kind) => self.parse_trap(trap_kind)?, TokenKind::Byte(val) => self.parse_byte(val), @@ -174,7 +225,14 @@ impl AsmParser { unreachable!("Found whitespace/comment/eof in preprocessed stream") } }; - self.air.add_stmt(stmt); + + let len = if self.tok_end < tok.span.offs() { + tok.span.len() + } else { + self.tok_end - tok.span.offs() + }; + let span = Span::new(SrcOffset(tok.span.offs()), len); + self.air.add_stmt(stmt, span); } else { if labeled_line { return Err(error::parse_eof(self.src)); @@ -187,6 +245,36 @@ impl AsmParser { Ok(self.air) } + pub fn parse_simple(&mut self) -> Result { + let Some(tok) = self.toks.next() else { + return Err(error::parse_eof(self.src)); + }; + + match tok.kind { + TokenKind::Instr(instr_kind) => self.parse_instr(instr_kind), + TokenKind::Trap(trap_kind) => self.parse_trap(trap_kind), + + TokenKind::Dir(_) | TokenKind::Label | TokenKind::Lit(_) | TokenKind::Reg(_) => { + return Err(error::parse_generic_unexpected( + self.src, + "instruction", + tok, + )) + } + + // Does not exist in preprocessed token stream + TokenKind::Comment + | TokenKind::Whitespace + | TokenKind::Eof + | TokenKind::Byte(_) + | TokenKind::Breakpoint => { + unreachable!("Found invalid token kind in preprocessed stream"); + } + } + + // TODO(fix): Check for end of line here + } + /// Return label or leave iter untouched and return None fn optional_label(&mut self) -> 
Option { match self.toks.peek() { @@ -196,7 +284,7 @@ impl AsmParser { } /// Process several tokens to form valid AIR statement - fn parse_instr(&mut self, kind: InstrKind) -> Result { + pub fn parse_instr(&mut self, kind: InstrKind) -> Result { use crate::symbol::InstrKind; match kind { InstrKind::Push => { @@ -333,7 +421,10 @@ impl AsmParser { fn expect(&mut self, expected: TokenKind) -> Result { match self.toks.next() { - Some(tok) if tok.kind == expected => Ok(tok), + Some(tok) if tok.kind == expected => { + self.tok_end = tok.span.offs() + tok.span.len(); + Ok(tok) + } Some(unexpected) => { return Err(error::parse_generic_unexpected( self.src, @@ -351,7 +442,10 @@ impl AsmParser { expected: &str, ) -> Result { match self.toks.next() { - Some(tok) if check(&tok.kind) => Ok(tok), + Some(tok) if check(&tok.kind) => { + self.tok_end = tok.span.offs() + tok.span.len(); + Ok(tok) + } Some(unexpected) => { return Err(error::parse_generic_unexpected( self.src, expected, unexpected, @@ -554,7 +648,12 @@ mod test { let res = preprocess(r#"temp .stringz "\"hello\n\"""#).unwrap(); let expected = "\"hello\n\"\0" .chars() - .map(|c| Token::byte(c as u16)) + .map(|c| { + Token::byte( + c as u16, + Span::new(SrcOffset("temp ".len()), r#".stringz "\"hello\n\"""#.len()), + ) + }) .collect::>(); assert!(res[1..] == expected) } @@ -565,7 +664,12 @@ mod test { let res = preprocess(r#"temp .stringz "hello""#).unwrap(); let expected = "hello\0" .chars() - .map(|c| Token::byte(c as u16)) + .map(|c| { + Token::byte( + c as u16, + Span::new(SrcOffset("temp ".len()), r#".stringz "hello""#.len()), + ) + }) .collect::>(); assert!(res[1..] 
== expected) } @@ -610,7 +714,8 @@ mod test { dest: Register::R0, src_reg: Register::R1, src_reg_imm: ImmediateOrReg::Reg(Register::R2), - } + }, + span: Span::new(SrcOffset(0), "add r0 r1 r2".len()) } ) } @@ -634,7 +739,15 @@ mod test { dest: Register::R0, src_reg: Register::R1, src_reg_imm: ImmediateOrReg::Imm5(15), - } + }, + span: Span::new( + SrcOffset( + r#" + "# + .len() + ), + "add r0 r1 #15".len() + ) } ); assert_eq!( @@ -645,7 +758,16 @@ mod test { dest: Register::R0, src_reg: Register::R1, src_reg_imm: ImmediateOrReg::Imm5((-16i8) as u8), - } + }, + span: Span::new( + SrcOffset( + r#" + add r0 r1 #15 + "# + .len() + ), + "add r0 r1 #-16".len() + ) } ); } @@ -668,7 +790,8 @@ mod test { stmt: AirStmt::Branch { flag: Flag::Nzp, dest_label: Label::empty("label") - } + }, + span: Span::new(SrcOffset(0), "br label".len()) } ) } @@ -683,7 +806,8 @@ mod test { stmt: AirStmt::Branch { flag: Flag::Nzp, dest_label: Label::Ref(0x2 + 0x2) - } + }, + span: Span::new(SrcOffset(0), "br x2".len()) } ) } @@ -695,7 +819,8 @@ mod test { air.get(0), &AsmLine { line: 1, - stmt: AirStmt::RawWord { val: RawWord(0x30) } + stmt: AirStmt::RawWord { val: RawWord(0x30) }, + span: Span::new(SrcOffset("label ".len()), ".fill x30".len()) } ) } @@ -712,7 +837,8 @@ mod test { line: 1, stmt: AirStmt::RawWord { val: RawWord('a' as u16) - } + }, + span: Span::new(SrcOffset("label ".len()), ".stringz \"ab\"".len()) } ); assert_eq!( @@ -721,7 +847,8 @@ mod test { line: 2, stmt: AirStmt::RawWord { val: RawWord('b' as u16) - } + }, + span: Span::new(SrcOffset("label ".len()), ".stringz \"ab\"".len()) } ); assert_eq!( @@ -730,7 +857,8 @@ mod test { line: 3, stmt: AirStmt::RawWord { val: RawWord('\0' as u16) - } + }, + span: Span::new(SrcOffset("label ".len()), ".stringz \"ab\"".len()) } ); } @@ -752,7 +880,15 @@ mod test { line: 1, stmt: AirStmt::RawWord { val: RawWord('a' as u16) - } + }, + span: Span::new( + SrcOffset( + r#" + "# + .len() + ), + ".stringz \"a\"".len() + ) } ); assert_eq!( @@ 
-761,7 +897,16 @@ mod test { line: 3, stmt: AirStmt::RawWord { val: RawWord('b' as u16) - } + }, + span: Span::new( + SrcOffset( + r#" + .stringz "a" + "# + .len() + ), + ".stringz \"b\"".len() + ) } ); } @@ -787,7 +932,15 @@ mod test { dest: Register::R0, src_reg: Register::R0, src_reg_imm: ImmediateOrReg::Reg(Register::R0) - } + }, + span: Span::new( + SrcOffset( + r#" + label "# + .len() + ), + "add r0 r0 r0".len() + ) } ); assert_eq!( @@ -797,7 +950,16 @@ mod test { stmt: AirStmt::Branch { flag: Flag::Nzp, dest_label: Label::dummy(1) - } + }, + span: Span::new( + SrcOffset( + r#" + label add r0 r0 r0 + "# + .len() + ), + "br label".len() + ) } ); assert_eq!( @@ -807,7 +969,17 @@ mod test { stmt: AirStmt::Branch { flag: Flag::Nzp, dest_label: Label::empty("not_existing") - } + }, + span: Span::new( + SrcOffset( + r#" + label add r0 r0 r0 + br label + "# + .len() + ), + "br not_existing".len() + ) } ); assert_eq!( @@ -817,7 +989,18 @@ mod test { stmt: AirStmt::Branch { flag: Flag::Nzp, dest_label: Label::Ref(0x5 + 0x30), - } + }, + span: Span::new( + SrcOffset( + r#" + label add r0 r0 r0 + br label + br not_existing + "# + .len() + ), + "br x30".len() + ) } ); } diff --git a/src/runtime.rs b/src/runtime.rs index b39d46e..653cadf 100644 --- a/src/runtime.rs +++ b/src/runtime.rs @@ -5,11 +5,21 @@ use std::{ u16, u32, u8, usize, }; -use crate::{env, Air}; +use crate::{ + debugger::{Action, Debugger, DebuggerOptions, SignificantInstr}, + dprintln, env, + output::{Condition, Output}, + Air, +}; use colored::Colorize; use console::Term; use miette::Result; +/// First address which is out of bounds of user memory. +pub const USER_MEMORY_END: u16 = 0xFE00; +/// Sentinel value, which the PC is set to when a `HALT` is encountered. +pub const HALT_ADDRESS: u16 = 0xFFFF; + macro_rules! exception { ( $fmt:literal $($tt:tt)* ) => {{ eprintln!( @@ -21,10 +31,16 @@ macro_rules! exception { } /// LC3 can address 128KB of memory. 
-const MEMORY_MAX: usize = 0x10000; +pub(crate) const MEMORY_MAX: usize = 0x10000; + +pub struct RunEnvironment { + state: RunState, + debugger: Option, +} /// Represents complete program state during runtime. -pub struct RunState { +#[derive(Clone)] +pub(super) struct RunState { /// System memory - 128KB in size. /// Need to figure out if this would cause problems with the stack. mem: Box<[u16; MEMORY_MAX]>, @@ -39,16 +55,16 @@ pub struct RunState { } #[derive(Clone, Copy)] -enum RunFlag { +pub(super) enum RunFlag { N = 0b100, Z = 0b010, P = 0b001, Uninit = 0b000, } -impl RunState { +impl RunEnvironment { // Not generic because of miette error - pub fn try_from(air: Air) -> Result { + pub fn try_from(air: &Air) -> Result { let orig = air.orig().unwrap_or(0x3000); let mut air_array: Vec = Vec::with_capacity(air.len() + 1); @@ -56,10 +72,31 @@ impl RunState { for stmt in air { air_array.push(stmt.emit()?); } - RunState::from_raw(air_array.as_slice()) + + RunEnvironment::from_raw(air_array.as_slice()) } - pub fn from_raw(raw: &[u16]) -> Result { + pub fn try_from_with_debugger( + air: Air, // Takes ownership of breakpoints + debugger_opts: DebuggerOptions, + ) -> Result { + let mut env = Self::try_from(&air)?; + + // Add orig to each breakpoint + let breakpoints = air.breakpoints.with_orig(env.state.pc); + + env.debugger = Some(Debugger::new( + debugger_opts, + env.state.clone(), + breakpoints, + air.ast, + air.src, + )); + + Ok(env) + } + + pub fn from_raw(raw: &[u16]) -> Result { if raw.len() == 0 { exception!("provided file is empty"); } @@ -77,16 +114,84 @@ impl RunState { // Prevents PC running through no-ops to the end of memory mem[orig + raw.len()] = 0xF025; - Ok(RunState { - mem: Box::new(mem), - pc: orig as u16, - reg: [0, 0, 0, 0, 0, 0, 0, 0xFDFF], - flag: RunFlag::Uninit, - _psr: 0, + Ok(RunEnvironment { + state: RunState { + mem: Box::new(mem), + pc: orig as u16, + reg: [0, 0, 0, 0, 0, 0, 0, 0xFDFF], + flag: RunFlag::Uninit, + _psr: 0, + }, + debugger: 
None, }) } - const OP_TABLE: [fn(&mut RunState, u16); 16] = [ + pub fn set_minimal(&mut self, minimal: bool) { + Output::set_minimal(minimal); + } + + /// Run with preset memory + pub fn run(&mut self) { + loop { + if let Some(debugger) = &mut self.debugger { + Output::Debugger(Condition::Always, Default::default()).start_new_line(); + + match debugger.next_action(&mut self.state) { + Action::Proceed => (), + Action::StopDebugger => { + dprintln!(Always, Warning, "Stopping debugger."); + // Go to start of next loop iteration, without debugger + self.debugger = None; + continue; + } + Action::ExitProgram => { + dprintln!(Always, Warning, "Exiting program."); + return; + } + } + + // If still stuck on HALT + // Never *execute* HALT while debugger is active + // Wait for pc to change, such as `reset`, `exit`, or `quit` + if SignificantInstr::try_from(self.state.mem[self.state.pc as usize]) + == Ok(SignificantInstr::TrapHalt) + { + continue; + } + // Debugger should catch this on next loop, and warn + if self.state.pc < debugger.orig() || self.state.pc >= USER_MEMORY_END { + continue; + } + // From this point, next instruction will always be executed + // (Unless debugger is `quit`, making this counter irrelevant anyway) + debugger.increment_instruction_count(); + } + + if self.state.pc == u16::MAX { + debug_assert!( + self.debugger.is_none(), + "halt should be caught if debugger is active", + ); + break; // Halt was triggered + } + // TODO(feat): Throw exception for pc < orig + if self.state.pc >= USER_MEMORY_END { + exception!("entered protected memory area >= {}", USER_MEMORY_END); + } + + let instr = self.state.mem[self.state.pc as usize]; + let opcode = (instr >> 12) as usize; + // PC incremented before instruction is performed + self.state.pc += 1; + RunState::OP_TABLE[opcode](&mut self.state, instr); + } + + Output::Normal.start_new_line(); + } +} + +impl RunState { + pub const OP_TABLE: [fn(&mut RunState, u16); 16] = [ Self::br, // 0x0 Self::add, // 0x1 
Self::ld, // 0x2 @@ -105,47 +210,48 @@ impl RunState { Self::trap, // 0xF ]; - /// Run with preset memory - pub fn run(&mut self) { - loop { - if self.pc == u16::MAX { - break; // Halt was triggered - } - if self.pc >= 0xFE00 { - exception!("entered protected memory area >= 0xFE00"); - } - let instr = self.mem[self.pc as usize]; - let opcode = (instr >> 12) as usize; - // PC incremented before instruction is performed - self.pc += 1; - Self::OP_TABLE[opcode](self, instr); - } - } - #[inline] - fn reg(&self, reg: u16) -> u16 { + pub(super) fn reg(&self, reg: u16) -> u16 { debug_assert!(reg < 8, "tried to access invalid register 'r{}'", reg); // SAFETY: Should only be indexed with values that are & 0b111 unsafe { *self.reg.get_unchecked(reg as usize) } } #[inline] - fn reg_mut(&mut self, reg: u16) -> &mut u16 { + pub(super) fn reg_mut(&mut self, reg: u16) -> &mut u16 { debug_assert!(reg < 8, "tried to access invalid register 'r{}'", reg); // SAFETY: Should only be indexed with values that are & 0b111 unsafe { self.reg.get_unchecked_mut(reg as usize) } } #[inline] - fn mem(&self, addr: u16) -> u16 { + pub(super) fn mem(&self, addr: u16) -> u16 { // SAFETY: memory fits any u16 index unsafe { *self.mem.get_unchecked(addr as usize) } } #[inline] - fn mem_mut(&mut self, addr: u16) -> &mut u16 { + pub(super) fn mem_mut(&mut self, addr: u16) -> &mut u16 { // SAFETY: memory fits any u16 index unsafe { self.mem.get_unchecked_mut(addr as usize) } } + #[inline] + pub(super) fn pc(&self) -> u16 { + self.pc + } + #[inline] + pub(super) fn pc_mut(&mut self) -> &mut u16 { + &mut self.pc + } + + #[inline] + pub(super) fn flag(&self) -> RunFlag { + self.flag + } + + pub(super) fn memory_equals(&self, other: &RunState) -> bool { + self.mem == other.mem + } + #[inline] fn s_ext(mut val: u16, bits: u32) -> u16 { debug_assert!(bits > 0 && bits < 16); @@ -363,7 +469,7 @@ impl RunState { // out 0x21 => { let chr = (self.reg(0) & 0xFF) as u8 as char; - print!("{chr}"); + 
Output::Normal.print(chr); stdout().flush().unwrap(); } // puts @@ -375,7 +481,7 @@ impl RunState { if chr_ascii == '\0' { break; } - print!("{}", chr_ascii); + Output::Normal.print(chr_ascii); } stdout().flush().unwrap(); } @@ -383,7 +489,7 @@ impl RunState { 0x23 => { let ch = read_input(); *self.reg_mut(0) = ch as u16; - print!("{}", ch); + Output::Normal.print(ch as char); stdout().flush().unwrap(); } // putsp @@ -395,29 +501,25 @@ impl RunState { if chr_ascii == '\0' { break 'string; } - print!("{}", chr_ascii); + Output::Normal.print(chr_ascii); } } stdout().flush().unwrap(); } // halt 0x25 => { - self.pc = u16::MAX; + self.pc = HALT_ADDRESS; println!("\n{:>12}", "Halted".cyan()); } // putn 0x26 => { let val = self.reg(0); - println!("{val}"); + Output::Normal.print_decimal(val); } // reg 0x27 => { - println!("\n------ Registers ------"); - for (i, reg) in self.reg.iter().enumerate() { - println!("r{i}: {reg:.>#19}"); - // println!("r{i}: {reg:.>#19b}"); - } - println!("-----------------------"); + Output::Normal.start_new_line(); + Output::Normal.print_registers(self); } // unknown _ => exception!( diff --git a/src/symbol.rs b/src/symbol.rs index 0966078..9f95408 100644 --- a/src/symbol.rs +++ b/src/symbol.rs @@ -140,6 +140,20 @@ impl Span { pub fn end(&self) -> usize { self.offs.0 + self.len } + + pub fn join(&self, other: Span) -> Span { + let (left, right) = if self.offs() > other.offs() { + (other, *self) // Prevent underflow + } else { + (*self, other) + }; + + let len = right.offs() - left.offs() + right.len(); + // TODO(fix): What does this mean ? + debug_assert!(len >= left.len(), "Span length was calculated incorrectly"); + + Span::new(SrcOffset(left.offs()), len) + } } // Used for miette conversion @@ -274,6 +288,7 @@ pub enum DirKind { Stringz, Blkw, Fill, + Break, } /// Newtype representing an offset from a particular address. 
diff --git a/tests/files/hw.asm b/tests/files/hw.asm index ab4b198..d676501 100644 --- a/tests/files/hw.asm +++ b/tests/files/hw.asm @@ -1,5 +1,9 @@ ; comment lea r0 hw puts +lea r0 hw +.BREAK +puts +puts halt hw .stringz "Hello, world!"