From 9eab376ecdc125d1ff2afed952471c48141f3ee1 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 20 Jan 2024 03:09:41 -0500 Subject: [PATCH 01/60] WIP reworking lexer to use custom fragments and custom file map with mmap2 --- wright/Cargo.toml | 8 + wright/src/bin/wright.rs | 18 - wright/src/filemap.rs | 228 +++++++- wright/src/parser.rs | 10 +- wright/src/parser/fragment.rs | 64 +++ wright/src/parser/lexer.rs | 513 +----------------- wright/src/parser/{ => old}/ast.rs | 0 .../src/parser/{ => old}/ast/declaration.rs | 0 .../parser/{ => old}/ast/declaration/class.rs | 0 .../parser/{ => old}/ast/declaration/enum.rs | 0 .../{ => old}/ast/declaration/function.rs | 0 .../{ => old}/ast/declaration/generics.rs | 0 .../{ => old}/ast/declaration/import.rs | 0 .../{ => old}/ast/declaration/module.rs | 0 .../parser/{ => old}/ast/declaration/type.rs | 0 .../parser/{ => old}/ast/declaration/union.rs | 0 .../{ => old}/ast/declaration/visibility.rs | 0 .../{ => old}/ast/declaration/where_clause.rs | 0 wright/src/parser/{ => old}/ast/expression.rs | 0 .../parser/{ => old}/ast/expression/block.rs | 0 .../{ => old}/ast/expression/literal.rs | 0 .../ast/expression/literal/boolean.rs | 0 .../ast/expression/literal/character.rs | 0 .../ast/expression/literal/escapes.rs | 0 .../ast/expression/literal/integer.rs | 0 .../ast/expression/literal/string.rs | 0 .../{ => old}/ast/expression/parentheses.rs | 0 .../{ => old}/ast/expression/primary.rs | 0 wright/src/parser/{ => old}/ast/identifier.rs | 0 wright/src/parser/{ => old}/ast/metadata.rs | 0 wright/src/parser/{ => old}/ast/path.rs | 0 wright/src/parser/{ => old}/ast/statement.rs | 0 .../parser/{ => old}/ast/statement/bind.rs | 0 wright/src/parser/{ => old}/ast/types.rs | 0 wright/src/parser/{ => old}/error.rs | 0 wright/src/parser/old/lexer.rs | 511 +++++++++++++++++ wright/src/parser/old/lexer/definition.rs | 72 +++ .../parser/{ => old}/lexer/pretty_print.rs | 0 wright/src/parser/{ => old}/lexer/tokens.rs | 0 wright/src/parser/{ => old}/state.rs | 0 wright/src/parser/{ => old}/util.rs | 0 .../parser/{ => old}/util/discard_error.rs | 0 wright/src/parser/{ => old}/util/erase.rs | 0 .../parser/{ => old}/util/first_successful.rs | 0 wright/src/parser/{ => old}/util/ignore.rs | 0 wright/src/parser/{ => old}/util/map.rs | 0 wright/src/repl.rs | 32 +- wright/tests/lexer.rs | 62 +-- 48 files changed, 927 insertions(+), 591 deletions(-) create mode 100644 wright/src/parser/fragment.rs rename wright/src/parser/{ => old}/ast.rs (100%) rename wright/src/parser/{ => old}/ast/declaration.rs (100%) rename wright/src/parser/{ => old}/ast/declaration/class.rs (100%) rename wright/src/parser/{ => old}/ast/declaration/enum.rs (100%) rename wright/src/parser/{ => old}/ast/declaration/function.rs (100%) rename wright/src/parser/{ => old}/ast/declaration/generics.rs (100%) rename wright/src/parser/{ => old}/ast/declaration/import.rs (100%) rename wright/src/parser/{ => old}/ast/declaration/module.rs (100%) rename wright/src/parser/{ => old}/ast/declaration/type.rs (100%) rename wright/src/parser/{ => old}/ast/declaration/union.rs (100%) rename wright/src/parser/{ => old}/ast/declaration/visibility.rs (100%) rename wright/src/parser/{ => old}/ast/declaration/where_clause.rs (100%) rename wright/src/parser/{ => old}/ast/expression.rs (100%) rename wright/src/parser/{ => old}/ast/expression/block.rs (100%) rename wright/src/parser/{ => old}/ast/expression/literal.rs (100%) rename wright/src/parser/{ => old}/ast/expression/literal/boolean.rs (100%) rename wright/src/parser/{ 
=> old}/ast/expression/literal/character.rs (100%) rename wright/src/parser/{ => old}/ast/expression/literal/escapes.rs (100%) rename wright/src/parser/{ => old}/ast/expression/literal/integer.rs (100%) rename wright/src/parser/{ => old}/ast/expression/literal/string.rs (100%) rename wright/src/parser/{ => old}/ast/expression/parentheses.rs (100%) rename wright/src/parser/{ => old}/ast/expression/primary.rs (100%) rename wright/src/parser/{ => old}/ast/identifier.rs (100%) rename wright/src/parser/{ => old}/ast/metadata.rs (100%) rename wright/src/parser/{ => old}/ast/path.rs (100%) rename wright/src/parser/{ => old}/ast/statement.rs (100%) rename wright/src/parser/{ => old}/ast/statement/bind.rs (100%) rename wright/src/parser/{ => old}/ast/types.rs (100%) rename wright/src/parser/{ => old}/error.rs (100%) create mode 100644 wright/src/parser/old/lexer.rs create mode 100644 wright/src/parser/old/lexer/definition.rs rename wright/src/parser/{ => old}/lexer/pretty_print.rs (100%) rename wright/src/parser/{ => old}/lexer/tokens.rs (100%) rename wright/src/parser/{ => old}/state.rs (100%) rename wright/src/parser/{ => old}/util.rs (100%) rename wright/src/parser/{ => old}/util/discard_error.rs (100%) rename wright/src/parser/{ => old}/util/erase.rs (100%) rename wright/src/parser/{ => old}/util/first_successful.rs (100%) rename wright/src/parser/{ => old}/util/ignore.rs (100%) rename wright/src/parser/{ => old}/util/map.rs (100%) diff --git a/wright/Cargo.toml b/wright/Cargo.toml index 0eb54253..813d6ae3 100644 --- a/wright/Cargo.toml +++ b/wright/Cargo.toml @@ -24,8 +24,16 @@ derive_more = "0.99.17" unicode-ident = "1.0" codespan-reporting = "0.11.1" termcolor = "1.2.0" + +# Integers larger than 128 bits num = "0.4" +# Portable (windows, mac, linux) file locking +fs4 = { version = "0.7.0", features = ["sync"] } + +# Memory mapped files. +memmap2 = "0.9.3" + [[bin]] name = "wright" test = false diff --git a/wright/src/bin/wright.rs b/wright/src/bin/wright.rs index 7fc4f2f8..4f548b1b 100644 --- a/wright/src/bin/wright.rs +++ b/wright/src/bin/wright.rs @@ -47,24 +47,6 @@ fn main() -> Result<()> { let cli = Cli::parse(); match cli.command { - // Printing token debug information. - Some(Commands::Debug { - command: DebugCommands::Tokens { file, pretty }, - }) => { - let source_str = fs::read_to_string(&file)?; - let source = SimpleFile::new(file.to_string_lossy(), &source_str); - - if pretty { - Lexer::debug_pretty_print(&source)?; - } else { - for token in Lexer::new(&source_str) { - println!("{}", token); - } - } - - Ok(()) - } - // Start an interactive repl. Some(Commands::Repl) => repl::start(), diff --git a/wright/src/filemap.rs b/wright/src/filemap.rs index 7f167a4e..20d2a0ab 100644 --- a/wright/src/filemap.rs +++ b/wright/src/filemap.rs @@ -1,8 +1,20 @@ //! Responsible for keeping track of different files added to the Wright build system. -use codespan_reporting::files::{Files, SimpleFiles}; +use codespan_reporting::{files::{Files, SimpleFile}, term::Config, diagnostic::Diagnostic}; use derive_more::Display; -use std::path::PathBuf; +use fs4::FileExt; +use memmap2::Mmap; +use termcolor::{ColorChoice, StandardStream}; +use std::{path::PathBuf, io, fs::File, sync::mpsc, thread, time::Duration}; + +/// Rename import for clarity. +use codespan_reporting::files::Error as CodespanError; + +/// Convenience type alias. +type CodespanResult = Result; + +/// Amount of time before we should warn the user about locking the file taking too long. 
+const FILE_LOCK_WARNING_TIME: Duration = Duration::from_secs(5); /// Used to represent different file names used throughout this crate. #[derive(Debug, Display, Clone)] @@ -20,8 +32,214 @@ pub enum FileName { None, } +/// An immutable string that either references a source file in memory using an `&` reference or using a [Box]. +#[derive(Debug)] +enum ImmutableString<'src> { + /// An immutable reference to an existing string. + Reference(&'src str), + + /// An owned immutable string. + Owned(Box), + + /// A locked, memory mapped file from the OS. + LockedFile { + /// The locked file that needs to be unlocked when this object is dropped. + locked_file: File, + /// The memory locked file -- this is expected to be locked before + /// one creates it in the file + mem_map: Mmap, + } +} + /// The file map that we use throughout the rest of this crate. -pub type FileMap = SimpleFiles; +pub struct FileMap<'src> { + /// This is just a list of files we're keeping track of. + /// This is identical to the current implementation of [codespan_reporting::files::SimpleFiles], + /// but we don't use theirs because we need to iterate over the [SimpleFile]s manually for various + /// parts of the implementation (including the [Drop] implementation). + inner: Vec>> +} + + +impl<'src> FileMap<'src> { + /// Construct a new empty [FileMap]. + pub const fn new() -> Self { + FileMap { inner: Vec::new() } + } + + /// Get a reference to a file from the internal [Vec] or return a [`CodespanError::FileMissing`] error. + fn get(&self, file_id: >::FileId) -> CodespanResult<&SimpleFile>> { + self.inner.get(file_id).ok_or(CodespanError::FileMissing) + } + + /// Internal function to add a file to the vec. Public facing functions will need to do some conversion + /// and then call this. + fn add(&mut self, name: FileName, source: ImmutableString<'src>) -> >::FileId { + // The file id is just the next index in the vec. + let file_id: usize = self.inner.len(); + self.inner.push(SimpleFile::new(name, source)); + file_id + } + + /// Add a file (in the form of an owned string) to the file map. + pub fn add_string(&mut self, name: FileName, source: String) -> >::FileId { + self.add(name, ImmutableString::Owned(source.into_boxed_str())) + } + + /// Add a file (in the form of a string reference) to the file map. + pub fn add_str_ref(&mut self, name: FileName, source: &'src str) -> >::FileId { + self.add(name, ImmutableString::Reference(source)) + } + + /// Add a file from the file system. This file will be + /// opened with read permissions, locked, memory mapped, + /// and then added to the file map. The file name in the memory map will be the [PathBuf] passed to this function. + pub fn add_file(&mut self, path: PathBuf) -> io::Result<>::FileId> { + // Make a one-off enum here to use for channel messages. + enum ChannelMessage { + /// The file was successfully locked. + FileLocked(File), + /// There was an error locking the file. + LockingError(io::Error), + /// File is taking a long time to lock. + FiveSecondWarning, + } + + // Open the file for reading. + let file: File = File::open(&path)?; + + // Create two threads and a mpsc channel for warning the user if + // locking the file takes longer than 5 seconds. 
+ let (tx, rx) = mpsc::sync_channel::(1); + let timout_tx = tx.clone(); + + // Thread to lock the file + thread::spawn(move || { + match file.lock_exclusive() { + Ok(_) => tx.send(ChannelMessage::FileLocked(file)), + Err(err) => tx.send(ChannelMessage::LockingError(err)) + } + }); + + // Thread to warn user if it takes too long. + thread::spawn(move || { + thread::sleep(FILE_LOCK_WARNING_TIME); + timout_tx.send(ChannelMessage::FiveSecondWarning); + }); -/// The file id type used to refer to files in the file map. -pub type FileId = >::FileId; + // Use an infinite loop to make sure we recieve all the messages from the senders. + loop { + match rx.recv() { + // Emit the diagnostic for the 5-second warning. + Ok(ChannelMessage::FiveSecondWarning) => { + // Get a lock on the standard out so that we don't get interrupted here. + let stdout = StandardStream::stdout(ColorChoice::Auto); + let mut stdout = stdout.lock(); + // Make the diagnostic to show to the user. + let message = format!("Getting a file lock on {} has taken more than {} seconds.", path.display(), FILE_LOCK_WARNING_TIME.as_secs()); + let diagnostic: Diagnostic< as Files<'src>>::FileId> = Diagnostic::note().with_message(message); + // Emit the diagnostic to the user. + codespan_reporting::term::emit(&mut stdout, &Config::default(), self, &diagnostic) + // Convert from the potential codespan error to a normal IO err. + .map_err(|cs_err: CodespanError| match cs_err { + CodespanError::Io(io_err) => io_err, + _ => unreachable!("We should not see any other codespan errors here, since we do not reference files in this diagnostic."), + })? + } + + // Handle any io errors locking the file by returning them. + Ok(ChannelMessage::LockingError(io_err)) => return Err(io_err), + + // Handle success by finishing adding the file to the FileMap. + Ok(ChannelMessage::FileLocked(file)) => { + // The file is now locked, we can memmory map it and add it ro the vec. + // SAFETY: The file should be locked at this point so undefined behaviour from concurrent + // modification is avoided. + let mem_map: Mmap = unsafe { + Mmap::map(&file) + // Make sure we unlock the file if there's an issue memory mapping it. + .map_err(|err| { + file.unlock(); + err + }) + }?; + + // Double check that the file is valid utf-8. If not, return an IO error. + let raw_data: &[u8] = mem_map.as_ref(); + let as_str: Result<&str, std::str::Utf8Error> = std::str::from_utf8(raw_data); + if as_str.is_err() { + // The file is not valid for us so we should unlock it and return an error. + file.unlock(); + return Err(io::Error::new(io::ErrorKind::InvalidData, as_str.unwrap_err())); + } + + // The file's contents are valid utf-8, add them to the file map. + let file_id: usize = self.inner.len(); + self.add(FileName::Real(path), ImmutableString::LockedFile { locked_file: file, mem_map }); + return Ok(file_id); + } + + Err(_) => unreachable!("The reciever should never reach a state where both senders are closed."), + } + } + } +} + +impl<'src> Drop for FileMap<'src> { + fn drop(&mut self) { + // Unlock all files from the file system. + for file in self.inner.iter() { + match file.source() { + // Locked and memory-mapped files need to be unlocked before dropping. + ImmutableString::LockedFile { locked_file, .. } => { + // Unlock the file to give back to the OS. + locked_file.unlock(); + }, + + // All other types of file can drop normally. + _ => {} + } + } + } +} + +/// The implementation here is basically identical to the one for [codespan_reporting::files::SimpleFiles]. 
+impl<'src> Files<'src> for FileMap<'src> { + /// File IDs here are just indices into [FileMap]'s internal [Vec]. + type FileId = usize; + + type Name = FileName; + + type Source = &'src str; + + fn name(&self, id: Self::FileId) -> Result { + Ok(self.get(id)?.name().clone()) + } + + fn source(&'src self, id: Self::FileId) -> Result { + Ok(self.get(id)?.source().as_ref()) + } + + fn line_index(&self, id: Self::FileId, byte_index: usize) -> Result { + self.get(id)?.line_index((), byte_index) + } + + fn line_range(&self, id: Self::FileId, line_index: usize) -> Result, codespan_reporting::files::Error> { + self.get(id)?.line_range((), line_index) + } +} + +impl<'src> AsRef for ImmutableString<'src> { + fn as_ref(&self) -> &str { + match self { + ImmutableString::Reference(str) => str, + ImmutableString::Owned(str) => &str, + ImmutableString::LockedFile { mem_map, .. } => { + // Get a direct reference to the data that is in the memory map. + let raw_data: &[u8] = mem_map.as_ref(); + // SAFETY: UTF-8 validity is checked when the file is added to the file map. + unsafe { std::str::from_utf8_unchecked(raw_data) } + } + } + } +} diff --git a/wright/src/parser.rs b/wright/src/parser.rs index 3779b817..a1c20b2e 100644 --- a/wright/src/parser.rs +++ b/wright/src/parser.rs @@ -1,7 +1,9 @@ //! Parsers module, for all the parsers implemented by wright and necessary to parse wright source code. -pub mod ast; -pub mod error; +// pub mod ast; +// pub mod error; +// pub mod state; +// pub mod util; + +pub mod fragment; pub mod lexer; -pub mod state; -pub mod util; diff --git a/wright/src/parser/fragment.rs b/wright/src/parser/fragment.rs new file mode 100644 index 00000000..49755154 --- /dev/null +++ b/wright/src/parser/fragment.rs @@ -0,0 +1,64 @@ +//! [Fragment] struct and implementation for dealing with fragments of source code. + +use std::str::Chars; + +/// A fragment of source code. +#[derive(Clone, Copy, Debug)] +pub struct Fragment<'src> { + /// Fragments are represented using direct string references into the source file itself. + pub inner: &'src str +} + +impl<'src> Fragment<'src> { + /// Get the length (in bytes) of this fragment. + pub const fn len(&self) -> usize { + self.inner.len() + } + + /// Return true if this fragment overlaps at all with the other (either one contains the start of the other, + /// by pointer). + pub fn overlaps(&self, other: &Self) -> bool { + // Get the pointer to the start of the string. + let (start, len) = (self.inner.as_ptr(), self.len()); + // Get a pointer just past the end of the string. + // SAFETY: the resulting pointer is guarunteed to point at one byte past the end of the string. + let end = unsafe { start.add(len) }; + + // Do the same thing for the other fragment. + let (other_start, len) = (other.inner.as_ptr(), other.len()); + let other_end = unsafe { other_start.add(len) }; + + // Check bounds. + (start <= other_start && other_start < end) || (other_start <= start && start < other_end) + } + + /// Split this fragment into two sub fragments, with the first one being `bytes` long and the second containing the + /// rest of this fragment. + pub fn split(&self, bytes: usize) -> (Self, Self) { + (Self { inner: &self.inner[..bytes] }, Self { inner: &self.inner[bytes..]}) + } + + /// Get an iterator over the characters in this fragment. 
+ pub fn chars(&self) -> Chars<'src> { + self.inner.chars() + } +} + + +#[cfg(test)] +mod tests { + use crate::parser::fragment::Fragment; + + #[test] + fn test_overlap() { + let a = Fragment { inner: "Test string" }; + let b = Fragment { inner: &a.inner[3..] }; + let c = Fragment { inner: &a.inner[..a.len()-3] }; + let d = Fragment { inner: "other string" }; + + assert!(a.overlaps(&b)); + assert!(b.overlaps(&c)); + assert!(c.overlaps(&a)); + assert!(!a.overlaps(&d)); + } +} diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 2d34d58b..41ff4f60 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -1,507 +1,14 @@ -//! The wright lexer. This module is responsible for lexical analysis and initial processing of source code. +//! First pass lexer that gets run on the source code and returns a series of tokens with their associated [Fragment]s. +//! +//! Note that this will strip out comments and whitespace, returning only fragments that match one of the paterns +//! defined for tokens. -mod pretty_print; -pub mod tokens; +use super::fragment::Fragment; -use std::{ - iter::{FusedIterator, Peekable}, - str::CharIndices, -}; - -use self::tokens::{CommentTy, Token, TokenTy}; - -/// Lexical analyzer for wright code. This struct host functions that produce tokens from wright source. -#[derive(Debug, Clone)] -pub struct Lexer<'a> { - /// Iterator over the indexed input characters tied to the lifetime of the source code. - iterator: Peekable>, - /// The source code passed to the lexer. This is used to check for keywords. - source: &'a str, -} - -impl<'a> Lexer<'a> { - /// Create a new lexer that iterates on a given source string. - pub fn new(source: &'a str) -> Self { - Lexer { - iterator: source.char_indices().peekable(), - source, - } - } -} - -impl<'a> Iterator for Lexer<'a> { - type Item = Token; - - fn next(&mut self) -> Option { - // Get the next character out of the iterator. - let (start_index, next) = self.iterator.next()?; - - // Handle single character tokens first. - let single_char_tokens = [ - ('(', TokenTy::LeftParen), - (')', TokenTy::RightParen), - ('[', TokenTy::LeftSquare), - (']', TokenTy::RightSquare), - ('{', TokenTy::LeftBracket), - ('}', TokenTy::RightBracket), - ('@', TokenTy::At), - (';', TokenTy::Semi), - ('?', TokenTy::Question), - (',', TokenTy::Comma), - ('#', TokenTy::Pound), - ('$', TokenTy::Dollar), - ]; - - for (c, variant) in single_char_tokens { - if next == c { - return Some(Token { variant, length: 1 }); - } - } - - // Next handle tokens that can possibly be followed by an equal sign. - let possible_eq_upgrades = [ - ('!', TokenTy::Bang, TokenTy::BangEq), - ('%', TokenTy::Mod, TokenTy::ModEq), - ('^', TokenTy::Xor, TokenTy::XorEq), - ('*', TokenTy::Star, TokenTy::StarEq), - ('+', TokenTy::Plus, TokenTy::PlusEq), - ]; - - for (c, no_eq, with_eq) in possible_eq_upgrades { - if next == c { - return match self.iterator.next_if(|&(_, x)| x == '=') { - Some(_) => Some(Token { - variant: with_eq, - length: 2, - }), - None => Some(Token { - variant: no_eq, - length: 1, - }), - }; - } - } - - // Next handle tokens that can be doubled or have an equals sign. 
- let possible_eq_or_double = [ - ('&', TokenTy::And, TokenTy::AndEq, TokenTy::AndAnd), - ('|', TokenTy::Or, TokenTy::OrEq, TokenTy::OrOr), - ('<', TokenTy::Lt, TokenTy::LtEq, TokenTy::ShiftLeft), - ('>', TokenTy::Gt, TokenTy::GtEq, TokenTy::ShiftRight), - (':', TokenTy::Colon, TokenTy::ColonEq, TokenTy::ColonColon), - ('/', TokenTy::Div, TokenTy::DivEq, TokenTy::DivDiv), - ]; - - for (c, alone, with_eq, doubled) in possible_eq_or_double { - if next == c { - return match self.iterator.next_if(|&(_, x)| x == '=' || x == c) { - // Followed by `=` - Some((_, '=')) => Some(Token { - variant: with_eq, - length: 2, - }), - - // Followed by itself. - Some(_) => Some(Token { - variant: doubled, - length: 2, - }), - - // Single char token - None => Some(Token { - variant: alone, - length: 1, - }), - }; - } - } - - // Next deal with arrows - let arrows = [ - ('-', TokenTy::Minus, TokenTy::MinusEq, TokenTy::SingleArrow), - ('=', TokenTy::Eq, TokenTy::EqEq, TokenTy::DoubleArrow), - ('~', TokenTy::Tilde, TokenTy::TildeEq, TokenTy::TildeArrow), - ]; - - for (c, alone, with_eq, as_arrow) in arrows { - if next == c { - return match self.iterator.next_if(|&(_, x)| x == '=' || x == '>') { - Some((_, '=')) => Some(Token { - variant: with_eq, - length: 2, - }), - Some((_, '>')) => Some(Token { - variant: as_arrow, - length: 2, - }), - None => Some(Token { - variant: alone, - length: 1, - }), - _ => unreachable!(), - }; - } - } - - // Dot and range operators. - if next == '.' { - return match self.iterator.next_if(|&(_, x)| x == '.') { - None => Some(Token { - variant: TokenTy::Dot, - length: 1, - }), - Some(_) => match self.iterator.next_if(|&(_, x)| x == '=') { - None => Some(Token { - variant: TokenTy::Range, - length: 2, - }), - Some(_) => Some(Token { - variant: TokenTy::RangeInclusive, - length: 3, - }), - }, - }; - } - - // Whitespace. - if next.is_whitespace() { - // Accumulate the number of bytes of whitespace consumed. - let mut acc = next.len_utf8(); - // Use while-let instead of take-while to avoid consuming the whole iterator. - while let Some((_, consumed)) = self.iterator.next_if(|&(_, x)| x.is_whitespace()) { - acc += consumed.len_utf8(); - } - - return Some(Token { - variant: TokenTy::Whitespace, - length: acc, - }); - } - - // Identifiers - if unicode_ident::is_xid_start(next) || next == '_' { - // Accumulate the number of bytes consumed in the identifier. - let mut acc = next.len_utf8(); - // Consume the rest of the identifier. - while let Some((_, consumed)) = self - .iterator - .next_if(|&(_, x)| unicode_ident::is_xid_continue(x)) - { - acc += consumed.len_utf8(); - } - - // Get the matching source code to check for reserved words. - let range = start_index..start_index + acc; - let matching_source = &self.source[range]; - - // Match on reserved words. - let variant: TokenTy = match matching_source { - // Declaration keywords - "class" => TokenTy::Class, - "struct" => TokenTy::Struct, - "record" => TokenTy::Record, - "trait" => TokenTy::Trait, - "func" => TokenTy::Func, - "enum" => TokenTy::Enum, - "union" => TokenTy::Union, - "module" => TokenTy::Module, - "import" => TokenTy::Import, - "implement" => TokenTy::Implement, - "represent" => TokenTy::Represent, - - // Visibility keywords - "public" => TokenTy::Public, - "package" => TokenTy::Package, - "private" => TokenTy::Private, - - // Boolean literals - "true" => TokenTy::True, - "false" => TokenTy::False, - - // Other keywords. 
- "constraint" => TokenTy::Constraint, - "constrain" => TokenTy::Constrain, - "relation" => TokenTy::Relation, - "unsafe" => TokenTy::Unsafe, - "unchecked" => TokenTy::Unchecked, - "lifetime" => TokenTy::Lifetime, - "outlives" => TokenTy::Outlives, - "Self" => TokenTy::SelfUpper, - "self" => TokenTy::SelfLower, - "type" => TokenTy::Type, - "const" => TokenTy::Const, - "var" => TokenTy::Var, - "if" => TokenTy::If, - "else" => TokenTy::Else, - "match" => TokenTy::Match, - "is" => TokenTy::Is, - "as" => TokenTy::As, - "on" => TokenTy::On, - "in" => TokenTy::In, - "not" => TokenTy::Not, - "dyn" => TokenTy::Dyn, - "try" => TokenTy::Try, - - _ => TokenTy::Identifier, - }; - - return Some(Token { - variant, - length: acc, - }); - } - - // Numerical literals. - if next.is_ascii_digit() { - // Accumulate the number of bytes consumed in the numeric literal. - // All ascii is 1 byte wide so avoid the extra call to `.len_utf8()`. - let mut acc = 1; - // Track the radix - let mut radix = 10; - - // Change the radix if necessary - if next == '0' { - if let Some((_, prefix)) = self - .iterator - .next_if(|(_, x)| ['x', 'o', 'b', 'X', 'B'].contains(x)) - { - acc += 1; - - radix = match prefix { - 'x' | 'X' => 16, - 'b' | 'B' => 2, - 'o' => 8, - _ => unreachable!(), - }; - } - } - - // Consume the rest of the integer literal. - while self - .iterator - .next_if(|&(_, x)| x.is_digit(radix) || x == '_') - .is_some() - { - // All accepted characters should be ascii, so we can just simplify `.len_utf8()` to 1. - acc += 1; - } - - return Some(Token { - variant: TokenTy::IntegerLit, - length: acc, - }); - } - - // String and Character literals. - if ['\'', '"', '`'].contains(&next) { - // Accumulator to track number of bytes consumed. - let mut acc: usize = 1; - let mut is_terminated = false; - - // Consume characters until the end of the literal - while let Some((_, consumed)) = self.iterator.next() { - acc += consumed.len_utf8(); - - match consumed { - // Ending character is the same as starting character. - // Escapes should all be handled, so don't worry about this being escaped. - _ if consumed == next => { - is_terminated = true; - break; - } - - // Escaped pattern. - // Only worry about escaped terminators here, since all other escaped - // patterns can be dealt with later. - '\\' => { - // Consume the escaped character regardless of what it is. - // It will always be part of the quoted literal. - if let Some((_, escaped)) = self.iterator.next() { - acc += escaped.len_utf8(); - } - } - - // Do nothing for non-escaped chars since the quoted literal continues - // and we have already recorded the consumed bytes. - _ => {} - } - } - - // We have finished consuming the literal -- make sure we produce the - // right variant - return match next { - '\'' => Some(Token { - variant: TokenTy::CharLit { is_terminated }, - length: acc, - }), - _ => Some(Token { - variant: TokenTy::StringLit { - is_format: next == '`', - is_terminated, - }, - length: acc, - }), - }; - } - - // Comments. - if next == '#' { - // Use accumulator to track number of bytes consumed. - let mut acc = 1; - - // There are a few variants as follows. - // `#...` - single line comment - // `#*...*#` - multiline comment - // `##...` - single line inner doc comment - // `##!...` - single line outer doc comment - // `#**...*#` - multiline inner doc comment - // `#*!...*#` - multiline outer doc comment - // If a multiline comment is not terminated by the end of the file then just mark it as such in the - // produced token. 
A seperate token error handling layer will raise that outside of this function. - - // Handle multiline comments - if self.iterator.next_if(|&(_, x)| x == '*').is_some() { - acc += 1; - - // Check if it's a doc comment. - let comment_type = match self.iterator.next_if(|&(_, x)| x == '*' || x == '!') { - Some((_, '*')) => { - acc += 1; - CommentTy::InnerDoc - } - - Some((_, '!')) => { - acc += 1; - CommentTy::OuterDoc - } - - None => CommentTy::Normal, - - _ => unreachable!(), - }; - - // Read the rest of the multi-line comment - while let Some((_, consumed)) = self.iterator.next() { - acc += consumed.len_utf8(); - if consumed == '*' && matches!(self.iterator.peek(), Some((_, '#'))) { - acc += 1; - return Some(Token { - variant: TokenTy::MultilineComment { - comment_type, - is_terminated: true, - }, - length: acc, - }); - } - } - - // If we hit the end, the comment is not terminated. - return Some(Token { - variant: TokenTy::MultilineComment { - comment_type, - is_terminated: false, - }, - length: acc, - }); - } - - // Handle single line comment. - let mut comment_type = CommentTy::Normal; - - // Check for inner doc comment - if self.iterator.next_if(|&(_, x)| x == '#').is_some() { - acc += 1; - comment_type = CommentTy::InnerDoc; - - // Check for outer doc comment - if self.iterator.next_if(|&(_, x)| x == '!').is_some() { - acc += 1; - comment_type = CommentTy::OuterDoc; - } - } - - // Read to end of line/file for rest of comment. Include line ending in consumed bytes. - for (_, consumed) in self.iterator.by_ref() { - acc += consumed.len_utf8(); - if consumed == '\n' { - break; - } - } - - return Some(Token { - variant: TokenTy::SingleLineComment { comment_type }, - length: acc, - }); - } - - // If we haven't matched by this point, return an unknown token. - Some(Token { - variant: TokenTy::Unknown, - length: next.len_utf8(), - }) - } - - fn size_hint(&self) -> (usize, Option) { - // Get the size hint of the internal iterator. - let (inner_lower, upper) = self.iterator.size_hint(); - // If there are any characters left, then there is at least one token remaining. - ((inner_lower > 0) as usize, upper) - } -} - -impl<'a> FusedIterator for Lexer<'a> {} - -/// A token with an index in a piece of source code. -#[derive(Copy, Clone, Debug)] -pub struct IndexedToken { - /// The byte index into the source code that this token starts on. - pub index: usize, - /// The token itself. - pub token: Token, -} - -/// An iterator over the tokens in the source code with byte indices attached. -#[derive(Debug, Clone)] -pub struct IndexedLexer<'src> { - /// The current index in source code -- the number of bytes currently consumed by the iterator. - pub index: usize, - /// The underlying lexer iterator. - lexer: Lexer<'src>, -} - -impl<'src> IndexedLexer<'src> { - /// Construct a new indexed lexer. - pub fn new(source: &'src str) -> Self { - Self { - index: 0, - lexer: Lexer::new(source), - } - } -} - -impl<'a> Iterator for IndexedLexer<'a> { - type Item = IndexedToken; - - fn next(&mut self) -> Option { - // Pull a token from the iterator. - let token = self.lexer.next()?; - - // If available, add the current index to it to return. - let indexed_token = IndexedToken { - index: self.index, - token, - }; - - // Update the current index with the length of the token. 
- self.index += token.length; - - // Return indexed token - Some(indexed_token) - } - - fn size_hint(&self) -> (usize, Option) { - self.lexer.size_hint() - } +/// The +#[derive(Debug)] +pub struct Lexer<'src> { + /// The remaining source code that has not been processed and returned as a token from the iterator yet. + pub remaining: Fragment<'src>, } -impl<'a> FusedIterator for IndexedLexer<'a> {} diff --git a/wright/src/parser/ast.rs b/wright/src/parser/old/ast.rs similarity index 100% rename from wright/src/parser/ast.rs rename to wright/src/parser/old/ast.rs diff --git a/wright/src/parser/ast/declaration.rs b/wright/src/parser/old/ast/declaration.rs similarity index 100% rename from wright/src/parser/ast/declaration.rs rename to wright/src/parser/old/ast/declaration.rs diff --git a/wright/src/parser/ast/declaration/class.rs b/wright/src/parser/old/ast/declaration/class.rs similarity index 100% rename from wright/src/parser/ast/declaration/class.rs rename to wright/src/parser/old/ast/declaration/class.rs diff --git a/wright/src/parser/ast/declaration/enum.rs b/wright/src/parser/old/ast/declaration/enum.rs similarity index 100% rename from wright/src/parser/ast/declaration/enum.rs rename to wright/src/parser/old/ast/declaration/enum.rs diff --git a/wright/src/parser/ast/declaration/function.rs b/wright/src/parser/old/ast/declaration/function.rs similarity index 100% rename from wright/src/parser/ast/declaration/function.rs rename to wright/src/parser/old/ast/declaration/function.rs diff --git a/wright/src/parser/ast/declaration/generics.rs b/wright/src/parser/old/ast/declaration/generics.rs similarity index 100% rename from wright/src/parser/ast/declaration/generics.rs rename to wright/src/parser/old/ast/declaration/generics.rs diff --git a/wright/src/parser/ast/declaration/import.rs b/wright/src/parser/old/ast/declaration/import.rs similarity index 100% rename from wright/src/parser/ast/declaration/import.rs rename to wright/src/parser/old/ast/declaration/import.rs diff --git a/wright/src/parser/ast/declaration/module.rs b/wright/src/parser/old/ast/declaration/module.rs similarity index 100% rename from wright/src/parser/ast/declaration/module.rs rename to wright/src/parser/old/ast/declaration/module.rs diff --git a/wright/src/parser/ast/declaration/type.rs b/wright/src/parser/old/ast/declaration/type.rs similarity index 100% rename from wright/src/parser/ast/declaration/type.rs rename to wright/src/parser/old/ast/declaration/type.rs diff --git a/wright/src/parser/ast/declaration/union.rs b/wright/src/parser/old/ast/declaration/union.rs similarity index 100% rename from wright/src/parser/ast/declaration/union.rs rename to wright/src/parser/old/ast/declaration/union.rs diff --git a/wright/src/parser/ast/declaration/visibility.rs b/wright/src/parser/old/ast/declaration/visibility.rs similarity index 100% rename from wright/src/parser/ast/declaration/visibility.rs rename to wright/src/parser/old/ast/declaration/visibility.rs diff --git a/wright/src/parser/ast/declaration/where_clause.rs b/wright/src/parser/old/ast/declaration/where_clause.rs similarity index 100% rename from wright/src/parser/ast/declaration/where_clause.rs rename to wright/src/parser/old/ast/declaration/where_clause.rs diff --git a/wright/src/parser/ast/expression.rs b/wright/src/parser/old/ast/expression.rs similarity index 100% rename from wright/src/parser/ast/expression.rs rename to wright/src/parser/old/ast/expression.rs diff --git a/wright/src/parser/ast/expression/block.rs 
b/wright/src/parser/old/ast/expression/block.rs similarity index 100% rename from wright/src/parser/ast/expression/block.rs rename to wright/src/parser/old/ast/expression/block.rs diff --git a/wright/src/parser/ast/expression/literal.rs b/wright/src/parser/old/ast/expression/literal.rs similarity index 100% rename from wright/src/parser/ast/expression/literal.rs rename to wright/src/parser/old/ast/expression/literal.rs diff --git a/wright/src/parser/ast/expression/literal/boolean.rs b/wright/src/parser/old/ast/expression/literal/boolean.rs similarity index 100% rename from wright/src/parser/ast/expression/literal/boolean.rs rename to wright/src/parser/old/ast/expression/literal/boolean.rs diff --git a/wright/src/parser/ast/expression/literal/character.rs b/wright/src/parser/old/ast/expression/literal/character.rs similarity index 100% rename from wright/src/parser/ast/expression/literal/character.rs rename to wright/src/parser/old/ast/expression/literal/character.rs diff --git a/wright/src/parser/ast/expression/literal/escapes.rs b/wright/src/parser/old/ast/expression/literal/escapes.rs similarity index 100% rename from wright/src/parser/ast/expression/literal/escapes.rs rename to wright/src/parser/old/ast/expression/literal/escapes.rs diff --git a/wright/src/parser/ast/expression/literal/integer.rs b/wright/src/parser/old/ast/expression/literal/integer.rs similarity index 100% rename from wright/src/parser/ast/expression/literal/integer.rs rename to wright/src/parser/old/ast/expression/literal/integer.rs diff --git a/wright/src/parser/ast/expression/literal/string.rs b/wright/src/parser/old/ast/expression/literal/string.rs similarity index 100% rename from wright/src/parser/ast/expression/literal/string.rs rename to wright/src/parser/old/ast/expression/literal/string.rs diff --git a/wright/src/parser/ast/expression/parentheses.rs b/wright/src/parser/old/ast/expression/parentheses.rs similarity index 100% rename from wright/src/parser/ast/expression/parentheses.rs rename to wright/src/parser/old/ast/expression/parentheses.rs diff --git a/wright/src/parser/ast/expression/primary.rs b/wright/src/parser/old/ast/expression/primary.rs similarity index 100% rename from wright/src/parser/ast/expression/primary.rs rename to wright/src/parser/old/ast/expression/primary.rs diff --git a/wright/src/parser/ast/identifier.rs b/wright/src/parser/old/ast/identifier.rs similarity index 100% rename from wright/src/parser/ast/identifier.rs rename to wright/src/parser/old/ast/identifier.rs diff --git a/wright/src/parser/ast/metadata.rs b/wright/src/parser/old/ast/metadata.rs similarity index 100% rename from wright/src/parser/ast/metadata.rs rename to wright/src/parser/old/ast/metadata.rs diff --git a/wright/src/parser/ast/path.rs b/wright/src/parser/old/ast/path.rs similarity index 100% rename from wright/src/parser/ast/path.rs rename to wright/src/parser/old/ast/path.rs diff --git a/wright/src/parser/ast/statement.rs b/wright/src/parser/old/ast/statement.rs similarity index 100% rename from wright/src/parser/ast/statement.rs rename to wright/src/parser/old/ast/statement.rs diff --git a/wright/src/parser/ast/statement/bind.rs b/wright/src/parser/old/ast/statement/bind.rs similarity index 100% rename from wright/src/parser/ast/statement/bind.rs rename to wright/src/parser/old/ast/statement/bind.rs diff --git a/wright/src/parser/ast/types.rs b/wright/src/parser/old/ast/types.rs similarity index 100% rename from wright/src/parser/ast/types.rs rename to wright/src/parser/old/ast/types.rs diff --git 
a/wright/src/parser/error.rs b/wright/src/parser/old/error.rs similarity index 100% rename from wright/src/parser/error.rs rename to wright/src/parser/old/error.rs diff --git a/wright/src/parser/old/lexer.rs b/wright/src/parser/old/lexer.rs new file mode 100644 index 00000000..baf58553 --- /dev/null +++ b/wright/src/parser/old/lexer.rs @@ -0,0 +1,511 @@ +//! The wright lexer. This module is responsible for lexical analysis and initial processing of source code. +//! +//! This is implemented here using an iterator that looks up the next character from the input using a `const`-defined +//! lexer structure definition. This can be found in [definition]. + +pub mod tokens; +mod definition; +// mod pretty_print; + +use std::{ + iter::{FusedIterator, Peekable}, + str::CharIndices, +}; + +use self::tokens::{CommentTy, Token, TokenTy}; + +/// Lexical analyzer for wright code. This struct host functions that produce tokens from wright source. +#[derive(Debug, Clone)] +pub struct Lexer<'a> { + /// Iterator over the indexed input characters tied to the lifetime of the source code. + iterator: Peekable>, + /// The source code passed to the lexer. This is used to check for keywords. + source: &'a str, +} + +impl<'a> Lexer<'a> { + /// Create a new lexer that iterates on a given source string. + pub fn new(source: &'a str) -> Self { + Lexer { + iterator: source.char_indices().peekable(), + source, + } + } +} + +impl<'a> Iterator for Lexer<'a> { + type Item = Token; + + fn next(&mut self) -> Option { + // Get the next character out of the iterator. + let (start_index, next) = self.iterator.next()?; + + // Handle single character tokens first. + let single_char_tokens = [ + ('(', TokenTy::LeftParen), + (')', TokenTy::RightParen), + ('[', TokenTy::LeftSquare), + (']', TokenTy::RightSquare), + ('{', TokenTy::LeftBracket), + ('}', TokenTy::RightBracket), + ('@', TokenTy::At), + (';', TokenTy::Semi), + ('?', TokenTy::Question), + (',', TokenTy::Comma), + ('#', TokenTy::Pound), + ('$', TokenTy::Dollar), + ]; + + for (c, variant) in single_char_tokens { + if next == c { + return Some(Token { variant, length: 1 }); + } + } + + // Next handle tokens that can possibly be followed by an equal sign. + let possible_eq_upgrades = [ + ('!', TokenTy::Bang, TokenTy::BangEq), + ('%', TokenTy::Mod, TokenTy::ModEq), + ('^', TokenTy::Xor, TokenTy::XorEq), + ('*', TokenTy::Star, TokenTy::StarEq), + ('+', TokenTy::Plus, TokenTy::PlusEq), + ]; + + for (c, no_eq, with_eq) in possible_eq_upgrades { + if next == c { + return match self.iterator.next_if(|&(_, x)| x == '=') { + Some(_) => Some(Token { + variant: with_eq, + length: 2, + }), + None => Some(Token { + variant: no_eq, + length: 1, + }), + }; + } + } + + // Next handle tokens that can be doubled or have an equals sign. + let possible_eq_or_double = [ + ('&', TokenTy::And, TokenTy::AndEq, TokenTy::AndAnd), + ('|', TokenTy::Or, TokenTy::OrEq, TokenTy::OrOr), + ('<', TokenTy::Lt, TokenTy::LtEq, TokenTy::ShiftLeft), + ('>', TokenTy::Gt, TokenTy::GtEq, TokenTy::ShiftRight), + (':', TokenTy::Colon, TokenTy::ColonEq, TokenTy::ColonColon), + ('/', TokenTy::Div, TokenTy::DivEq, TokenTy::DivDiv), + ]; + + for (c, alone, with_eq, doubled) in possible_eq_or_double { + if next == c { + return match self.iterator.next_if(|&(_, x)| x == '=' || x == c) { + // Followed by `=` + Some((_, '=')) => Some(Token { + variant: with_eq, + length: 2, + }), + + // Followed by itself. 
+ Some(_) => Some(Token { + variant: doubled, + length: 2, + }), + + // Single char token + None => Some(Token { + variant: alone, + length: 1, + }), + }; + } + } + + // Next deal with arrows + let arrows = [ + ('-', TokenTy::Minus, TokenTy::MinusEq, TokenTy::SingleArrow), + ('=', TokenTy::Eq, TokenTy::EqEq, TokenTy::DoubleArrow), + ('~', TokenTy::Tilde, TokenTy::TildeEq, TokenTy::TildeArrow), + ]; + + for (c, alone, with_eq, as_arrow) in arrows { + if next == c { + return match self.iterator.next_if(|&(_, x)| x == '=' || x == '>') { + Some((_, '=')) => Some(Token { + variant: with_eq, + length: 2, + }), + Some((_, '>')) => Some(Token { + variant: as_arrow, + length: 2, + }), + None => Some(Token { + variant: alone, + length: 1, + }), + _ => unreachable!(), + }; + } + } + + // Dot and range operators. + if next == '.' { + return match self.iterator.next_if(|&(_, x)| x == '.') { + None => Some(Token { + variant: TokenTy::Dot, + length: 1, + }), + Some(_) => match self.iterator.next_if(|&(_, x)| x == '=') { + None => Some(Token { + variant: TokenTy::Range, + length: 2, + }), + Some(_) => Some(Token { + variant: TokenTy::RangeInclusive, + length: 3, + }), + }, + }; + } + + // Whitespace. + if next.is_whitespace() { + // Accumulate the number of bytes of whitespace consumed. + let mut acc = next.len_utf8(); + // Use while-let instead of take-while to avoid consuming the whole iterator. + while let Some((_, consumed)) = self.iterator.next_if(|&(_, x)| x.is_whitespace()) { + acc += consumed.len_utf8(); + } + + return Some(Token { + variant: TokenTy::Whitespace, + length: acc, + }); + } + + // Identifiers + if unicode_ident::is_xid_start(next) || next == '_' { + // Accumulate the number of bytes consumed in the identifier. + let mut acc = next.len_utf8(); + // Consume the rest of the identifier. + while let Some((_, consumed)) = self + .iterator + .next_if(|&(_, x)| unicode_ident::is_xid_continue(x)) + { + acc += consumed.len_utf8(); + } + + // Get the matching source code to check for reserved words. + let range = start_index..start_index + acc; + let matching_source = &self.source[range]; + + // Match on reserved words. + let variant: TokenTy = match matching_source { + // Declaration keywords + "class" => TokenTy::Class, + "struct" => TokenTy::Struct, + "record" => TokenTy::Record, + "trait" => TokenTy::Trait, + "func" => TokenTy::Func, + "enum" => TokenTy::Enum, + "union" => TokenTy::Union, + "module" => TokenTy::Module, + "import" => TokenTy::Import, + "implement" => TokenTy::Implement, + "represent" => TokenTy::Represent, + + // Visibility keywords + "public" => TokenTy::Public, + "package" => TokenTy::Package, + "private" => TokenTy::Private, + + // Boolean literals + "true" => TokenTy::True, + "false" => TokenTy::False, + + // Other keywords. 
+ "constraint" => TokenTy::Constraint, + "constrain" => TokenTy::Constrain, + "relation" => TokenTy::Relation, + "unsafe" => TokenTy::Unsafe, + "unchecked" => TokenTy::Unchecked, + "lifetime" => TokenTy::Lifetime, + "outlives" => TokenTy::Outlives, + "Self" => TokenTy::SelfUpper, + "self" => TokenTy::SelfLower, + "type" => TokenTy::Type, + "const" => TokenTy::Const, + "var" => TokenTy::Var, + "if" => TokenTy::If, + "else" => TokenTy::Else, + "match" => TokenTy::Match, + "is" => TokenTy::Is, + "as" => TokenTy::As, + "on" => TokenTy::On, + "in" => TokenTy::In, + "not" => TokenTy::Not, + "dyn" => TokenTy::Dyn, + "try" => TokenTy::Try, + + _ => TokenTy::Identifier, + }; + + return Some(Token { + variant, + length: acc, + }); + } + + // Numerical literals. + if next.is_ascii_digit() { + // Accumulate the number of bytes consumed in the numeric literal. + // All ascii is 1 byte wide so avoid the extra call to `.len_utf8()`. + let mut acc = 1; + // Track the radix + let mut radix = 10; + + // Change the radix if necessary + if next == '0' { + if let Some((_, prefix)) = self + .iterator + .next_if(|(_, x)| ['x', 'o', 'b', 'X', 'B'].contains(x)) + { + acc += 1; + + radix = match prefix { + 'x' | 'X' => 16, + 'b' | 'B' => 2, + 'o' => 8, + _ => unreachable!(), + }; + } + } + + // Consume the rest of the integer literal. + while self + .iterator + .next_if(|&(_, x)| x.is_digit(radix) || x == '_') + .is_some() + { + // All accepted characters should be ascii, so we can just simplify `.len_utf8()` to 1. + acc += 1; + } + + return Some(Token { + variant: TokenTy::IntegerLit, + length: acc, + }); + } + + // String and Character literals. + if ['\'', '"', '`'].contains(&next) { + // Accumulator to track number of bytes consumed. + let mut acc: usize = 1; + let mut is_terminated = false; + + // Consume characters until the end of the literal + while let Some((_, consumed)) = self.iterator.next() { + acc += consumed.len_utf8(); + + match consumed { + // Ending character is the same as starting character. + // Escapes should all be handled, so don't worry about this being escaped. + _ if consumed == next => { + is_terminated = true; + break; + } + + // Escaped pattern. + // Only worry about escaped terminators here, since all other escaped + // patterns can be dealt with later. + '\\' => { + // Consume the escaped character regardless of what it is. + // It will always be part of the quoted literal. + if let Some((_, escaped)) = self.iterator.next() { + acc += escaped.len_utf8(); + } + } + + // Do nothing for non-escaped chars since the quoted literal continues + // and we have already recorded the consumed bytes. + _ => {} + } + } + + // We have finished consuming the literal -- make sure we produce the + // right variant + return match next { + '\'' => Some(Token { + variant: TokenTy::CharLit { is_terminated }, + length: acc, + }), + _ => Some(Token { + variant: TokenTy::StringLit { + is_format: next == '`', + is_terminated, + }, + length: acc, + }), + }; + } + + // Comments. + if next == '#' { + // Use accumulator to track number of bytes consumed. + let mut acc = 1; + + // There are a few variants as follows. + // `#...` - single line comment + // `#*...*#` - multiline comment + // `##...` - single line inner doc comment + // `##!...` - single line outer doc comment + // `#**...*#` - multiline inner doc comment + // `#*!...*#` - multiline outer doc comment + // If a multiline comment is not terminated by the end of the file then just mark it as such in the + // produced token. 
A seperate token error handling layer will raise that outside of this function. + + // Handle multiline comments + if self.iterator.next_if(|&(_, x)| x == '*').is_some() { + acc += 1; + + // Check if it's a doc comment. + let comment_type = match self.iterator.next_if(|&(_, x)| x == '*' || x == '!') { + Some((_, '*')) => { + acc += 1; + CommentTy::InnerDoc + } + + Some((_, '!')) => { + acc += 1; + CommentTy::OuterDoc + } + + None => CommentTy::Normal, + + _ => unreachable!(), + }; + + // Read the rest of the multi-line comment + while let Some((_, consumed)) = self.iterator.next() { + acc += consumed.len_utf8(); + if consumed == '*' && matches!(self.iterator.peek(), Some((_, '#'))) { + acc += 1; + return Some(Token { + variant: TokenTy::MultilineComment { + comment_type, + is_terminated: true, + }, + length: acc, + }); + } + } + + // If we hit the end, the comment is not terminated. + return Some(Token { + variant: TokenTy::MultilineComment { + comment_type, + is_terminated: false, + }, + length: acc, + }); + } + + // Handle single line comment. + let mut comment_type = CommentTy::Normal; + + // Check for inner doc comment + if self.iterator.next_if(|&(_, x)| x == '#').is_some() { + acc += 1; + comment_type = CommentTy::InnerDoc; + + // Check for outer doc comment + if self.iterator.next_if(|&(_, x)| x == '!').is_some() { + acc += 1; + comment_type = CommentTy::OuterDoc; + } + } + + // Read to end of line/file for rest of comment. Include line ending in consumed bytes. + for (_, consumed) in self.iterator.by_ref() { + acc += consumed.len_utf8(); + if consumed == '\n' { + break; + } + } + + return Some(Token { + variant: TokenTy::SingleLineComment { comment_type }, + length: acc, + }); + } + + // If we haven't matched by this point, return an unknown token. + Some(Token { + variant: TokenTy::Unknown, + length: next.len_utf8(), + }) + } + + fn size_hint(&self) -> (usize, Option) { + // Get the size hint of the internal iterator. + let (inner_lower, upper) = self.iterator.size_hint(); + // If there are any characters left, then there is at least one token remaining. + ((inner_lower > 0) as usize, upper) + } +} + +impl<'a> FusedIterator for Lexer<'a> {} + +/// A token with an index in a piece of source code. +#[derive(Copy, Clone, Debug)] +pub struct IndexedToken { + /// The byte index into the source code that this token starts on. + pub index: usize, + /// The token itself. + pub token: Token, +} + +/// An iterator over the tokens in the source code with byte indices attached. +#[derive(Debug, Clone)] +pub struct IndexedLexer<'src> { + /// The current index in source code -- the number of bytes currently consumed by the iterator. + pub index: usize, + /// The underlying lexer iterator. + lexer: Lexer<'src>, +} + +impl<'src> IndexedLexer<'src> { + /// Construct a new indexed lexer. + pub fn new(source: &'src str) -> Self { + Self { + index: 0, + lexer: Lexer::new(source), + } + } +} + +impl<'a> Iterator for IndexedLexer<'a> { + type Item = IndexedToken; + + fn next(&mut self) -> Option { + // Pull a token from the iterator. + let token = self.lexer.next()?; + + // If available, add the current index to it to return. + let indexed_token = IndexedToken { + index: self.index, + token, + }; + + // Update the current index with the length of the token. 
+ self.index += token.length; + + // Return indexed token + Some(indexed_token) + } + + fn size_hint(&self) -> (usize, Option) { + self.lexer.size_hint() + } +} + +impl<'a> FusedIterator for IndexedLexer<'a> {} diff --git a/wright/src/parser/old/lexer/definition.rs b/wright/src/parser/old/lexer/definition.rs new file mode 100644 index 00000000..8dbeb5fa --- /dev/null +++ b/wright/src/parser/old/lexer/definition.rs @@ -0,0 +1,72 @@ +//! The lexer definition in a rust constant that tells us how to handle characters encountered and lists all the +//! possible tokens produced. + +use super::tokens::{TokenTy}; + +/// A single character token matches a single character from the input, and produces a token of the length of the +/// character exactly. +#[derive(Clone, Copy, Debug)] +pub struct SingleCharToken { + /// The character to match. + pub matching_char: char, + /// The token type produced. + pub produces: TokenTy, +} + +impl SingleCharToken { + /// Turn a single character token into a lexer branch. + const fn into_lexer_branch(self) -> LexerBranch { + LexerBranch::SingleCharToken(self) + } +} + +/// A set of posible continuations from a single char token that will form multi char tokens +/// (i.e. going from `&` to `&&` and `&=`). +#[derive(Clone, Copy, Debug)] +pub struct PossibleContinuations { + /// The base single char and the token it produces when not followed by one of the other possible characters. + pub base: SingleCharToken, + /// The characters that can follow this and the tokens they would produce. + pub continuations: &'static [(char, TokenTy)] +} + +impl PossibleContinuations { + /// Convert to a [LexerBranch]. + const fn into_lexer_branch(self) -> LexerBranch { + LexerBranch::PossibleContinuations(self) + } +} + +/// A branch in the lexer, representing options to be tried. +#[derive(Debug)] +pub enum LexerBranch { + /// A single character token (such as '[') with no option for continuation. + SingleCharToken(SingleCharToken), + PossibleContinuations(PossibleContinuations) + +} + +// Below is a variety of `const-fn`s to make generating this structure easier. + +/// Makes a [SingleCharToken]. +const fn single(matching_char: char, produces: TokenTy) -> SingleCharToken { + SingleCharToken { matching_char, produces } +} + +/// Makes a [PossibleContinuations]. +const fn pc(matching_char: char, produces: TokenTy, continuations: &'static [(char, TokenTy)]) -> PossibleContinuations { + PossibleContinuations { base: single(matching_char, produces), continuations } +} + + +/// The lexer's definition, in abstract branching. 
+pub const DEFINITION: &[LexerBranch] = &[ + single('(', TokenTy::LeftParen).into_lexer_branch(), + single(')', TokenTy::RightParen).into_lexer_branch(), + + pc('+', TokenTy::Plus, &[ + ('=', TokenTy::PlusEq), + ]).into_lexer_branch(), + + +]; diff --git a/wright/src/parser/lexer/pretty_print.rs b/wright/src/parser/old/lexer/pretty_print.rs similarity index 100% rename from wright/src/parser/lexer/pretty_print.rs rename to wright/src/parser/old/lexer/pretty_print.rs diff --git a/wright/src/parser/lexer/tokens.rs b/wright/src/parser/old/lexer/tokens.rs similarity index 100% rename from wright/src/parser/lexer/tokens.rs rename to wright/src/parser/old/lexer/tokens.rs diff --git a/wright/src/parser/state.rs b/wright/src/parser/old/state.rs similarity index 100% rename from wright/src/parser/state.rs rename to wright/src/parser/old/state.rs diff --git a/wright/src/parser/util.rs b/wright/src/parser/old/util.rs similarity index 100% rename from wright/src/parser/util.rs rename to wright/src/parser/old/util.rs diff --git a/wright/src/parser/util/discard_error.rs b/wright/src/parser/old/util/discard_error.rs similarity index 100% rename from wright/src/parser/util/discard_error.rs rename to wright/src/parser/old/util/discard_error.rs diff --git a/wright/src/parser/util/erase.rs b/wright/src/parser/old/util/erase.rs similarity index 100% rename from wright/src/parser/util/erase.rs rename to wright/src/parser/old/util/erase.rs diff --git a/wright/src/parser/util/first_successful.rs b/wright/src/parser/old/util/first_successful.rs similarity index 100% rename from wright/src/parser/util/first_successful.rs rename to wright/src/parser/old/util/first_successful.rs diff --git a/wright/src/parser/util/ignore.rs b/wright/src/parser/old/util/ignore.rs similarity index 100% rename from wright/src/parser/util/ignore.rs rename to wright/src/parser/old/util/ignore.rs diff --git a/wright/src/parser/util/map.rs b/wright/src/parser/old/util/map.rs similarity index 100% rename from wright/src/parser/util/map.rs rename to wright/src/parser/old/util/map.rs diff --git a/wright/src/repl.rs b/wright/src/repl.rs index 66582fb5..74560937 100644 --- a/wright/src/repl.rs +++ b/wright/src/repl.rs @@ -2,7 +2,7 @@ use crate::{ filemap::{FileMap, FileName}, - parser::lexer::Lexer, + // parser::lexer::Lexer, WRIGHT_VERSION, }; use derive_more::Display; @@ -120,34 +120,6 @@ pub fn start() -> anyhow::Result<()> { write!(&mut output, "[{}]: << ", input_number)?; output.flush()?; - // Add line to the code map. - let file_handle = code_map.add( - FileName::Repl { - line_number: input_number, - }, - line, - ); - // Get a ref to the line we just added to the code map. - let line_ref: &str = code_map.get(file_handle).unwrap().source().as_str(); - - match repl_mode { - ReplMode::Ast => { - unimplemented!("AST mode is unimplemented."); - } - - ReplMode::Tokens => { - // Make a new lexer and iterate through the tokens generated. - let lexer = Lexer::new(line_ref); - - for token in lexer { - write!(&mut output, "[{}]", token)?; - } - - // Write newline. 
- writeln!(&mut output)?; - } - - ReplMode::Eval => unimplemented!("Eval mode is unimplemented."), - } + unimplemented!("REPL needs to be re-worked a bit."); } } diff --git a/wright/tests/lexer.rs b/wright/tests/lexer.rs index 4dd0acde..42b972a9 100644 --- a/wright/tests/lexer.rs +++ b/wright/tests/lexer.rs @@ -1,33 +1,33 @@ -use wright::parser::lexer::{ - tokens::{Token, TokenTy}, - Lexer, -}; +// use wright::parser::lexer::{ +// tokens::{Token, TokenTy}, +// Lexer, +// }; -/// Test unterminated string literal. -#[test] -fn unterminated_string_literal() { - let tokens: Vec = Lexer::new(r#""this string is not closed"#).collect(); - assert_eq!(tokens.len(), 1); - assert_eq!( - tokens[0].variant, - TokenTy::StringLit { - is_format: false, - is_terminated: false - } - ); -} +// /// Test unterminated string literal. +// #[test] +// fn unterminated_string_literal() { +// let tokens: Vec = Lexer::new(r#""this string is not closed"#).collect(); +// assert_eq!(tokens.len(), 1); +// assert_eq!( +// tokens[0].variant, +// TokenTy::StringLit { +// is_format: false, +// is_terminated: false +// } +// ); +// } -/// Test string literal with escaped terminal. -#[test] -fn string_with_escape() { - let tokens: Vec = - Lexer::new(r#" "this string has an escaped terminator \" " "#).collect(); - assert_eq!(tokens.len(), 3); - assert_eq!( - tokens[1].variant, - TokenTy::StringLit { - is_format: false, - is_terminated: true - } - ); -} +// /// Test string literal with escaped terminal. +// #[test] +// fn string_with_escape() { +// let tokens: Vec = +// Lexer::new(r#" "this string has an escaped terminator \" " "#).collect(); +// assert_eq!(tokens.len(), 3); +// assert_eq!( +// tokens[1].variant, +// TokenTy::StringLit { +// is_format: false, +// is_terminated: true +// } +// ); +// } From e9348626e7e3f71028aecc3094b2ebdcc53fe61d Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 20 Jan 2024 03:19:41 -0500 Subject: [PATCH 02/60] Better error handling on unlocks. --- wright/src/filemap.rs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/wright/src/filemap.rs b/wright/src/filemap.rs index 20d2a0ab..45d7d21b 100644 --- a/wright/src/filemap.rs +++ b/wright/src/filemap.rs @@ -124,7 +124,7 @@ impl<'src> FileMap<'src> { // Thread to warn user if it takes too long. thread::spawn(move || { thread::sleep(FILE_LOCK_WARNING_TIME); - timout_tx.send(ChannelMessage::FiveSecondWarning); + timout_tx.send(ChannelMessage::FiveSecondWarning) }); // Use an infinite loop to make sure we recieve all the messages from the senders. @@ -159,7 +159,7 @@ impl<'src> FileMap<'src> { Mmap::map(&file) // Make sure we unlock the file if there's an issue memory mapping it. .map_err(|err| { - file.unlock(); + file.unlock().map_err(|err| eprintln!("Error unlocking file: {:?}", err)).ok(); err }) }?; @@ -169,7 +169,7 @@ impl<'src> FileMap<'src> { let as_str: Result<&str, std::str::Utf8Error> = std::str::from_utf8(raw_data); if as_str.is_err() { // The file is not valid for us so we should unlock it and return an error. - file.unlock(); + file.unlock().map_err(|err| eprintln!("Error unlocking file: {:?}", err)).ok(); return Err(io::Error::new(io::ErrorKind::InvalidData, as_str.unwrap_err())); } @@ -193,7 +193,7 @@ impl<'src> Drop for FileMap<'src> { // Locked and memory-mapped files need to be unlocked before dropping. ImmutableString::LockedFile { locked_file, .. } => { // Unlock the file to give back to the OS. 
- locked_file.unlock(); + locked_file.unlock().map_err(|err| eprintln!("Error unlocking file: {:?}", err)).ok(); }, // All other types of file can drop normally. From 1e82b8582f6e694978de19efc8d696677da8d3ce Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 20 Jan 2024 03:20:01 -0500 Subject: [PATCH 03/60] Typo --- wright/src/filemap.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wright/src/filemap.rs b/wright/src/filemap.rs index 45d7d21b..5dbce910 100644 --- a/wright/src/filemap.rs +++ b/wright/src/filemap.rs @@ -196,7 +196,7 @@ impl<'src> Drop for FileMap<'src> { locked_file.unlock().map_err(|err| eprintln!("Error unlocking file: {:?}", err)).ok(); }, - // All other types of file can drop normally. + // All other types of files can drop normally. _ => {} } } From a974b69dbcd38225b2af0770cf6f0426f1078ec1 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 20 Jan 2024 20:59:11 -0500 Subject: [PATCH 04/60] Remove excess file ID calculation --- wright/src/filemap.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/wright/src/filemap.rs b/wright/src/filemap.rs index 5dbce910..b362089b 100644 --- a/wright/src/filemap.rs +++ b/wright/src/filemap.rs @@ -174,9 +174,7 @@ impl<'src> FileMap<'src> { } // The file's contents are valid utf-8, add them to the file map. - let file_id: usize = self.inner.len(); - self.add(FileName::Real(path), ImmutableString::LockedFile { locked_file: file, mem_map }); - return Ok(file_id); + return Ok(self.add(FileName::Real(path), ImmutableString::LockedFile { locked_file: file, mem_map })); } Err(_) => unreachable!("The reciever should never reach a state where both senders are closed."), From 02e284213b362be9a0c33ee14b8456371acbb2d8 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sun, 21 Jan 2024 04:42:52 -0500 Subject: [PATCH 05/60] Additions to fragments and filemaps --- wright/src/filemap.rs | 16 ++++++++++++++++ wright/src/parser/fragment.rs | 34 +++++++++++++++++++++++----------- 2 files changed, 39 insertions(+), 11 deletions(-) diff --git a/wright/src/filemap.rs b/wright/src/filemap.rs index b362089b..9bad2d2e 100644 --- a/wright/src/filemap.rs +++ b/wright/src/filemap.rs @@ -6,6 +6,7 @@ use fs4::FileExt; use memmap2::Mmap; use termcolor::{ColorChoice, StandardStream}; use std::{path::PathBuf, io, fs::File, sync::mpsc, thread, time::Duration}; +use crate::parser::fragment::Fragment; /// Rename import for clarity. use codespan_reporting::files::Error as CodespanError; @@ -181,6 +182,21 @@ impl<'src> FileMap<'src> { } } } + + /// Find the file ID of a given [Fragment] using the fragment's internal pointer. + pub fn find_fragment(&self, fragment: &Fragment<'src>) -> Option<>::FileId> { + // Iterate on file IDs. + for file_id in 0..self.inner.len() { + // Use expect because all of these file IDs should be fine. + let source: &str = self.source(file_id).expect("All file IDs here are valid"); + if (Fragment { inner: source }).contains(fragment) { + return Some(file_id); + } + } + + // If there was no file containing the given fragment, return none. + None + } } impl<'src> Drop for FileMap<'src> { diff --git a/wright/src/parser/fragment.rs b/wright/src/parser/fragment.rs index 49755154..ae5d73da 100644 --- a/wright/src/parser/fragment.rs +++ b/wright/src/parser/fragment.rs @@ -15,23 +15,35 @@ impl<'src> Fragment<'src> { self.inner.len() } - /// Return true if this fragment overlaps at all with the other (either one contains the start of the other, - /// by pointer). 
- pub fn overlaps(&self, other: &Self) -> bool { - // Get the pointer to the start of the string. - let (start, len) = (self.inner.as_ptr(), self.len()); + /// Get a pair of pointers, the first one being at the beginning of the fragment, the second one pointing + /// to the byte after the end of the fragment. + const fn start_and_end(&self) -> (*const u8, *const u8) { + // Get the pointer to the start of the fragment. + let start: *const u8 = self.inner.as_ptr(); // Get a pointer just past the end of the string. // SAFETY: the resulting pointer is guarunteed to point at one byte past the end of the string. - let end = unsafe { start.add(len) }; - - // Do the same thing for the other fragment. - let (other_start, len) = (other.inner.as_ptr(), other.len()); - let other_end = unsafe { other_start.add(len) }; + (start, unsafe { start.add(self.len()) }) + } - // Check bounds. + /// Return true if this fragment overlaps at all with the other (either one contains the start of the other, + /// by pointer). + pub fn overlaps(&self, other: &Self) -> bool { + // Get start and end pointers for both fragments. + let (start, end) = self.start_and_end(); + let (other_start, other_end) = other.start_and_end(); + // Check if this fragment contains either end of the other fragment. (start <= other_start && other_start < end) || (other_start <= start && start < other_end) } + /// Return true if this fragment entirely contains another fragment using pointers. + pub fn contains(&self, other: &Self) -> bool { + // Get start and end pointers for both fragments. + let (start, end) = self.start_and_end(); + let (other_start, other_end) = other.start_and_end(); + // Check bounds. + start <= other_start && end >= other_end + } + /// Split this fragment into two sub fragments, with the first one being `bytes` long and the second containing the /// rest of this fragment. pub fn split(&self, bytes: usize) -> (Self, Self) { From 277d5bd01429245e40189340ca380fca0605f65b Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sun, 21 Jan 2024 13:01:16 -0500 Subject: [PATCH 06/60] Fix inaccurate Drop implementation --- wright/src/filemap.rs | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/wright/src/filemap.rs b/wright/src/filemap.rs index 9bad2d2e..1ca33687 100644 --- a/wright/src/filemap.rs +++ b/wright/src/filemap.rs @@ -199,20 +199,21 @@ impl<'src> FileMap<'src> { } } -impl<'src> Drop for FileMap<'src> { +/// Implement drop here to make sure that the files get unlocked as they go out of scope/use. +impl<'src> Drop for ImmutableString<'src> { fn drop(&mut self) { - // Unlock all files from the file system. - for file in self.inner.iter() { - match file.source() { - // Locked and memory-mapped files need to be unlocked before dropping. - ImmutableString::LockedFile { locked_file, .. } => { - // Unlock the file to give back to the OS. - locked_file.unlock().map_err(|err| eprintln!("Error unlocking file: {:?}", err)).ok(); - }, - - // All other types of files can drop normally. - _ => {} + match self { + // Unlock locked files. + ImmutableString::LockedFile { locked_file, .. } => { + locked_file.unlock() + // Log the error if there is one, + .map_err(|io_err: io::Error| eprintln!("{}", io_err)) + // Discard value of result + .ok(); } + + // All other types drop trivially. 
+ ImmutableString::Owned(_) | ImmutableString::Reference(_) => {} } } } From ab217f0ad8c9abc8ec2cbf366451be9ff52bb49b Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sun, 21 Jan 2024 13:19:25 -0500 Subject: [PATCH 07/60] Doc tweaks --- wright/src/filemap.rs | 2 +- wright/src/solver.rs | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/wright/src/filemap.rs b/wright/src/filemap.rs index 1ca33687..70d05348 100644 --- a/wright/src/filemap.rs +++ b/wright/src/filemap.rs @@ -57,7 +57,7 @@ pub struct FileMap<'src> { /// This is just a list of files we're keeping track of. /// This is identical to the current implementation of [codespan_reporting::files::SimpleFiles], /// but we don't use theirs because we need to iterate over the [SimpleFile]s manually for various - /// parts of the implementation (including the [Drop] implementation). + /// parts of the implementation. inner: Vec>> } diff --git a/wright/src/solver.rs b/wright/src/solver.rs index 06bf6a66..6679fbf5 100644 --- a/wright/src/solver.rs +++ b/wright/src/solver.rs @@ -1 +1 @@ -//! The logical induction engine for wright -- this manages provingf out types and constraints at compile time. +//! The logical induction engine for wright -- this manages proving out types and constraints at compile time. From 8490ce24bc3d08b08e8b5b8fcd8b8c6b5768755b Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Mon, 22 Jan 2024 01:04:07 -0500 Subject: [PATCH 08/60] Docs clarification --- wright/src/filemap.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wright/src/filemap.rs b/wright/src/filemap.rs index 70d05348..0ce9afa0 100644 --- a/wright/src/filemap.rs +++ b/wright/src/filemap.rs @@ -56,7 +56,7 @@ enum ImmutableString<'src> { pub struct FileMap<'src> { /// This is just a list of files we're keeping track of. /// This is identical to the current implementation of [codespan_reporting::files::SimpleFiles], - /// but we don't use theirs because we need to iterate over the [SimpleFile]s manually for various + /// but we don't use theirs because we need to iterate over each [SimpleFile] manually for various /// parts of the implementation. inner: Vec>> } From 7fb7217e30930fef71e43eaa0afce598a727bec3 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 27 Jan 2024 03:16:51 -0500 Subject: [PATCH 09/60] Start tokens and lexer --- wright/src/parser/lexer.rs | 149 ++++++++++++++++++++++++++++++++++++- 1 file changed, 148 insertions(+), 1 deletion(-) diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 41ff4f60..aeaccf78 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -5,10 +5,157 @@ use super::fragment::Fragment; -/// The +/// The lexical analyser for wright. This produces a series of tokens that make up the larger elements of the language. #[derive(Debug)] pub struct Lexer<'src> { /// The remaining source code that has not been processed and returned as a token from the iterator yet. pub remaining: Fragment<'src>, } +/// A token in wright source code. +#[derive(Debug)] +pub struct Token<'src> { + /// What type of token this is. + pub variant: TokenTy, + /// The matching fragment of source code -- this contains the location and length data for the token. + pub fragment: Fragment<'src> +} + +/// The different types of tokens in wright source. +#[rustfmt::skip] // Turn off auto reformat. 
+#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum TokenTy { + LeftCurly, RightCurly, + LeftBracket, RightBracket, + LeftParen, RightParen, + + Plus, PlusEq, + Minus, MinusEq, + Star, StarEq, + Div, DivEq, + Xor, XorEq, + Mod, ModEq, + Bang, BangEq, + Eq, EqEq, + + Lt, LtEq, LtLt, + Gt, GtEq, GtGt, + And, AndEq, AndAnd, + Or, OrEq, OrOr, + + Colon, ColonColon, + + At, + Tilde, + Underscore, + Semi, + Dot, + Comma, + Hash, + + Identifier, + + OuterDocComment, OuterBlockDocComment, + InnerDocComment, InnerBlockDocComment, + + KwRecord, + KwType, + KwEnum, + KwUnion, + KwFunc, + KwRepr, + KwImpl, + KwConstraint, + KwTrait, + KwUse, + KwAs, + KwConst, + KwMod, + KwIf, + KwElse, + KwFor, + KwIn, + KwWhile, + KwTrue, + KwFalse, + KwLoop, + KwWhere, + + /// Unknown character in lexer fragment. + Unknown +} + +impl<'src> Lexer<'src> { + /// Get the number of bytes remaining that we need to transform into tokens. + pub const fn bytes_remaining(&self) -> usize { + self.remaining.len() + } + + /// Construct a new lexer over a given reference to a source string. + pub const fn new(source: &'src str) -> Self { + Lexer { remaining: Fragment { inner: source } } + } + + /// Try to match a single character to a single character token if possible. + #[rustfmt::skip] + const fn single_char_tokens(c: char) -> Option { + use TokenTy::*; + + match c { + '{' => Some(LeftCurly), + '}' => Some(RightCurly), + '[' => Some(LeftBracket), + ']' => Some(RightBracket), + '(' => Some(LeftParen), + ')' => Some(RightParen), + + '@' => Some(At), + '~' => Some(Tilde), + '_' => Some(Underscore), + '.' => Some(Dot), + ',' => Some(Comma), + ';' => Some(Semi), + '#' => Some(Hash), + + _ => None, + } + } + + /// Try to match a fragment recognized to be an identifier or keyword to + /// a keyword or return [TokenTy::Identifier]. + fn identifier_or_keyword(fragment: Fragment<'src>) -> TokenTy { + use TokenTy::*; + + match fragment.inner { + "record" => KwRecord, + "type" => KwType, + "enum" => KwEnum, + "union" => KwUnion, + "func" => KwFunc, + "repr" => KwRepr, + "impl" => KwImpl, + "constraint" => KwConstraint, + "trait" => KwTrait, + "const" => KwConst, + "where" => KwWhere, + + "use" => KwUse, + "as" => KwAs, + "mod" => KwMod, + + "if" => KwIf, + "else" => KwElse, + + "for" => KwFor, + "in" => KwIn, + "while" => KwWhile, + "loop" => KwLoop, + + "true" => KwTrue, + "false" => KwFalse, + + _ => Identifier + } + } + +} From 66a95d6a3c77af95aceadd224ea3730c6c391c54 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Wed, 31 Jan 2024 01:48:33 -0500 Subject: [PATCH 10/60] Get llvm install working and make some progress on re-creating the lexer. --- wright/Cargo.toml | 85 ++++++++++++++++------ wright/src/parser/fragment.rs | 12 +++- wright/src/parser/lexer.rs | 124 +++++++++++++++++++++++++-------- wright/src/parser/old/lexer.rs | 13 +--- 4 files changed, 170 insertions(+), 64 deletions(-) diff --git a/wright/Cargo.toml b/wright/Cargo.toml index 813d6ae3..a69f80fb 100644 --- a/wright/Cargo.toml +++ b/wright/Cargo.toml @@ -1,8 +1,9 @@ +# PACKAGE METADATA [package] name = "wright" -description = "The wright programming language interpreter and tooling." +description = "The wright programming language compiler and tooling." 
license = "MIT" -version = "0.10.0" +version = "0.9.0" authors = ["Venus Xeon-Blonde "] repository = "https://github.com/Alfriadox/wright-lang" documentation = "https://docs.rs/wright" @@ -11,37 +12,77 @@ keywords = ["wright", "language", "bytecode", "compiler", "interpreter"] edition.workspace = true rust-version.workspace = true +# LIBRARY METADATA [lib] name = "wright" test = true doctest = true doc = true -[dependencies] -clap = { version = "4", features = ["derive"] } -anyhow = "1" -derive_more = "0.99.17" -unicode-ident = "1.0" -codespan-reporting = "0.11.1" -termcolor = "1.2.0" +# BINARIES +[[bin]] +name = "wright" +test = false +doc = false +doctest = false + +# CRATES.IO BADGES +[badges.maintenance] +status = "actively-developed" + +# DEPENDENCIES: + +# Comand-line interface generator +[dependencies.clap] +version = "4" +features = ["derive"] + +# Error handling glue code +[dependencies.anyhow] +version = "1" + +# Derives for various traits +[dependencies.derive_more] +version = "0.99.17" -# Integers larger than 128 bits -num = "0.4" +# Unicode identifier functions +[dependencies.unicode-ident] +version = "1.0" + +# Source code location tracking and cli error rendering +[dependencies.codespan-reporting] +version = "0.11.1" + +# Terminal output colors +[dependencies.termcolor] +version = "1.2.0" + +# Big Integers +[dependencies.num] +version = "0.4" # Portable (windows, mac, linux) file locking -fs4 = { version = "0.7.0", features = ["sync"] } +[dependencies.fs4] +version = "0.7.0" +features = ["sync"] # Memory mapped files. -memmap2 = "0.9.3" +[dependencies.memmap2] +version = "0.9.3" -[[bin]] -name = "wright" -test = false -doc = false -doctest = false +# Unsafe bindings to LLVM +# See https://llvm.org/. +[dependencies.llvm-sys] +version = "170" +features = ["strict-versioning", "force-static"] + +# Safe bindings to llvm +[dependencies.inkwell] +version = "0.3" +features = ["llvm17-0"] -[badges] -maintenance = { status = "actively-developed" } +# TEST DEPENDENCIES -[dev-dependencies] -rayon = "1.8.0" +# Rayon to speed up brute-force testing in some cases. +[dev-dependencies.rayon] +version = "1.8.0" diff --git a/wright/src/parser/fragment.rs b/wright/src/parser/fragment.rs index ae5d73da..8ef6c4ab 100644 --- a/wright/src/parser/fragment.rs +++ b/wright/src/parser/fragment.rs @@ -15,6 +15,11 @@ impl<'src> Fragment<'src> { self.inner.len() } + /// Check if the length of this fragment is zero. + pub const fn is_empty(&self) -> bool { + self.inner.is_empty() + } + /// Get a pair of pointers, the first one being at the beginning of the fragment, the second one pointing /// to the byte after the end of the fragment. const fn start_and_end(&self) -> (*const u8, *const u8) { @@ -46,8 +51,13 @@ impl<'src> Fragment<'src> { /// Split this fragment into two sub fragments, with the first one being `bytes` long and the second containing the /// rest of this fragment. + /// + /// Panics if the byte index is not in the fragment, or if it's on a char boundary. pub fn split(&self, bytes: usize) -> (Self, Self) { - (Self { inner: &self.inner[..bytes] }, Self { inner: &self.inner[bytes..]}) + // Use str's split_at. + let (left, right) = self.inner.split_at(bytes); + + (Self { inner: left }, Self { inner: right }) } /// Get an iterator over the characters in this fragment. 
diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index aeaccf78..a3f41870 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -5,6 +5,49 @@ use super::fragment::Fragment; +/// Constant table of single character tokens and the characters that match them. +pub const SINGLE_CHAR_TOKENS: &[(char, TokenTy)] = &[ + ('(', TokenTy::LeftParen), + (')', TokenTy::RightParen), + ('[', TokenTy::LeftBracket), + (']', TokenTy::RightBracket), + ('{', TokenTy::LeftCurly), + ('}', TokenTy::RightCurly), + ('@', TokenTy::At), + (';', TokenTy::Semi), + ('?', TokenTy::Question), + (',', TokenTy::Comma), + ('#', TokenTy::Hash), + ('$', TokenTy::Dollar), +]; + +/// Tokens that can be either a single character or upgraded with an +/// equals sign. +pub const POSSIBLE_EQ_UPGRADE_TOKENS: &[(char, TokenTy, TokenTy)] = &[ + ('!', TokenTy::Bang, TokenTy::BangEq), + ('%', TokenTy::Mod, TokenTy::ModEq), + ('^', TokenTy::Xor, TokenTy::XorEq), + ('*', TokenTy::Star, TokenTy::StarEq), + ('+', TokenTy::Plus, TokenTy::PlusEq), + ('/', TokenTy::Div, TokenTy::DivEq), +]; + +/// Characters that can produce different tokens when followed by an equals sign or themselves. +pub const POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS: &[(char, TokenTy, TokenTy, TokenTy)] = &[ + ('&', TokenTy::And, TokenTy::AndEq, TokenTy::AndAnd), + ('|', TokenTy::Or, TokenTy::OrEq, TokenTy::OrOr), + ('<', TokenTy::Lt, TokenTy::LtEq, TokenTy::LtLt), + ('>', TokenTy::Gt, TokenTy::GtEq, TokenTy::GtGt), + (':', TokenTy::Colon, TokenTy::ColonEq, TokenTy::ColonColon), +]; + +/// Characters that can produce different tokens when followed by an equals sign or +/// a `>` for arrows. +pub const POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS: &[(char, TokenTy, TokenTy, TokenTy)] = &[ + ('-', TokenTy::Minus, TokenTy::MinusEq, TokenTy::SingleArrow), + ('=', TokenTy::Eq, TokenTy::EqEq, TokenTy::DoubleArrow), +]; + /// The lexical analyser for wright. This produces a series of tokens that make up the larger elements of the language. #[derive(Debug)] pub struct Lexer<'src> { @@ -30,20 +73,20 @@ pub enum TokenTy { LeftParen, RightParen, Plus, PlusEq, - Minus, MinusEq, Star, StarEq, Div, DivEq, Xor, XorEq, Mod, ModEq, Bang, BangEq, - Eq, EqEq, + + Minus, MinusEq, SingleArrow, + Eq, EqEq, DoubleArrow, Lt, LtEq, LtLt, Gt, GtEq, GtGt, And, AndEq, AndAnd, Or, OrEq, OrOr, - - Colon, ColonColon, + Colon, ColonEq, ColonColon, At, Tilde, @@ -52,6 +95,8 @@ pub enum TokenTy { Dot, Comma, Hash, + Question, + Dollar, Identifier, @@ -96,31 +141,6 @@ impl<'src> Lexer<'src> { Lexer { remaining: Fragment { inner: source } } } - /// Try to match a single character to a single character token if possible. - #[rustfmt::skip] - const fn single_char_tokens(c: char) -> Option { - use TokenTy::*; - - match c { - '{' => Some(LeftCurly), - '}' => Some(RightCurly), - '[' => Some(LeftBracket), - ']' => Some(RightBracket), - '(' => Some(LeftParen), - ')' => Some(RightParen), - - '@' => Some(At), - '~' => Some(Tilde), - '_' => Some(Underscore), - '.' => Some(Dot), - ',' => Some(Comma), - ';' => Some(Semi), - '#' => Some(Hash), - - _ => None, - } - } - /// Try to match a fragment recognized to be an identifier or keyword to /// a keyword or return [TokenTy::Identifier]. fn identifier_or_keyword(fragment: Fragment<'src>) -> TokenTy { @@ -158,4 +178,50 @@ impl<'src> Lexer<'src> { } } + /// Make a token by splitting a given number of bytes off of the `self.remaining` fragment + /// and labeling them with the given kind. 
+ fn split_token(&mut self, bytes: usize, kind: TokenTy) -> Token<'src> { + let (token_fragment, new_remaining_fragment) = self.remaining.split(bytes); + self.remaining = new_remaining_fragment; + Token { variant: kind, fragment: token_fragment } + } + + /// Get the next token from the lexer. + pub fn next_token(&mut self) -> Option> { + // If the remaining input is empty, there is no token. + if self.remaining.is_empty() { + return None; + } + + // Otherwise create a char iterator on the fragment. + // This one will be mainly used to check for shorter tokens -- a new one may be created later + // to check for identifiers and strings. + let mut char_indices = self.remaining.inner.chars(); + + // Get the next character from the iterator. + let next_char = char_indices.next().unwrap(); + + // Match a single character if possible. + for (c, kind) in SINGLE_CHAR_TOKENS { + if next_char == *c { + return Some(self.split_token(next_char.len_utf8(), *kind)); + } + } + + // Get the character after the next char if there is one. + let following_char_option = char_indices.next(); + + // Try to match a token that can be augmented with a possible additional equal sign. + for (c, without_eq, with_eq) in POSSIBLE_EQ_UPGRADE_TOKENS { + if next_char == *c { + match following_char_option { + Some('=') => return Some(self.split_token(next_char.len_utf8() + 1, *with_eq)), + _ => return Some(self.split_token(next_char.len_utf8(), *without_eq)), + } + } + } + + unimplemented!() + } + } diff --git a/wright/src/parser/old/lexer.rs b/wright/src/parser/old/lexer.rs index baf58553..051313dc 100644 --- a/wright/src/parser/old/lexer.rs +++ b/wright/src/parser/old/lexer.rs @@ -42,18 +42,7 @@ impl<'a> Iterator for Lexer<'a> { // Handle single character tokens first. let single_char_tokens = [ - ('(', TokenTy::LeftParen), - (')', TokenTy::RightParen), - ('[', TokenTy::LeftSquare), - (']', TokenTy::RightSquare), - ('{', TokenTy::LeftBracket), - ('}', TokenTy::RightBracket), - ('@', TokenTy::At), - (';', TokenTy::Semi), - ('?', TokenTy::Question), - (',', TokenTy::Comma), - ('#', TokenTy::Pound), - ('$', TokenTy::Dollar), + ]; for (c, variant) in single_char_tokens { From b673f8e6a7a68c85230f871f034c439fb9cb1b25 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Wed, 31 Jan 2024 01:57:35 -0500 Subject: [PATCH 11/60] Add llvm packages to github actions --- .github/workflows/cargo-check.yml | 8 +++----- .github/workflows/cargo-clippy.yml | 8 +++----- .github/workflows/cargo-test.yml | 8 +++----- .github/workflows/grcov.yml | 2 ++ .github/workflows/pages.yml | 2 ++ 5 files changed, 13 insertions(+), 15 deletions(-) diff --git a/.github/workflows/cargo-check.yml b/.github/workflows/cargo-check.yml index 6436c6aa..19168728 100644 --- a/.github/workflows/cargo-check.yml +++ b/.github/workflows/cargo-check.yml @@ -1,10 +1,6 @@ name: Cargo Check -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] +on: ["push", "pull_request"] jobs: check: @@ -13,6 +9,8 @@ jobs: working-directory: ./wright steps: - uses: actions/checkout@v4 + - name: Install LLVM package + run: sudo apt-get install llvm-17-dev - name: Check rust code run: cargo check working-directory: ${{env.working-directory}} diff --git a/.github/workflows/cargo-clippy.yml b/.github/workflows/cargo-clippy.yml index 6eb5ce97..a2b2b7a5 100644 --- a/.github/workflows/cargo-clippy.yml +++ b/.github/workflows/cargo-clippy.yml @@ -1,10 +1,6 @@ name: Clippy -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] +on: ["push", 
"pull_request"] jobs: clippy: @@ -13,6 +9,8 @@ jobs: working-directory: ./wright steps: - uses: actions/checkout@v4 + - name: Install LLVM package + run: sudo apt-get install llvm-17-dev - name: Run Clippy run: cargo clippy -- --deny clippy::all --deny warnings working-directory: ${{env.working-directory}} diff --git a/.github/workflows/cargo-test.yml b/.github/workflows/cargo-test.yml index ba6d178a..2e15ce07 100644 --- a/.github/workflows/cargo-test.yml +++ b/.github/workflows/cargo-test.yml @@ -1,10 +1,6 @@ name: Cargo Test -on: - push: - branches: [ master ] - pull_request: - branches: [ master ] +on: ["push", "pull_request"] jobs: test: @@ -13,6 +9,8 @@ jobs: working-directory: ./wright steps: - uses: actions/checkout@v4 + - name: Install LLVM package + run: sudo apt-get install llvm-17-dev - name: Run tests run: cargo test working-directory: ${{env.working-directory}} diff --git a/.github/workflows/grcov.yml b/.github/workflows/grcov.yml index a120d13b..28f0a123 100644 --- a/.github/workflows/grcov.yml +++ b/.github/workflows/grcov.yml @@ -7,6 +7,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - name: Install LLVM package + run: sudo apt-get install llvm-17-dev - uses: actions-rs/toolchain@v1 with: toolchain: nightly diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index 497ffbd9..bb7db730 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -26,6 +26,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 + - name: Install LLVM package + run: sudo apt-get install llvm-17-dev - name: Install mdBook run: | curl --proto '=https' --tlsv1.2 https://sh.rustup.rs -sSf -y | sh From 64cab9afcaa1f092642289a545bbc19bc59fa557 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Wed, 31 Jan 2024 02:00:42 -0500 Subject: [PATCH 12/60] Fix llvm install --- .github/workflows/cargo-check.yml | 7 +++++-- .github/workflows/cargo-clippy.yml | 7 +++++-- .github/workflows/cargo-test.yml | 7 +++++-- .github/workflows/grcov.yml | 7 +++++-- .github/workflows/pages.yml | 9 ++++++--- 5 files changed, 26 insertions(+), 11 deletions(-) diff --git a/.github/workflows/cargo-check.yml b/.github/workflows/cargo-check.yml index 19168728..8051e204 100644 --- a/.github/workflows/cargo-check.yml +++ b/.github/workflows/cargo-check.yml @@ -4,13 +4,16 @@ on: ["push", "pull_request"] jobs: check: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 env: working-directory: ./wright steps: - uses: actions/checkout@v4 - name: Install LLVM package - run: sudo apt-get install llvm-17-dev + run: | + deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main + deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main + sudo apt-get install llvm-17-dev - name: Check rust code run: cargo check working-directory: ${{env.working-directory}} diff --git a/.github/workflows/cargo-clippy.yml b/.github/workflows/cargo-clippy.yml index a2b2b7a5..75741485 100644 --- a/.github/workflows/cargo-clippy.yml +++ b/.github/workflows/cargo-clippy.yml @@ -4,13 +4,16 @@ on: ["push", "pull_request"] jobs: clippy: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 env: working-directory: ./wright steps: - uses: actions/checkout@v4 - name: Install LLVM package - run: sudo apt-get install llvm-17-dev + run: | + deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main + deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main + sudo apt-get install llvm-17-dev - name: Run Clippy run: cargo clippy -- --deny clippy::all --deny warnings working-directory: 
${{env.working-directory}} diff --git a/.github/workflows/cargo-test.yml b/.github/workflows/cargo-test.yml index 2e15ce07..a73e3070 100644 --- a/.github/workflows/cargo-test.yml +++ b/.github/workflows/cargo-test.yml @@ -4,13 +4,16 @@ on: ["push", "pull_request"] jobs: test: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 env: working-directory: ./wright steps: - uses: actions/checkout@v4 - name: Install LLVM package - run: sudo apt-get install llvm-17-dev + run: | + deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main + deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main + sudo apt-get install llvm-17-dev - name: Run tests run: cargo test working-directory: ${{env.working-directory}} diff --git a/.github/workflows/grcov.yml b/.github/workflows/grcov.yml index 28f0a123..82b032e4 100644 --- a/.github/workflows/grcov.yml +++ b/.github/workflows/grcov.yml @@ -4,11 +4,14 @@ name: Code Coverage jobs: coverage: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 - name: Install LLVM package - run: sudo apt-get install llvm-17-dev + run: | + deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main + deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main + sudo apt-get install llvm-17-dev - uses: actions-rs/toolchain@v1 with: toolchain: nightly diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index bb7db730..545c0963 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -23,11 +23,14 @@ concurrency: jobs: # Build job build: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Install LLVM package - run: sudo apt-get install llvm-17-dev + run: | + deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main + deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main + sudo apt-get install llvm-17-dev - name: Install mdBook run: | curl --proto '=https' --tlsv1.2 https://sh.rustup.rs -sSf -y | sh From a0c14e532f20f055dd04af461867ef7fe10322f8 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Wed, 31 Jan 2024 02:04:18 -0500 Subject: [PATCH 13/60] Change llvm installation method --- .github/workflows/cargo-check.yml | 8 ++++---- .github/workflows/cargo-clippy.yml | 8 ++++---- .github/workflows/cargo-test.yml | 8 ++++---- .github/workflows/grcov.yml | 8 ++++---- .github/workflows/pages.yml | 8 ++++---- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/.github/workflows/cargo-check.yml b/.github/workflows/cargo-check.yml index 8051e204..b137995c 100644 --- a/.github/workflows/cargo-check.yml +++ b/.github/workflows/cargo-check.yml @@ -9,11 +9,11 @@ jobs: working-directory: ./wright steps: - uses: actions/checkout@v4 - - name: Install LLVM package + - name: Install LLVM run: | - deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main - deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main - sudo apt-get install llvm-17-dev + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 17 - name: Check rust code run: cargo check working-directory: ${{env.working-directory}} diff --git a/.github/workflows/cargo-clippy.yml b/.github/workflows/cargo-clippy.yml index 75741485..64f1401b 100644 --- a/.github/workflows/cargo-clippy.yml +++ b/.github/workflows/cargo-clippy.yml @@ -9,11 +9,11 @@ jobs: working-directory: ./wright steps: - uses: actions/checkout@v4 - - name: Install LLVM package + - name: Install LLVM run: | - deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main - 
deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main - sudo apt-get install llvm-17-dev + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 17 - name: Run Clippy run: cargo clippy -- --deny clippy::all --deny warnings working-directory: ${{env.working-directory}} diff --git a/.github/workflows/cargo-test.yml b/.github/workflows/cargo-test.yml index a73e3070..9a12f561 100644 --- a/.github/workflows/cargo-test.yml +++ b/.github/workflows/cargo-test.yml @@ -9,11 +9,11 @@ jobs: working-directory: ./wright steps: - uses: actions/checkout@v4 - - name: Install LLVM package + - name: Install LLVM run: | - deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main - deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main - sudo apt-get install llvm-17-dev + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 17 - name: Run tests run: cargo test working-directory: ${{env.working-directory}} diff --git a/.github/workflows/grcov.yml b/.github/workflows/grcov.yml index 82b032e4..547b7814 100644 --- a/.github/workflows/grcov.yml +++ b/.github/workflows/grcov.yml @@ -7,11 +7,11 @@ jobs: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 - - name: Install LLVM package + - name: Install LLVM run: | - deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main - deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main - sudo apt-get install llvm-17-dev + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 17 - uses: actions-rs/toolchain@v1 with: toolchain: nightly diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index 545c0963..ae6bde09 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -26,11 +26,11 @@ jobs: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 - - name: Install LLVM package + - name: Install LLVM run: | - deb http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main - deb-src http://apt.llvm.org/jammy/ llvm-toolchain-jammy-17 main - sudo apt-get install llvm-17-dev + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 17 - name: Install mdBook run: | curl --proto '=https' --tlsv1.2 https://sh.rustup.rs -sSf -y | sh From ea8e3ce6f56333893e6d5d1892cd2ee99f2d3871 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Wed, 31 Jan 2024 02:10:10 -0500 Subject: [PATCH 14/60] Add missing libpolly to CI config --- .github/workflows/cargo-check.yml | 1 + .github/workflows/cargo-clippy.yml | 1 + .github/workflows/cargo-test.yml | 1 + .github/workflows/grcov.yml | 1 + .github/workflows/pages.yml | 1 + 5 files changed, 5 insertions(+) diff --git a/.github/workflows/cargo-check.yml b/.github/workflows/cargo-check.yml index b137995c..97b90d89 100644 --- a/.github/workflows/cargo-check.yml +++ b/.github/workflows/cargo-check.yml @@ -14,6 +14,7 @@ jobs: wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh sudo ./llvm.sh 17 + sudo apt install libpolly-17-dev libz-dev - name: Check rust code run: cargo check working-directory: ${{env.working-directory}} diff --git a/.github/workflows/cargo-clippy.yml b/.github/workflows/cargo-clippy.yml index 64f1401b..48c01189 100644 --- a/.github/workflows/cargo-clippy.yml +++ b/.github/workflows/cargo-clippy.yml @@ -14,6 +14,7 @@ jobs: wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh sudo ./llvm.sh 17 + sudo apt install libpolly-17-dev libz-dev - name: Run Clippy run: cargo clippy -- --deny clippy::all --deny warnings working-directory: ${{env.working-directory}} diff --git a/.github/workflows/cargo-test.yml 
b/.github/workflows/cargo-test.yml index 9a12f561..3c4786ca 100644 --- a/.github/workflows/cargo-test.yml +++ b/.github/workflows/cargo-test.yml @@ -14,6 +14,7 @@ jobs: wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh sudo ./llvm.sh 17 + sudo apt install libpolly-17-dev libz-dev - name: Run tests run: cargo test working-directory: ${{env.working-directory}} diff --git a/.github/workflows/grcov.yml b/.github/workflows/grcov.yml index 547b7814..f7c3026d 100644 --- a/.github/workflows/grcov.yml +++ b/.github/workflows/grcov.yml @@ -12,6 +12,7 @@ jobs: wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh sudo ./llvm.sh 17 + sudo apt install libpolly-17-dev libz-dev - uses: actions-rs/toolchain@v1 with: toolchain: nightly diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index ae6bde09..c978cc6d 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -31,6 +31,7 @@ jobs: wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh sudo ./llvm.sh 17 + sudo apt install libpolly-17-dev libz-dev - name: Install mdBook run: | curl --proto '=https' --tlsv1.2 https://sh.rustup.rs -sSf -y | sh From 8cef341f90963ae98254ea774ec1d7b66c6ba1b4 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Wed, 31 Jan 2024 02:14:03 -0500 Subject: [PATCH 15/60] Add comment about llvm installation method --- .github/workflows/cargo-check.yml | 2 ++ .github/workflows/cargo-clippy.yml | 2 ++ .github/workflows/cargo-test.yml | 2 ++ .github/workflows/grcov.yml | 2 ++ .github/workflows/pages.yml | 2 ++ 5 files changed, 10 insertions(+) diff --git a/.github/workflows/cargo-check.yml b/.github/workflows/cargo-check.yml index 97b90d89..2ffd82b9 100644 --- a/.github/workflows/cargo-check.yml +++ b/.github/workflows/cargo-check.yml @@ -10,6 +10,8 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install LLVM + # See: https://apt.llvm.org/ + # Last line: https://gitlab.com/taricorp/llvm-sys.rs/-/issues/13 run: | wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh diff --git a/.github/workflows/cargo-clippy.yml b/.github/workflows/cargo-clippy.yml index 48c01189..3ebff030 100644 --- a/.github/workflows/cargo-clippy.yml +++ b/.github/workflows/cargo-clippy.yml @@ -10,6 +10,8 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install LLVM + # See: https://apt.llvm.org/ + # Last line: https://gitlab.com/taricorp/llvm-sys.rs/-/issues/13 run: | wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh diff --git a/.github/workflows/cargo-test.yml b/.github/workflows/cargo-test.yml index 3c4786ca..1e23b635 100644 --- a/.github/workflows/cargo-test.yml +++ b/.github/workflows/cargo-test.yml @@ -10,6 +10,8 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install LLVM + # See: https://apt.llvm.org/ + # Last line: https://gitlab.com/taricorp/llvm-sys.rs/-/issues/13 run: | wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh diff --git a/.github/workflows/grcov.yml b/.github/workflows/grcov.yml index f7c3026d..a098cbbe 100644 --- a/.github/workflows/grcov.yml +++ b/.github/workflows/grcov.yml @@ -8,6 +8,8 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install LLVM + # See: https://apt.llvm.org/ + # Last line: https://gitlab.com/taricorp/llvm-sys.rs/-/issues/13 run: | wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index c978cc6d..d3aa3832 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -27,6 +27,8 @@ jobs: steps: - uses: actions/checkout@v4 - name: Install LLVM + # See: https://apt.llvm.org/ + # Last 
line: https://gitlab.com/taricorp/llvm-sys.rs/-/issues/13 run: | wget https://apt.llvm.org/llvm.sh chmod +x llvm.sh From 42f96df5567e660474df7befa85e0b3ca59030e1 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Thu, 8 Feb 2024 00:22:40 -0500 Subject: [PATCH 16/60] Use huge compile time table for short symbol token lookup. --- wright/Cargo.toml | 10 ++ wright/benches/lexer.rs | 14 +++ wright/src/parser/fragment.rs | 8 ++ wright/src/parser/lexer.rs | 218 ++++++++++++++++++++++++++++++---- 4 files changed, 225 insertions(+), 25 deletions(-) create mode 100644 wright/benches/lexer.rs diff --git a/wright/Cargo.toml b/wright/Cargo.toml index a69f80fb..5a9b9fdc 100644 --- a/wright/Cargo.toml +++ b/wright/Cargo.toml @@ -26,6 +26,12 @@ test = false doc = false doctest = false +# BENCHMARKS +[[bench]] +name = "lexer" +harness = false + + # CRATES.IO BADGES [badges.maintenance] status = "actively-developed" @@ -86,3 +92,7 @@ features = ["llvm17-0"] # Rayon to speed up brute-force testing in some cases. [dev-dependencies.rayon] version = "1.8.0" + +# Criterion is used for benchmarking. +[dev-dependencies] +criterion = "0.5.1" diff --git a/wright/benches/lexer.rs b/wright/benches/lexer.rs new file mode 100644 index 00000000..81430d39 --- /dev/null +++ b/wright/benches/lexer.rs @@ -0,0 +1,14 @@ +//! Lexer benchmarks. + + +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use wright::parser::lexer::Lexer; + +fn bench_lex_plus_eq(c: &mut Criterion) { + c.bench_function("lex +=", |b| b.iter(|| { + Lexer::new(black_box("+=")).next_token(); + })); +} + +criterion_group!(benches, bench_lex_plus_eq); +criterion_main!(benches); diff --git a/wright/src/parser/fragment.rs b/wright/src/parser/fragment.rs index 8ef6c4ab..f67b0265 100644 --- a/wright/src/parser/fragment.rs +++ b/wright/src/parser/fragment.rs @@ -83,4 +83,12 @@ mod tests { assert!(c.overlaps(&a)); assert!(!a.overlaps(&d)); } + + #[test] + fn test_split_single() { + let a = Fragment { inner: "+" }; + let (left, right) = a.split(1); + assert_eq!(left.inner, "+"); + assert_eq!(right.inner, ""); + } } diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index a3f41870..32aed7b7 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -48,6 +48,145 @@ pub const POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS: &[(char, TokenTy, TokenTy, TokenT ('=', TokenTy::Eq, TokenTy::EqEq, TokenTy::DoubleArrow), ]; +/// The number of rows of the generated prefix table. +pub const PREFIX_TABLE_ROWS: usize = { + SINGLE_CHAR_TOKENS.len() + + 2 * POSSIBLE_EQ_UPGRADE_TOKENS.len() + + 3 * POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS.len() + + 3 * POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS.len() +}; + +/// A relationship between a prefix and the token that should be generated when that prefix matches. +#[derive(Copy, Clone, Debug)] +pub struct PrefixToToken { + /// An array of two chars. In single char tokens, the second one should be a null character (`'\0'`). + /// the char_length field will be used to slice this buffer to get the actual prefix. + pub char_buffer: [char; 2], + /// The byte length of this prefix and all generated tokens by this prefix. + pub byte_len: usize, + /// The kind of [Token] generated when this prefix matches. + pub kind: TokenTy, +} + +/// A full table generated at compile time using all the token tables +/// ([SINGLE_CHAR_TOKENS], [POSSIBLE_EQ_UPGRADE_TOKENS], [POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS], +/// [POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS]). 
+/// +/// This table can be iterated on in order when trying to match a token at the start of a fragment of source code. +pub const PREFIX_TABLE: [PrefixToToken; PREFIX_TABLE_ROWS] = { + // Make a mutable table with dummy values to replace with actual values. + let mut table: [PrefixToToken; PREFIX_TABLE_ROWS] = + [PrefixToToken { char_buffer: ['\0'; 2], byte_len: 0, kind: TokenTy::Unknown }; PREFIX_TABLE_ROWS]; + + // Current index to insert into table at. + let mut write_index: usize = 0; + + // Index used for reading from various tables. + let mut read_index: usize = 0; + + // Iterate first over all the single char tokens. + while read_index < SINGLE_CHAR_TOKENS.len() { + // Get row from source table. + let (c, token_kind) = SINGLE_CHAR_TOKENS[read_index]; + + // Put row in destination table. + table[write_index] = PrefixToToken { + char_buffer: [c, '\0'], + byte_len: c.len_utf8(), + kind: token_kind, + }; + + // Increment both indices. + read_index += 1; + write_index += 1; + } + + // Then do all the tokens that can be upgraded with an equals sign. + // Add the row for the token with the equals sign first so that when we iterate over this table in order, + // the version without the equals sign does not match prematurely. + read_index = 0; + while read_index < POSSIBLE_EQ_UPGRADE_TOKENS.len() { + let (c, without_eq, with_eq) = POSSIBLE_EQ_UPGRADE_TOKENS[read_index]; + + table[write_index] = PrefixToToken { + char_buffer: [c, '='], + byte_len: c.len_utf8() + '='.len_utf8(), + kind: with_eq, + }; + + write_index += 1; + table[write_index] = PrefixToToken { + char_buffer: [c, '\0'], + byte_len: c.len_utf8(), + kind: without_eq, + }; + + read_index += 1; + write_index += 1; + } + + // Do the same for the tokens that can be upgraded with an equals sign or doubled. + read_index = 0; + while read_index < POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS.len() { + let (c, without_eq, with_eq, doubled) = POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS[read_index]; + + table[write_index] = PrefixToToken { + char_buffer: [c, c], + byte_len: 2 * c.len_utf8(), + kind: doubled, + }; + + write_index += 1; + table[write_index] = PrefixToToken { + char_buffer: [c, '='], + byte_len: c.len_utf8() + '='.len_utf8(), + kind: with_eq, + }; + + write_index += 1; + table[write_index] = PrefixToToken { + char_buffer: [c, '\0'], + byte_len: c.len_utf8(), + kind: without_eq, + }; + + read_index += 1; + write_index += 1; + } + + // Do the same for possible eq or arrow upgrades. + read_index = 0; + while read_index < POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS.len() { + let (c, without_eq, with_eq, with_arrow) = POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS[read_index]; + + table[write_index] = PrefixToToken { + char_buffer: [c, '>'], + byte_len: c.len_utf8() + '>'.len_utf8(), + kind: with_arrow, + }; + + write_index += 1; + table[write_index] = PrefixToToken { + char_buffer: [c, '='], + byte_len: c.len_utf8() + '='.len_utf8(), + kind: with_eq, + }; + + write_index += 1; + table[write_index] = PrefixToToken { + char_buffer: [c, '\0'], + byte_len: c.len_utf8(), + kind: without_eq, + }; + + read_index += 1; + write_index += 1; + } + + table +}; + + /// The lexical analyser for wright. This produces a series of tokens that make up the larger elements of the language. 
#[derive(Debug)] pub struct Lexer<'src> { @@ -186,42 +325,71 @@ impl<'src> Lexer<'src> { Token { variant: kind, fragment: token_fragment } } + /// See if the remaining fragment in this [Lexer] starts with a given [str] prefix and if so, + /// split off a token of the length of this prefix with the given variant. + fn match_str_prefix(&mut self, prefix: &str, token_kind: TokenTy) -> Option> { + if self.remaining.inner.starts_with(prefix) { + Some(self.split_token(prefix.len(), token_kind)) + } else { + None + } + } + + /// Get the next token from the lexer. pub fn next_token(&mut self) -> Option> { // If the remaining input is empty, there is no token. if self.remaining.is_empty() { return None; } - - // Otherwise create a char iterator on the fragment. - // This one will be mainly used to check for shorter tokens -- a new one may be created later - // to check for identifiers and strings. - let mut char_indices = self.remaining.inner.chars(); - - // Get the next character from the iterator. - let next_char = char_indices.next().unwrap(); - - // Match a single character if possible. - for (c, kind) in SINGLE_CHAR_TOKENS { - if next_char == *c { - return Some(self.split_token(next_char.len_utf8(), *kind)); + + // To attempt to match a token from the prefix table, make a char iterator + // and get two chars from it to test equality. None of the tokens start with a + // null character so use that as a single of an unavailable char. + let mut char_iter = self.remaining.chars(); + let char_array: [char; 2] = [ + // Just unwrap here since we know there's at least one char. + char_iter.next().unwrap(), + char_iter.next().unwrap_or('\0') + ]; + + // Next iterate through the prefix table to try to get any tokens that are covered there. + for prefix_meta in PREFIX_TABLE.iter() { + if &prefix_meta.char_buffer == &char_array { + return Some(self.split_token(prefix_meta.byte_len, prefix_meta.kind)); } } - // Get the character after the next char if there is one. - let following_char_option = char_indices.next(); + unimplemented!() + } - // Try to match a token that can be augmented with a possible additional equal sign. - for (c, without_eq, with_eq) in POSSIBLE_EQ_UPGRADE_TOKENS { - if next_char == *c { - match following_char_option { - Some('=') => return Some(self.split_token(next_char.len_utf8() + 1, *with_eq)), - _ => return Some(self.split_token(next_char.len_utf8(), *without_eq)), - } - } - } +} - unimplemented!() +#[cfg(test)] +mod tests { + use crate::parser::lexer::TokenTy; + + use super::Lexer; + use super::PREFIX_TABLE; + + #[test] + #[ignore = "this test is just used for debugging the prefix table"] + /// Run this with `cargo test manual_debug_prefix_table -- --nocapture --ignored`. 
+ fn manual_debug_prefix_table() { + dbg!(PREFIX_TABLE); } + #[test] + fn plus_and_plus_eq_tokens() { + let mut plus = Lexer::new("+"); + let mut plus_eq = Lexer::new("+="); + + let plus_token = plus.next_token().unwrap(); + let plus_eq_token = plus_eq.next_token().unwrap(); + + assert_eq!(plus.bytes_remaining(), 0); + assert_eq!(plus_eq.bytes_remaining(), 0); + assert_eq!(plus_token.variant, TokenTy::Plus); + assert_eq!(plus_eq_token.variant, TokenTy::PlusEq); + } } From f304ef8662e8d25d92bd0039235b05d92665731b Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sun, 11 Feb 2024 00:51:09 -0500 Subject: [PATCH 17/60] Add identifier parsing --- wright/Cargo.toml | 4 + wright/benches/lexer.rs | 22 +++-- wright/src/bin/wright.rs | 5 +- wright/src/parser/lexer.rs | 192 +++++++++++++++++++++---------------- wright/src/repl.rs | 8 +- 5 files changed, 134 insertions(+), 97 deletions(-) diff --git a/wright/Cargo.toml b/wright/Cargo.toml index 5a9b9fdc..8957aebb 100644 --- a/wright/Cargo.toml +++ b/wright/Cargo.toml @@ -87,6 +87,10 @@ features = ["strict-versioning", "force-static"] version = "0.3" features = ["llvm17-0"] +# Fast parsing for integers and floats from source code. +[dependencies.lexical-core] +version = "0.8" + # TEST DEPENDENCIES # Rayon to speed up brute-force testing in some cases. diff --git a/wright/benches/lexer.rs b/wright/benches/lexer.rs index 81430d39..49a95b6a 100644 --- a/wright/benches/lexer.rs +++ b/wright/benches/lexer.rs @@ -1,14 +1,24 @@ //! Lexer benchmarks. -use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use criterion::{black_box, criterion_group, criterion_main, Criterion, Bencher}; use wright::parser::lexer::Lexer; -fn bench_lex_plus_eq(c: &mut Criterion) { - c.bench_function("lex +=", |b| b.iter(|| { - Lexer::new(black_box("+=")).next_token(); - })); +fn bench_symbol_tokens(c: &mut Criterion) { + // Make a benchmark group. + let mut group = c.benchmark_group("lexer symbol benchmarks"); + + // Function to make a lexer and get a token from it. + let make_lexer_and_get_token = |b: &mut Bencher, input: &str| { + b.iter(|| Lexer::new(black_box(input)).next_token()) + }; + + let inputs = ["+", "+=", "*", "@", "?"]; + + for i in inputs { + group.bench_with_input(format!("lexer {i}"), i, make_lexer_and_get_token); + } } -criterion_group!(benches, bench_lex_plus_eq); +criterion_group!(benches, bench_symbol_tokens); criterion_main!(benches); diff --git a/wright/src/bin/wright.rs b/wright/src/bin/wright.rs index 4f548b1b..2e589622 100644 --- a/wright/src/bin/wright.rs +++ b/wright/src/bin/wright.rs @@ -2,9 +2,8 @@ use anyhow::Result; use clap::{Parser, Subcommand}; -use codespan_reporting::files::SimpleFile; -use std::{fs, path::PathBuf}; -use wright::{parser::lexer::Lexer, repl}; +use std::path::PathBuf; +use wright::repl; /// The wright cli. #[derive(Parser, Debug)] diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 32aed7b7..08676d39 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -3,6 +3,9 @@ //! Note that this will strip out comments and whitespace, returning only fragments that match one of the paterns //! defined for tokens. +use std::ptr; +use std::str::Chars; +use unicode_ident::{is_xid_continue, is_xid_start}; use super::fragment::Fragment; /// Constant table of single character tokens and the characters that match them. 
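The `unicode-ident` imports added above feed the identifier recognition introduced later in this diff. A minimal, simplified restatement of that rule (a sketch only; it checks whether a whole string has identifier shape, and ignores the keyword and underscore special-casing the lexer applies afterwards) looks roughly like this:

// Sketch only: simplified form of the identifier rule used later in this diff.
use unicode_ident::{is_xid_continue, is_xid_start};

fn is_identifier_like(s: &str) -> bool {
    let mut chars = s.chars();
    match chars.next() {
        // Identifiers start with an XID_Start char or an underscore,
        // and every following char must be XID_Continue.
        Some(c) if is_xid_start(c) || c == '_' => chars.all(is_xid_continue),
        // An empty string is not an identifier.
        _ => false,
    }
}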
@@ -68,11 +71,24 @@ pub struct PrefixToToken { pub kind: TokenTy, } +impl PrefixToToken { + /// Convenience function to construct a [`PrefixToToken`] by calculating the length of both chars + /// (and ignoring the second one if it's null). + pub const fn new(chars: [char; 2], kind: TokenTy) -> Self { + PrefixToToken { + char_buffer: chars, + byte_len: if chars[1] == '\0' { chars[0].len_utf8() } else { chars[0].len_utf8() + chars[1].len_utf8() }, + kind, + } + } +} + /// A full table generated at compile time using all the token tables /// ([SINGLE_CHAR_TOKENS], [POSSIBLE_EQ_UPGRADE_TOKENS], [POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS], /// [POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS]). /// /// This table can be iterated on in order when trying to match a token at the start of a fragment of source code. +#[rustfmt::skip] pub const PREFIX_TABLE: [PrefixToToken; PREFIX_TABLE_ROWS] = { // Make a mutable table with dummy values to replace with actual values. let mut table: [PrefixToToken; PREFIX_TABLE_ROWS] = @@ -90,11 +106,7 @@ pub const PREFIX_TABLE: [PrefixToToken; PREFIX_TABLE_ROWS] = { let (c, token_kind) = SINGLE_CHAR_TOKENS[read_index]; // Put row in destination table. - table[write_index] = PrefixToToken { - char_buffer: [c, '\0'], - byte_len: c.len_utf8(), - kind: token_kind, - }; + table[write_index] = PrefixToToken::new([c, '\0'], token_kind); // Increment both indices. read_index += 1; @@ -108,21 +120,11 @@ pub const PREFIX_TABLE: [PrefixToToken; PREFIX_TABLE_ROWS] = { while read_index < POSSIBLE_EQ_UPGRADE_TOKENS.len() { let (c, without_eq, with_eq) = POSSIBLE_EQ_UPGRADE_TOKENS[read_index]; - table[write_index] = PrefixToToken { - char_buffer: [c, '='], - byte_len: c.len_utf8() + '='.len_utf8(), - kind: with_eq, - }; - - write_index += 1; - table[write_index] = PrefixToToken { - char_buffer: [c, '\0'], - byte_len: c.len_utf8(), - kind: without_eq, - }; + table[write_index] = PrefixToToken::new([c, '='], with_eq); + table[write_index + 1] = PrefixToToken::new([c, '\0'], without_eq); read_index += 1; - write_index += 1; + write_index += 2; } // Do the same for the tokens that can be upgraded with an equals sign or doubled. @@ -130,28 +132,12 @@ pub const PREFIX_TABLE: [PrefixToToken; PREFIX_TABLE_ROWS] = { while read_index < POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS.len() { let (c, without_eq, with_eq, doubled) = POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS[read_index]; - table[write_index] = PrefixToToken { - char_buffer: [c, c], - byte_len: 2 * c.len_utf8(), - kind: doubled, - }; - - write_index += 1; - table[write_index] = PrefixToToken { - char_buffer: [c, '='], - byte_len: c.len_utf8() + '='.len_utf8(), - kind: with_eq, - }; - - write_index += 1; - table[write_index] = PrefixToToken { - char_buffer: [c, '\0'], - byte_len: c.len_utf8(), - kind: without_eq, - }; + table[write_index] = PrefixToToken::new([c, c], doubled); + table[write_index + 1] = PrefixToToken::new([c, '='], with_eq); + table[write_index + 2] = PrefixToToken::new([c, '\0'], without_eq); read_index += 1; - write_index += 1; + write_index += 3; } // Do the same for possible eq or arrow upgrades. 
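A rough illustration of what the new `PrefixToToken::new` const constructor computes for one- and two-character prefixes. Sketch only, assuming it sits alongside the definitions in lexer.rs shown above.

// Sketch only: the byte lengths PrefixToToken::new derives from its char pair.
fn prefix_len_sketch() {
    // Two-character prefix: both chars count toward byte_len ("+=" is 2 bytes).
    let plus_eq = PrefixToToken::new(['+', '='], TokenTy::PlusEq);
    assert_eq!(plus_eq.byte_len, 2);

    // A '\0' second char marks a single-character prefix, so only the first
    // char contributes to byte_len.
    let plus = PrefixToToken::new(['+', '\0'], TokenTy::Plus);
    assert_eq!(plus.byte_len, 1);
}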
@@ -159,28 +145,12 @@ pub const PREFIX_TABLE: [PrefixToToken; PREFIX_TABLE_ROWS] = { while read_index < POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS.len() { let (c, without_eq, with_eq, with_arrow) = POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS[read_index]; - table[write_index] = PrefixToToken { - char_buffer: [c, '>'], - byte_len: c.len_utf8() + '>'.len_utf8(), - kind: with_arrow, - }; - - write_index += 1; - table[write_index] = PrefixToToken { - char_buffer: [c, '='], - byte_len: c.len_utf8() + '='.len_utf8(), - kind: with_eq, - }; - - write_index += 1; - table[write_index] = PrefixToToken { - char_buffer: [c, '\0'], - byte_len: c.len_utf8(), - kind: without_eq, - }; + table[write_index] = PrefixToToken::new([c, '>'], with_arrow); + table[write_index + 1] = PrefixToToken::new([c, '='], with_eq); + table[write_index + 2] = PrefixToToken::new([c, '\0'], without_eq); read_index += 1; - write_index += 1; + write_index += 3; } table @@ -229,13 +199,15 @@ pub enum TokenTy { At, Tilde, - Underscore, Semi, Dot, Comma, Hash, Question, Dollar, + + // Not in the same group as the other ones there since it can be used at the start of identifiers. + Underscore, Identifier, @@ -313,6 +285,8 @@ impl<'src> Lexer<'src> { "true" => KwTrue, "false" => KwFalse, + "_" => Underscore, + _ => Identifier } } @@ -325,38 +299,76 @@ impl<'src> Lexer<'src> { Token { variant: kind, fragment: token_fragment } } - /// See if the remaining fragment in this [Lexer] starts with a given [str] prefix and if so, - /// split off a token of the length of this prefix with the given variant. - fn match_str_prefix(&mut self, prefix: &str, token_kind: TokenTy) -> Option> { - if self.remaining.inner.starts_with(prefix) { - Some(self.split_token(prefix.len(), token_kind)) - } else { - None - } - } - - /// Get the next token from the lexer. pub fn next_token(&mut self) -> Option> { // If the remaining input is empty, there is no token. if self.remaining.is_empty() { return None; } + + // Use blocks heavily in this function as we don't want to re-use iterators or variables + // after we check them in most cases. + + // If there is whitespace at the start of the remaining fragment, strip it and re-run this + // function to get the next token. + { + let without_whitespace: &str = self.remaining.inner.trim_start(); + + if !ptr::eq(without_whitespace, self.remaining.inner) { + self.remaining.inner = without_whitespace; + return self.next_token(); + } + } // To attempt to match a token from the prefix table, make a char iterator // and get two chars from it to test equality. None of the tokens start with a // null character so use that as a single of an unavailable char. - let mut char_iter = self.remaining.chars(); - let char_array: [char; 2] = [ - // Just unwrap here since we know there's at least one char. - char_iter.next().unwrap(), - char_iter.next().unwrap_or('\0') - ]; - - // Next iterate through the prefix table to try to get any tokens that are covered there. - for prefix_meta in PREFIX_TABLE.iter() { - if &prefix_meta.char_buffer == &char_array { - return Some(self.split_token(prefix_meta.byte_len, prefix_meta.kind)); + { + let mut char_iter: Chars = self.remaining.chars(); + let char_array: [char; 2] = [ + // Unchecked unwrap here since we know there's at least one char. + unsafe { char_iter.next().unwrap_unchecked() }, + char_iter.next().unwrap_or('\0') + ]; + + // Next iterate through the prefix table to try to get any tokens that are covered there. 
+ for prefix_meta in PREFIX_TABLE.iter() { + // If it's a single char comparison, only compare the first chars. + if prefix_meta.char_buffer[1] == '\0' && prefix_meta.char_buffer[0] == char_array[0] { + return Some(self.split_token(prefix_meta.byte_len, prefix_meta.kind)); + } + + // Otherwise compare the whole slices. + if &prefix_meta.char_buffer == &char_array { + return Some(self.split_token(prefix_meta.byte_len, prefix_meta.kind)); + } + } + } + + // Next attempt to match a keyword or identifier. + { + let mut chars: Chars = self.remaining.chars(); + + // The unsafe is fine here -- we've established that this lexer has bytes remaining. + let next: char = unsafe { chars.next().unwrap_unchecked() }; + + if is_xid_start(next) || next == '_' { + let mut bytes_consumed: usize = next.len_utf8(); + + // Take remaining chars and add to sum. + bytes_consumed += chars + .take_while(|c| is_xid_continue(*c)) + .map(char::len_utf8) + .sum::(); + + // Split the number of bytes we consumed. + let (ident_frag, new_remaining) = self.remaining.split(bytes_consumed); + // Get the token kind to produce for this fragment. + let variant = Lexer::identifier_or_keyword(ident_frag); + // Update the lexers remaining fragment. + self.remaining = new_remaining; + // Return the identifier, keyword, or underscore. + return Some(Token { variant, fragment: ident_frag }); } } @@ -368,7 +380,6 @@ impl<'src> Lexer<'src> { #[cfg(test)] mod tests { use crate::parser::lexer::TokenTy; - use super::Lexer; use super::PREFIX_TABLE; @@ -392,4 +403,21 @@ mod tests { assert_eq!(plus_token.variant, TokenTy::Plus); assert_eq!(plus_eq_token.variant, TokenTy::PlusEq); } + + #[test] + fn plus_one_token() { + let mut plus_one = Lexer::new("+1"); + let plus_token = plus_one.next_token().unwrap(); + assert_eq!(plus_one.bytes_remaining(), 1); + assert_eq!(plus_token.variant, TokenTy::Plus); + assert_eq!(plus_token.fragment.len(), 1); + } + + #[test] + fn identifiers_and_keywords() { + let mut lexer = Lexer::new("const TEST"); + + assert_eq!(lexer.next_token().unwrap().variant, TokenTy::KwConst); + assert_eq!(lexer.next_token().unwrap().variant, TokenTy::Identifier); + } } diff --git a/wright/src/repl.rs b/wright/src/repl.rs index 74560937..f0e024b5 100644 --- a/wright/src/repl.rs +++ b/wright/src/repl.rs @@ -1,10 +1,6 @@ //! The Wright interactive REPL. -use crate::{ - filemap::{FileMap, FileName}, - // parser::lexer::Lexer, - WRIGHT_VERSION, -}; +use crate::WRIGHT_VERSION; use derive_more::Display; use std::io::{self, BufRead, Write}; @@ -58,7 +54,7 @@ pub fn start() -> anyhow::Result<()> { let mut repl_mode = ReplMode::Tokens; // Make a file map to track input. - let mut code_map = FileMap::new(); + // let mut code_map = FileMap::new(); // Loop until this returns/exits. loop { From da25ba3c18978c533b87a4c053bf4d7669085c97 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sun, 11 Feb 2024 00:59:02 -0500 Subject: [PATCH 18/60] Reformatting --- wright/benches/lexer.rs | 11 +- wright/src/filemap.rs | 202 +++++++++++++++++++++------------- wright/src/parser/fragment.rs | 54 +++++---- wright/src/parser/lexer.rs | 130 ++++++++++++---------- wright/src/repl.rs | 2 +- 5 files changed, 236 insertions(+), 163 deletions(-) diff --git a/wright/benches/lexer.rs b/wright/benches/lexer.rs index 49a95b6a..35f6a8ee 100644 --- a/wright/benches/lexer.rs +++ b/wright/benches/lexer.rs @@ -1,17 +1,16 @@ //! Lexer benchmarks. 
- -use criterion::{black_box, criterion_group, criterion_main, Criterion, Bencher}; +use criterion::{black_box, criterion_group, criterion_main, Bencher, Criterion}; use wright::parser::lexer::Lexer; fn bench_symbol_tokens(c: &mut Criterion) { // Make a benchmark group. let mut group = c.benchmark_group("lexer symbol benchmarks"); - // Function to make a lexer and get a token from it. - let make_lexer_and_get_token = |b: &mut Bencher, input: &str| { - b.iter(|| Lexer::new(black_box(input)).next_token()) - }; + // Function to make a lexer and get a token from it. + fn make_lexer_and_get_token(b: &mut Bencher, input: &str) { + b.iter(|| Lexer::new(black_box(input)).next_token()); + } let inputs = ["+", "+=", "*", "@", "?"]; diff --git a/wright/src/filemap.rs b/wright/src/filemap.rs index 0ce9afa0..eb9d8984 100644 --- a/wright/src/filemap.rs +++ b/wright/src/filemap.rs @@ -1,20 +1,24 @@ //! Responsible for keeping track of different files added to the Wright build system. -use codespan_reporting::{files::{Files, SimpleFile}, term::Config, diagnostic::Diagnostic}; +use crate::parser::fragment::Fragment; +use codespan_reporting::{ + diagnostic::Diagnostic, + files::{Files, SimpleFile}, + term::Config, +}; use derive_more::Display; use fs4::FileExt; use memmap2::Mmap; +use std::{fs::File, io, path::PathBuf, sync::mpsc, thread, time::Duration}; use termcolor::{ColorChoice, StandardStream}; -use std::{path::PathBuf, io, fs::File, sync::mpsc, thread, time::Duration}; -use crate::parser::fragment::Fragment; -/// Rename import for clarity. +/// Rename import for clarity. use codespan_reporting::files::Error as CodespanError; -/// Convenience type alias. +/// Convenience type alias. type CodespanResult = Result; -/// Amount of time before we should warn the user about locking the file taking too long. +/// Amount of time before we should warn the user about locking the file taking too long. const FILE_LOCK_WARNING_TIME: Duration = Duration::from_secs(5); /// Used to represent different file names used throughout this crate. @@ -33,112 +37,127 @@ pub enum FileName { None, } -/// An immutable string that either references a source file in memory using an `&` reference or using a [Box]. +/// An immutable string that either references a source file in memory using an `&` reference or using a [Box]. #[derive(Debug)] enum ImmutableString<'src> { - /// An immutable reference to an existing string. + /// An immutable reference to an existing string. Reference(&'src str), - /// An owned immutable string. + /// An owned immutable string. Owned(Box), - /// A locked, memory mapped file from the OS. + /// A locked, memory mapped file from the OS. LockedFile { - /// The locked file that needs to be unlocked when this object is dropped. + /// The locked file that needs to be unlocked when this object is dropped. locked_file: File, /// The memory locked file -- this is expected to be locked before - /// one creates it in the file + /// one creates it in the file mem_map: Mmap, - } + }, } /// The file map that we use throughout the rest of this crate. pub struct FileMap<'src> { - /// This is just a list of files we're keeping track of. + /// This is just a list of files we're keeping track of. 
/// This is identical to the current implementation of [codespan_reporting::files::SimpleFiles], - /// but we don't use theirs because we need to iterate over each [SimpleFile] manually for various + /// but we don't use theirs because we need to iterate over each [SimpleFile] manually for various /// parts of the implementation. - inner: Vec>> + inner: Vec>>, } - impl<'src> FileMap<'src> { - /// Construct a new empty [FileMap]. + /// Construct a new empty [FileMap]. pub const fn new() -> Self { FileMap { inner: Vec::new() } } - /// Get a reference to a file from the internal [Vec] or return a [`CodespanError::FileMissing`] error. - fn get(&self, file_id: >::FileId) -> CodespanResult<&SimpleFile>> { + /// Get a reference to a file from the internal [Vec] or return a [`CodespanError::FileMissing`] error. + fn get( + &self, + file_id: >::FileId, + ) -> CodespanResult<&SimpleFile>> { self.inner.get(file_id).ok_or(CodespanError::FileMissing) } /// Internal function to add a file to the vec. Public facing functions will need to do some conversion - /// and then call this. - fn add(&mut self, name: FileName, source: ImmutableString<'src>) -> >::FileId { + /// and then call this. + fn add( + &mut self, + name: FileName, + source: ImmutableString<'src>, + ) -> >::FileId { // The file id is just the next index in the vec. let file_id: usize = self.inner.len(); self.inner.push(SimpleFile::new(name, source)); file_id } - /// Add a file (in the form of an owned string) to the file map. + /// Add a file (in the form of an owned string) to the file map. pub fn add_string(&mut self, name: FileName, source: String) -> >::FileId { self.add(name, ImmutableString::Owned(source.into_boxed_str())) } - /// Add a file (in the form of a string reference) to the file map. - pub fn add_str_ref(&mut self, name: FileName, source: &'src str) -> >::FileId { + /// Add a file (in the form of a string reference) to the file map. + pub fn add_str_ref( + &mut self, + name: FileName, + source: &'src str, + ) -> >::FileId { self.add(name, ImmutableString::Reference(source)) } - /// Add a file from the file system. This file will be - /// opened with read permissions, locked, memory mapped, - /// and then added to the file map. The file name in the memory map will be the [PathBuf] passed to this function. + /// Add a file from the file system. This file will be + /// opened with read permissions, locked, memory mapped, + /// and then added to the file map. The file name in the memory map will be the [PathBuf] passed to this function. pub fn add_file(&mut self, path: PathBuf) -> io::Result<>::FileId> { // Make a one-off enum here to use for channel messages. enum ChannelMessage { /// The file was successfully locked. FileLocked(File), - /// There was an error locking the file. + /// There was an error locking the file. LockingError(io::Error), - /// File is taking a long time to lock. + /// File is taking a long time to lock. FiveSecondWarning, } - // Open the file for reading. + // Open the file for reading. let file: File = File::open(&path)?; - // Create two threads and a mpsc channel for warning the user if - // locking the file takes longer than 5 seconds. + // Create two threads and a mpsc channel for warning the user if + // locking the file takes longer than 5 seconds. 
let (tx, rx) = mpsc::sync_channel::(1); let timout_tx = tx.clone(); - // Thread to lock the file - thread::spawn(move || { - match file.lock_exclusive() { - Ok(_) => tx.send(ChannelMessage::FileLocked(file)), - Err(err) => tx.send(ChannelMessage::LockingError(err)) - } + // Thread to lock the file + thread::spawn(move || match file.lock_exclusive() { + Ok(_) => tx.send(ChannelMessage::FileLocked(file)), + Err(err) => tx.send(ChannelMessage::LockingError(err)), }); - // Thread to warn user if it takes too long. + // Thread to warn user if it takes too long. thread::spawn(move || { thread::sleep(FILE_LOCK_WARNING_TIME); timout_tx.send(ChannelMessage::FiveSecondWarning) }); - // Use an infinite loop to make sure we recieve all the messages from the senders. + // Use an infinite loop to make sure we recieve all the messages from the senders. loop { match rx.recv() { - // Emit the diagnostic for the 5-second warning. + // Emit the diagnostic for the 5-second warning. Ok(ChannelMessage::FiveSecondWarning) => { - // Get a lock on the standard out so that we don't get interrupted here. + // Get a lock on the standard out so that we don't get interrupted here. let stdout = StandardStream::stdout(ColorChoice::Auto); let mut stdout = stdout.lock(); // Make the diagnostic to show to the user. - let message = format!("Getting a file lock on {} has taken more than {} seconds.", path.display(), FILE_LOCK_WARNING_TIME.as_secs()); - let diagnostic: Diagnostic< as Files<'src>>::FileId> = Diagnostic::note().with_message(message); + let message = format!( + "Getting a file lock on {} has taken more than {} seconds.", + path.display(), + FILE_LOCK_WARNING_TIME.as_secs() + ); + + let diagnostic: Diagnostic< as Files<'src>>::FileId> = + Diagnostic::note().with_message(message); + // Emit the diagnostic to the user. codespan_reporting::term::emit(&mut stdout, &Config::default(), self, &diagnostic) // Convert from the potential codespan error to a normal IO err. @@ -148,53 +167,72 @@ impl<'src> FileMap<'src> { })? } - // Handle any io errors locking the file by returning them. + // Handle any io errors locking the file by returning them. Ok(ChannelMessage::LockingError(io_err)) => return Err(io_err), - // Handle success by finishing adding the file to the FileMap. + // Handle success by finishing adding the file to the FileMap. Ok(ChannelMessage::FileLocked(file)) => { - // The file is now locked, we can memmory map it and add it ro the vec. - // SAFETY: The file should be locked at this point so undefined behaviour from concurrent - // modification is avoided. - let mem_map: Mmap = unsafe { + // The file is now locked, we can memmory map it and add it ro the vec. + // SAFETY: The file should be locked at this point so undefined behaviour from concurrent + // modification is avoided. + let mem_map: Mmap = unsafe { Mmap::map(&file) - // Make sure we unlock the file if there's an issue memory mapping it. + // Make sure we unlock the file if there's an issue memory mapping it. .map_err(|err| { - file.unlock().map_err(|err| eprintln!("Error unlocking file: {:?}", err)).ok(); + file.unlock() + .map_err(|err| eprintln!("Error unlocking file: {:?}", err)) + .ok(); err }) }?; - // Double check that the file is valid utf-8. If not, return an IO error. + // Double check that the file is valid utf-8. If not, return an IO error. 
let raw_data: &[u8] = mem_map.as_ref(); let as_str: Result<&str, std::str::Utf8Error> = std::str::from_utf8(raw_data); if as_str.is_err() { - // The file is not valid for us so we should unlock it and return an error. - file.unlock().map_err(|err| eprintln!("Error unlocking file: {:?}", err)).ok(); - return Err(io::Error::new(io::ErrorKind::InvalidData, as_str.unwrap_err())); + // The file is not valid for us so we should unlock it and return an error. + file.unlock() + .map_err(|err| eprintln!("Error unlocking file: {:?}", err)) + .ok(); + + return Err(io::Error::new( + io::ErrorKind::InvalidData, + as_str.unwrap_err(), + )); } - // The file's contents are valid utf-8, add them to the file map. - return Ok(self.add(FileName::Real(path), ImmutableString::LockedFile { locked_file: file, mem_map })); + // The file's contents are valid utf-8, add them to the file map. + return Ok(self.add( + FileName::Real(path), + ImmutableString::LockedFile { + locked_file: file, + mem_map, + }, + )); } - Err(_) => unreachable!("The reciever should never reach a state where both senders are closed."), - } + Err(_) => unreachable!( + "The reciever should never reach a state where both senders are closed." + ), + } } } - /// Find the file ID of a given [Fragment] using the fragment's internal pointer. - pub fn find_fragment(&self, fragment: &Fragment<'src>) -> Option<>::FileId> { - // Iterate on file IDs. + /// Find the file ID of a given [Fragment] using the fragment's internal pointer. + pub fn find_fragment( + &self, + fragment: &Fragment<'src>, + ) -> Option<>::FileId> { + // Iterate on file IDs. for file_id in 0..self.inner.len() { - // Use expect because all of these file IDs should be fine. + // Use expect because all of these file IDs should be fine. let source: &str = self.source(file_id).expect("All file IDs here are valid"); if (Fragment { inner: source }).contains(fragment) { return Some(file_id); } } - // If there was no file containing the given fragment, return none. + // If there was no file containing the given fragment, return none. None } } @@ -205,22 +243,23 @@ impl<'src> Drop for ImmutableString<'src> { match self { // Unlock locked files. ImmutableString::LockedFile { locked_file, .. } => { - locked_file.unlock() - // Log the error if there is one, + locked_file + .unlock() + // Log the error if there is one, .map_err(|io_err: io::Error| eprintln!("{}", io_err)) // Discard value of result .ok(); } - // All other types drop trivially. + // All other types drop trivially. ImmutableString::Owned(_) | ImmutableString::Reference(_) => {} } } } -/// The implementation here is basically identical to the one for [codespan_reporting::files::SimpleFiles]. +/// The implementation here is basically identical to the one for [codespan_reporting::files::SimpleFiles]. impl<'src> Files<'src> for FileMap<'src> { - /// File IDs here are just indices into [FileMap]'s internal [Vec]. + /// File IDs here are just indices into [FileMap]'s internal [Vec]. 
type FileId = usize; type Name = FileName; @@ -231,15 +270,26 @@ impl<'src> Files<'src> for FileMap<'src> { Ok(self.get(id)?.name().clone()) } - fn source(&'src self, id: Self::FileId) -> Result { + fn source( + &'src self, + id: Self::FileId, + ) -> Result { Ok(self.get(id)?.source().as_ref()) } - fn line_index(&self, id: Self::FileId, byte_index: usize) -> Result { + fn line_index( + &self, + id: Self::FileId, + byte_index: usize, + ) -> Result { self.get(id)?.line_index((), byte_index) } - fn line_range(&self, id: Self::FileId, line_index: usize) -> Result, codespan_reporting::files::Error> { + fn line_range( + &self, + id: Self::FileId, + line_index: usize, + ) -> Result, codespan_reporting::files::Error> { self.get(id)?.line_range((), line_index) } } @@ -250,9 +300,9 @@ impl<'src> AsRef for ImmutableString<'src> { ImmutableString::Reference(str) => str, ImmutableString::Owned(str) => &str, ImmutableString::LockedFile { mem_map, .. } => { - // Get a direct reference to the data that is in the memory map. + // Get a direct reference to the data that is in the memory map. let raw_data: &[u8] = mem_map.as_ref(); - // SAFETY: UTF-8 validity is checked when the file is added to the file map. + // SAFETY: UTF-8 validity is checked when the file is added to the file map. unsafe { std::str::from_utf8_unchecked(raw_data) } } } diff --git a/wright/src/parser/fragment.rs b/wright/src/parser/fragment.rs index f67b0265..62fe95a7 100644 --- a/wright/src/parser/fragment.rs +++ b/wright/src/parser/fragment.rs @@ -2,45 +2,45 @@ use std::str::Chars; -/// A fragment of source code. +/// A fragment of source code. #[derive(Clone, Copy, Debug)] pub struct Fragment<'src> { - /// Fragments are represented using direct string references into the source file itself. - pub inner: &'src str + /// Fragments are represented using direct string references into the source file itself. + pub inner: &'src str, } impl<'src> Fragment<'src> { - /// Get the length (in bytes) of this fragment. + /// Get the length (in bytes) of this fragment. pub const fn len(&self) -> usize { self.inner.len() } - /// Check if the length of this fragment is zero. + /// Check if the length of this fragment is zero. pub const fn is_empty(&self) -> bool { self.inner.is_empty() } - /// Get a pair of pointers, the first one being at the beginning of the fragment, the second one pointing + /// Get a pair of pointers, the first one being at the beginning of the fragment, the second one pointing /// to the byte after the end of the fragment. const fn start_and_end(&self) -> (*const u8, *const u8) { - // Get the pointer to the start of the fragment. + // Get the pointer to the start of the fragment. let start: *const u8 = self.inner.as_ptr(); - // Get a pointer just past the end of the string. - // SAFETY: the resulting pointer is guarunteed to point at one byte past the end of the string. + // Get a pointer just past the end of the string. + // SAFETY: the resulting pointer is guarunteed to point at one byte past the end of the string. (start, unsafe { start.add(self.len()) }) } - /// Return true if this fragment overlaps at all with the other (either one contains the start of the other, + /// Return true if this fragment overlaps at all with the other (either one contains the start of the other, /// by pointer). pub fn overlaps(&self, other: &Self) -> bool { // Get start and end pointers for both fragments. 
let (start, end) = self.start_and_end(); let (other_start, other_end) = other.start_and_end(); - // Check if this fragment contains either end of the other fragment. + // Check if this fragment contains either end of the other fragment. (start <= other_start && other_start < end) || (other_start <= start && start < other_end) } - /// Return true if this fragment entirely contains another fragment using pointers. + /// Return true if this fragment entirely contains another fragment using pointers. pub fn contains(&self, other: &Self) -> bool { // Get start and end pointers for both fragments. let (start, end) = self.start_and_end(); @@ -50,33 +50,43 @@ impl<'src> Fragment<'src> { } /// Split this fragment into two sub fragments, with the first one being `bytes` long and the second containing the - /// rest of this fragment. - /// - /// Panics if the byte index is not in the fragment, or if it is not on a char boundary. + /// rest of this fragment. + /// + /// Panics if the byte index is not in the fragment, or if it is not on a char boundary. pub fn split(&self, bytes: usize) -> (Self, Self) { - // Use str's split_at. + // Use str's split_at. let (left, right) = self.inner.split_at(bytes); (Self { inner: left }, Self { inner: right }) } - /// Get an iterator over the characters in this fragment. + /// Get an iterator over the characters in this fragment. pub fn chars(&self) -> Chars<'src> { self.inner.chars() } } - #[cfg(test)] mod tests { use crate::parser::fragment::Fragment; #[test] fn test_overlap() { - let a = Fragment { inner: "Test string" }; - let b = Fragment { inner: &a.inner[3..] }; - let c = Fragment { inner: &a.inner[..a.len()-3] }; - let d = Fragment { inner: "other string" }; + let a = Fragment { + inner: "Test string", + }; + + let b = Fragment { + inner: &a.inner[3..], + }; + + let c = Fragment { + inner: &a.inner[..a.len() - 3], + }; + + let d = Fragment { + inner: "other string", + }; assert!(a.overlaps(&b)); assert!(b.overlaps(&c)); diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 08676d39..38806906 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -1,14 +1,14 @@ //! First pass lexer that gets run on the source code and returns a series of tokens with their associated [Fragment]s. -//! -//! Note that this will strip out comments and whitespace, returning only fragments that match one of the patterns -//! defined for tokens. +//! +//! Note that this will strip out comments and whitespace, returning only fragments that match one of the patterns +//! defined for tokens. +use super::fragment::Fragment; use std::ptr; use std::str::Chars; use unicode_ident::{is_xid_continue, is_xid_start}; -use super::fragment::Fragment; -/// Constant table of single character tokens and the characters that match them. +/// Constant table of single character tokens and the characters that match them. pub const SINGLE_CHAR_TOKENS: &[(char, TokenTy)] = &[ ('(', TokenTy::LeftParen), (')', TokenTy::RightParen), @@ -25,7 +25,7 @@ pub const SINGLE_CHAR_TOKENS: &[(char, TokenTy)] = &[ ]; /// Tokens that can be either a single character or upgraded with an -/// equals sign. +/// equals sign. pub const POSSIBLE_EQ_UPGRADE_TOKENS: &[(char, TokenTy, TokenTy)] = &[ ('!', TokenTy::Bang, TokenTy::BangEq), ('%', TokenTy::Mod, TokenTy::ModEq), @@ -35,7 +35,7 @@ pub const POSSIBLE_EQ_UPGRADE_TOKENS: &[(char, TokenTy, TokenTy)] = &[ ('/', TokenTy::Div, TokenTy::DivEq), ]; -/// Characters that can produce different tokens when followed by an equals sign or themselves.
+/// Characters that can produce different tokens when followed by an equals sign or themselves. pub const POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS: &[(char, TokenTy, TokenTy, TokenTy)] = &[ ('&', TokenTy::And, TokenTy::AndEq, TokenTy::AndAnd), ('|', TokenTy::Or, TokenTy::OrEq, TokenTy::OrOr), @@ -44,40 +44,46 @@ pub const POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS: &[(char, TokenTy, TokenTy, Toke (':', TokenTy::Colon, TokenTy::ColonEq, TokenTy::ColonColon), ]; -/// Characters that can produce different tokens when followed by an equals sign or +/// Characters that can produce different tokens when followed by an equals sign or /// a `>` for arrows. pub const POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS: &[(char, TokenTy, TokenTy, TokenTy)] = &[ ('-', TokenTy::Minus, TokenTy::MinusEq, TokenTy::SingleArrow), ('=', TokenTy::Eq, TokenTy::EqEq, TokenTy::DoubleArrow), ]; -/// The number of rows of the generated prefix table. +/// The number of rows of the generated prefix table. pub const PREFIX_TABLE_ROWS: usize = { - SINGLE_CHAR_TOKENS.len() - + 2 * POSSIBLE_EQ_UPGRADE_TOKENS.len() - + 3 * POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS.len() - + 3 * POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS.len() + SINGLE_CHAR_TOKENS.len() + + 2 * POSSIBLE_EQ_UPGRADE_TOKENS.len() + + 3 * POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS.len() + + 3 * POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS.len() }; -/// A relationship between a prefix and the token that should be generated when that prefix matches. +/// A relationship between a prefix and the token that should be generated when that prefix matches. #[derive(Copy, Clone, Debug)] pub struct PrefixToToken { - /// An array of two chars. In single char tokens, the second one should be a null character (`'\0'`). - /// the char_length field will be used to slice this buffer to get the actual prefix. + /// An array of two chars. In single char tokens, the second one should be a null character (`'\0'`). + /// the char_length field will be used to slice this buffer to get the actual prefix. pub char_buffer: [char; 2], - /// The byte length of this prefix and all generated tokens by this prefix. + /// The byte length of this prefix and all generated tokens by this prefix. pub byte_len: usize, - /// The kind of [Token] generated when this prefix matches. + /// The kind of [Token] generated when this prefix matches. pub kind: TokenTy, } impl PrefixToToken { - /// Convenience function to construct a [`PrefixToToken`] by calculating the length of both chars - /// (and ignoring the second one if it's null). + /// Convenience function to construct a [`PrefixToToken`] by calculating the length of both chars + /// (and ignoring the second one if it's null). pub const fn new(chars: [char; 2], kind: TokenTy) -> Self { PrefixToToken { char_buffer: chars, - byte_len: if chars[1] == '\0' { chars[0].len_utf8() } else { chars[0].len_utf8() + chars[1].len_utf8() }, + + byte_len: if chars[1] == '\0' { + chars[0].len_utf8() + } else { + chars[0].len_utf8() + chars[1].len_utf8() + }, + kind, } } @@ -156,21 +162,20 @@ pub const PREFIX_TABLE: [PrefixToToken; PREFIX_TABLE_ROWS] = { table }; - -/// The lexical analyser for wright. This produces a series of tokens that make up the larger elements of the language. +/// The lexical analyser for wright. This produces a series of tokens that make up the larger elements of the language. #[derive(Debug)] pub struct Lexer<'src> { - /// The remaining source code that has not been processed and returned as a token from the iterator yet. 
+ /// The remaining source code that has not been processed and returned as a token from the iterator yet. pub remaining: Fragment<'src>, } -/// A token in wright source code. +/// A token in wright source code. #[derive(Debug)] pub struct Token<'src> { - /// What type of token this is. + /// What type of token this is. pub variant: TokenTy, - /// The matching fragment of source code -- this contains the location and length data for the token. - pub fragment: Fragment<'src> + /// The matching fragment of source code -- this contains the location and length data for the token. + pub fragment: Fragment<'src>, } /// The different types of tokens in wright source. @@ -242,18 +247,20 @@ pub enum TokenTy { } impl<'src> Lexer<'src> { - /// Get the number of bytes remaining that we need to transform into tokens. + /// Get the number of bytes remaining that we need to transform into tokens. pub const fn bytes_remaining(&self) -> usize { self.remaining.len() } - /// Construct a new lexer over a given reference to a source string. + /// Construct a new lexer over a given reference to a source string. pub const fn new(source: &'src str) -> Self { - Lexer { remaining: Fragment { inner: source } } + Lexer { + remaining: Fragment { inner: source }, + } } /// Try to match a fragment recognized to be an identifier or keyword to - /// a keyword or return [TokenTy::Identifier]. + /// a keyword or return [TokenTy::Identifier]. fn identifier_or_keyword(fragment: Fragment<'src>) -> TokenTy { use TokenTy::*; @@ -269,11 +276,11 @@ impl<'src> Lexer<'src> { "trait" => KwTrait, "const" => KwConst, "where" => KwWhere, - + "use" => KwUse, "as" => KwAs, "mod" => KwMod, - + "if" => KwIf, "else" => KwElse, @@ -287,30 +294,34 @@ impl<'src> Lexer<'src> { "_" => Underscore, - _ => Identifier + _ => Identifier, } } /// Make a token by splitting a given number of bytes off of the `self.remaining` fragment - /// and labeling them with the given kind. + /// and labeling them with the given kind. fn split_token(&mut self, bytes: usize, kind: TokenTy) -> Token<'src> { let (token_fragment, new_remaining_fragment) = self.remaining.split(bytes); self.remaining = new_remaining_fragment; - Token { variant: kind, fragment: token_fragment } + + Token { + variant: kind, + fragment: token_fragment, + } } /// Get the next token from the lexer. pub fn next_token(&mut self) -> Option> { - // If the remaining input is empty, there is no token. + // If the remaining input is empty, there is no token. if self.remaining.is_empty() { return None; } - // Use blocks heavily in this function as we don't want to re-use iterators or variables - // after we check them in most cases. + // Use blocks heavily in this function as we don't want to re-use iterators or variables + // after we check them in most cases. - // If there is whitespace at the start of the remaining fragment, strip it and re-run this - // function to get the next token. + // If there is whitespace at the start of the remaining fragment, strip it and re-run this + // function to get the next token. { let without_whitespace: &str = self.remaining.inner.trim_start(); @@ -319,69 +330,72 @@ impl<'src> Lexer<'src> { return self.next_token(); } } - + // To attempt to match a token from the prefix table, make a char iterator // and get two chars from it to test equality. None of the tokens start with a // null character so use that as a single of an unavailable char. 
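// (Illustrative aside, not part of the patch itself: with remaining input "+= x" the
// two-character buffer built below comes out as ['+', '='], which matches the PlusEq row of
// PREFIX_TABLE and splits off a two-byte token; with remaining input "+ x" the buffer is
// ['+', ' '], which fails the PlusEq comparison and instead matches the single-character
// Plus row.)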
{ let mut char_iter: Chars = self.remaining.chars(); let char_array: [char; 2] = [ - // Unchecked unwrap here since we know there's at least one char. - unsafe { char_iter.next().unwrap_unchecked() }, - char_iter.next().unwrap_or('\0') + // Unchecked unwrap here since we know there's at least one char. + unsafe { char_iter.next().unwrap_unchecked() }, + char_iter.next().unwrap_or('\0'), ]; // Next iterate through the prefix table to try to get any tokens that are covered there. for prefix_meta in PREFIX_TABLE.iter() { // If it's a single char comparison, only compare the first chars. - if prefix_meta.char_buffer[1] == '\0' && prefix_meta.char_buffer[0] == char_array[0] { + if prefix_meta.char_buffer[1] == '\0' && prefix_meta.char_buffer[0] == char_array[0] + { return Some(self.split_token(prefix_meta.byte_len, prefix_meta.kind)); } - // Otherwise compare the whole slices. + // Otherwise compare the whole slices. if &prefix_meta.char_buffer == &char_array { return Some(self.split_token(prefix_meta.byte_len, prefix_meta.kind)); } } } - // Next attempt to match a keyword or identifier. + // Next attempt to match a keyword or identifier. { let mut chars: Chars = self.remaining.chars(); - // The unsafe is fine here -- we've established that this lexer has bytes remaining. + // The unsafe is fine here -- we've established that this lexer has bytes remaining. let next: char = unsafe { chars.next().unwrap_unchecked() }; if is_xid_start(next) || next == '_' { let mut bytes_consumed: usize = next.len_utf8(); - // Take remaining chars and add to sum. + // Take remaining chars and add to sum. bytes_consumed += chars .take_while(|c| is_xid_continue(*c)) .map(char::len_utf8) .sum::(); - // Split the number of bytes we consumed. + // Split the number of bytes we consumed. let (ident_frag, new_remaining) = self.remaining.split(bytes_consumed); - // Get the token kind to produce for this fragment. + // Get the token kind to produce for this fragment. let variant = Lexer::identifier_or_keyword(ident_frag); - // Update the lexers remaining fragment. + // Update the lexers remaining fragment. self.remaining = new_remaining; - // Return the identifier, keyword, or underscore. - return Some(Token { variant, fragment: ident_frag }); + // Return the identifier, keyword, or underscore. + return Some(Token { + variant, + fragment: ident_frag, + }); } } unimplemented!() } - } #[cfg(test)] mod tests { - use crate::parser::lexer::TokenTy; use super::Lexer; use super::PREFIX_TABLE; + use crate::parser::lexer::TokenTy; #[test] #[ignore = "this test is just used for debugging the prefix table"] diff --git a/wright/src/repl.rs b/wright/src/repl.rs index f0e024b5..40d16370 100644 --- a/wright/src/repl.rs +++ b/wright/src/repl.rs @@ -116,6 +116,6 @@ pub fn start() -> anyhow::Result<()> { write!(&mut output, "[{}]: << ", input_number)?; output.flush()?; - unimplemented!("REPL needs to be re-worked a bit."); + unimplemented!("REPL needs to be re-worked a bit."); } } From 2f18bb7398f2e5faf93e03c386eb6aef4b8495f2 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sun, 11 Feb 2024 01:01:08 -0500 Subject: [PATCH 19/60] Clippy changes --- wright/src/filemap.rs | 7 ++++--- wright/src/parser/lexer.rs | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/wright/src/filemap.rs b/wright/src/filemap.rs index eb9d8984..d9718b8e 100644 --- a/wright/src/filemap.rs +++ b/wright/src/filemap.rs @@ -189,7 +189,8 @@ impl<'src> FileMap<'src> { // Double check that the file is valid utf-8. If not, return an IO error. 
let raw_data: &[u8] = mem_map.as_ref(); let as_str: Result<&str, std::str::Utf8Error> = std::str::from_utf8(raw_data); - if as_str.is_err() { + + if let Err(utf8_err) = as_str { // The file is not valid for us so we should unlock it and return an error. file.unlock() .map_err(|err| eprintln!("Error unlocking file: {:?}", err)) @@ -197,7 +198,7 @@ impl<'src> FileMap<'src> { return Err(io::Error::new( io::ErrorKind::InvalidData, - as_str.unwrap_err(), + utf8_err, )); } @@ -298,7 +299,7 @@ impl<'src> AsRef for ImmutableString<'src> { fn as_ref(&self) -> &str { match self { ImmutableString::Reference(str) => str, - ImmutableString::Owned(str) => &str, + ImmutableString::Owned(str) => str, ImmutableString::LockedFile { mem_map, .. } => { // Get a direct reference to the data that is in the memory map. let raw_data: &[u8] = mem_map.as_ref(); diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 38806906..b1294d17 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -303,7 +303,7 @@ impl<'src> Lexer<'src> { fn split_token(&mut self, bytes: usize, kind: TokenTy) -> Token<'src> { let (token_fragment, new_remaining_fragment) = self.remaining.split(bytes); self.remaining = new_remaining_fragment; - + Token { variant: kind, fragment: token_fragment, @@ -351,7 +351,7 @@ impl<'src> Lexer<'src> { } // Otherwise compare the whole slices. - if &prefix_meta.char_buffer == &char_array { + if prefix_meta.char_buffer == char_array { return Some(self.split_token(prefix_meta.byte_len, prefix_meta.kind)); } } From 2acce9def458723a465619da6d6c8df0b4aad6bc Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sun, 11 Feb 2024 01:01:32 -0500 Subject: [PATCH 20/60] cargo fmt --- wright/src/filemap.rs | 7 ++----- wright/src/parser/fragment.rs | 4 ++-- wright/src/parser/lexer.rs | 2 +- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/wright/src/filemap.rs b/wright/src/filemap.rs index d9718b8e..91d10985 100644 --- a/wright/src/filemap.rs +++ b/wright/src/filemap.rs @@ -195,11 +195,8 @@ impl<'src> FileMap<'src> { file.unlock() .map_err(|err| eprintln!("Error unlocking file: {:?}", err)) .ok(); - - return Err(io::Error::new( - io::ErrorKind::InvalidData, - utf8_err, - )); + + return Err(io::Error::new(io::ErrorKind::InvalidData, utf8_err)); } // The file's contents are valid utf-8, add them to the file map. 
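Between the file-by-file diffs it is worth pausing on how the lexer surface that these patches keep reshaping is meant to be driven. The following sketch is not a file from this patch series; it assumes only the crate paths shown in the diffs (`wright::parser::lexer::{Lexer, TokenTy}`) and the `PartialEq`/`Debug` behaviour that the in-tree tests already rely on.

use wright::parser::lexer::{Lexer, TokenTy};

fn main() {
    // Lex a small in-memory source string and collect the kind of each token.
    let mut lexer = Lexer::new("use wright::lexer;");
    let mut kinds = Vec::new();

    while let Some(token) = lexer.next_token() {
        kinds.push(token.variant);
    }

    // Keywords, identifiers, "::" and ";" all come back as distinct token kinds.
    assert_eq!(
        kinds,
        [
            TokenTy::KwUse,
            TokenTy::Identifier,
            TokenTy::ColonColon,
            TokenTy::Identifier,
            TokenTy::Semi,
        ]
    );
}

Whitespace between the tokens is stripped by the lexer itself, so the caller never has to trim the input.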
diff --git a/wright/src/parser/fragment.rs b/wright/src/parser/fragment.rs index 62fe95a7..6f159ec4 100644 --- a/wright/src/parser/fragment.rs +++ b/wright/src/parser/fragment.rs @@ -79,11 +79,11 @@ mod tests { let b = Fragment { inner: &a.inner[3..], }; - + let c = Fragment { inner: &a.inner[..a.len() - 3], }; - + let d = Fragment { inner: "other string", }; diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index b1294d17..b6b917bd 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -83,7 +83,7 @@ impl PrefixToToken { } else { chars[0].len_utf8() + chars[1].len_utf8() }, - + kind, } } From 7af8d2155ad1ce9cb1bb986b280c8f6e7fe52e69 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Tue, 13 Feb 2024 01:55:51 -0500 Subject: [PATCH 21/60] Integer Literal tokens --- wright/Cargo.toml | 6 ++--- wright/src/parser/lexer.rs | 52 ++++++++++++++++++++++++++++++++++++-- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/wright/Cargo.toml b/wright/Cargo.toml index 8957aebb..df699765 100644 --- a/wright/Cargo.toml +++ b/wright/Cargo.toml @@ -87,9 +87,9 @@ features = ["strict-versioning", "force-static"] version = "0.3" features = ["llvm17-0"] -# Fast parsing for integers and floats from source code. -[dependencies.lexical-core] -version = "0.8" +# # Fast parsing for integers and floats from source code. +# [dependencies.lexical-core] +# version = "0.8" # TEST DEPENDENCIES diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index b6b917bd..f9728314 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -4,8 +4,8 @@ //! defined for tokens. use super::fragment::Fragment; -use std::ptr; use std::str::Chars; +use std::{iter::Peekable, ptr}; use unicode_ident::{is_xid_continue, is_xid_start}; /// Constant table of single character tokens and the characters that match them. @@ -242,6 +242,8 @@ pub enum TokenTy { KwLoop, KwWhere, + IntegerLiteral, + /// Unknown character in lexer fragment. Unknown } @@ -360,7 +362,6 @@ impl<'src> Lexer<'src> { // Next attempt to match a keyword or identifier. { let mut chars: Chars = self.remaining.chars(); - // The unsafe is fine here -- we've established that this lexer has bytes remaining. let next: char = unsafe { chars.next().unwrap_unchecked() }; @@ -387,6 +388,43 @@ impl<'src> Lexer<'src> { } } + // Next attempt to parse a numerical literal. + { + let mut chars: Peekable = self.remaining.chars().peekable(); + // The unsafe is fine here -- we've established that this lexer has bytes remaining. + let next: char = unsafe { chars.next().unwrap_unchecked() }; + + if next.is_ascii_digit() { + // Accumulate the number of bytes consumed in the numeric literal. + let mut acc: usize = 1; + // Track the radix + let mut radix: u32 = 10; + + // Change the radix if necessary + if next == '0' { + if let Some(prefix) = chars.next_if(|x| ['x', 'o', 'b', 'X', 'B'].contains(x)) { + // All the possible prefix chars are 1 byte ascii characters. + acc += 1; + + radix = match prefix { + 'x' | 'X' => 16, + 'b' | 'B' => 2, + 'o' => 8, + _ => unreachable!("the prefix byte is checked above"), + }; + } + } + + // Add the rest of the integer literal. 
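// (Illustrative aside, not part of the patch itself: for a remaining fragment starting with
// "0xDEAD_beef", `next` is '0', the 'x' prefix bumps `acc` to 2 and switches `radix` to 16,
// and the take_while below then adds the nine bytes of "DEAD_beef", so an eleven-byte
// TokenTy::IntegerLiteral token is split off.)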
+ acc += chars + .take_while(|c| c.is_digit(radix) || *c == '_') + .map(char::len_utf8) + .sum::(); + + return Some(self.split_token(acc, TokenTy::IntegerLiteral)); + } + } + unimplemented!() } } @@ -434,4 +472,14 @@ mod tests { assert_eq!(lexer.next_token().unwrap().variant, TokenTy::KwConst); assert_eq!(lexer.next_token().unwrap().variant, TokenTy::Identifier); } + + #[test] + fn intger_literal() { + let mut lexer = Lexer::new("123_456_789."); + + let token = lexer.next_token().unwrap(); + + assert_eq!(token.fragment.inner, "123_456_789"); + assert_eq!(token.variant, TokenTy::IntegerLiteral); + } } From a345cb7bc9c898bc6529a7af90e402598b1dc503 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sun, 18 Feb 2024 02:30:28 -0500 Subject: [PATCH 22/60] Stub AST, add token debugging, refactor FileIds --- wright/src/bin/wright.rs | 22 +++++++++++++++++++--- wright/src/filemap.rs | 29 +++++++++-------------------- wright/src/parser.rs | 1 + wright/src/parser/ast.rs | 3 +++ wright/src/parser/lexer.rs | 29 +++++++++++++++++++++++++++-- 5 files changed, 59 insertions(+), 25 deletions(-) create mode 100644 wright/src/parser/ast.rs diff --git a/wright/src/bin/wright.rs b/wright/src/bin/wright.rs index 2e589622..8ed5c89b 100644 --- a/wright/src/bin/wright.rs +++ b/wright/src/bin/wright.rs @@ -2,12 +2,13 @@ use anyhow::Result; use clap::{Parser, Subcommand}; +use codespan_reporting::files::Files; use std::path::PathBuf; -use wright::repl; +use wright::{filemap::{FileId, FileMap}, parser::lexer::{Lexer, Token}, repl}; /// The wright cli. #[derive(Parser, Debug)] -#[command(author, version, about, long_about = None)] +#[command(author, version, about, long_about = None, arg_required_else_help = true)] struct Cli { /// The subcommand passed to the wright cli. #[command(subcommand)] @@ -49,6 +50,21 @@ fn main() -> Result<()> { // Start an interactive repl. Some(Commands::Repl) => repl::start(), - _ => unimplemented!(), + // Print all the tokens for a given file. + Some(Commands::Debug { command: DebugCommands::Tokens { file, pretty: false } }) => { + let mut file_map: FileMap = FileMap::new(); + // Add the given file to the file map. + let file_id: FileId = file_map.add_file(file)?; + // Make a lexer over the entirety of the given file. + // Use unwrap here, since we know we just added the file. + let lexer: Lexer = Lexer::new(file_map.source(file_id).unwrap()); + // Get all the tokens from the lexer and print them each. + lexer.for_each(|token: Token| println!("{token:?}")); + // Return ok. + Ok(()) + }, + + + _ => unimplemented!() } } diff --git a/wright/src/filemap.rs b/wright/src/filemap.rs index 91d10985..d26d100e 100644 --- a/wright/src/filemap.rs +++ b/wright/src/filemap.rs @@ -65,6 +65,9 @@ pub struct FileMap<'src> { inner: Vec>>, } +/// File Identifier used to refer to files. +pub type FileId = as Files<'static>>::FileId; + impl<'src> FileMap<'src> { /// Construct a new empty [FileMap]. pub const fn new() -> Self { @@ -72,20 +75,13 @@ impl<'src> FileMap<'src> { } /// Get a reference to a file from the internal [Vec] or return a [`CodespanError::FileMissing`] error. - fn get( - &self, - file_id: >::FileId, - ) -> CodespanResult<&SimpleFile>> { + fn get(&self, file_id: FileId) -> CodespanResult<&SimpleFile>> { self.inner.get(file_id).ok_or(CodespanError::FileMissing) } /// Internal function to add a file to the vec. Public facing functions will need to do some conversion /// and then call this. 
- fn add( - &mut self, - name: FileName, - source: ImmutableString<'src>, - ) -> >::FileId { + fn add(&mut self, name: FileName, source: ImmutableString<'src>) -> FileId { // The file id is just the next index in the vec. let file_id: usize = self.inner.len(); self.inner.push(SimpleFile::new(name, source)); @@ -93,23 +89,19 @@ impl<'src> FileMap<'src> { } /// Add a file (in the form of an owned string) to the file map. - pub fn add_string(&mut self, name: FileName, source: String) -> >::FileId { + pub fn add_string(&mut self, name: FileName, source: String) -> FileId { self.add(name, ImmutableString::Owned(source.into_boxed_str())) } /// Add a file (in the form of a string reference) to the file map. - pub fn add_str_ref( - &mut self, - name: FileName, - source: &'src str, - ) -> >::FileId { + pub fn add_str_ref(&mut self, name: FileName, source: &'src str) -> FileId { self.add(name, ImmutableString::Reference(source)) } /// Add a file from the file system. This file will be /// opened with read permissions, locked, memory mapped, /// and then added to the file map. The file name in the memory map will be the [PathBuf] passed to this function. - pub fn add_file(&mut self, path: PathBuf) -> io::Result<>::FileId> { + pub fn add_file(&mut self, path: PathBuf) -> io::Result { // Make a one-off enum here to use for channel messages. enum ChannelMessage { /// The file was successfully locked. @@ -217,10 +209,7 @@ impl<'src> FileMap<'src> { } /// Find the file ID of a given [Fragment] using the fragment's internal pointer. - pub fn find_fragment( - &self, - fragment: &Fragment<'src>, - ) -> Option<>::FileId> { + pub fn find_fragment(&self, fragment: &Fragment<'src>) -> Option { // Iterate on file IDs. for file_id in 0..self.inner.len() { // Use expect because all of these file IDs should be fine. diff --git a/wright/src/parser.rs b/wright/src/parser.rs index a1c20b2e..142bca6b 100644 --- a/wright/src/parser.rs +++ b/wright/src/parser.rs @@ -7,3 +7,4 @@ pub mod fragment; pub mod lexer; +pub mod ast; diff --git a/wright/src/parser/ast.rs b/wright/src/parser/ast.rs new file mode 100644 index 00000000..ca1652f8 --- /dev/null +++ b/wright/src/parser/ast.rs @@ -0,0 +1,3 @@ +//! Abstract syntax tree representation for Wright source code. + + diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index f9728314..7c14b9e8 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -4,9 +4,11 @@ //! defined for tokens. use super::fragment::Fragment; +use std::iter::FusedIterator; use std::str::Chars; use std::{iter::Peekable, ptr}; use unicode_ident::{is_xid_continue, is_xid_start}; +use derive_more::Display; /// Constant table of single character tokens and the characters that match them. pub const SINGLE_CHAR_TOKENS: &[(char, TokenTy)] = &[ @@ -170,7 +172,8 @@ pub struct Lexer<'src> { } /// A token in wright source code. -#[derive(Debug)] +#[derive(Debug, Display)] +#[display(fmt = "\"{}\" ({:?})", "fragment.inner", variant)] pub struct Token<'src> { /// What type of token this is. pub variant: TokenTy, @@ -227,6 +230,7 @@ pub enum TokenTy { KwRepr, KwImpl, KwConstraint, + KwReferences, KwTrait, KwUse, KwAs, @@ -275,6 +279,7 @@ impl<'src> Lexer<'src> { "repr" => KwRepr, "impl" => KwImpl, "constraint" => KwConstraint, + "references" => KwReferences, "trait" => KwTrait, "const" => KwConst, "where" => KwWhere, @@ -425,10 +430,30 @@ impl<'src> Lexer<'src> { } } - unimplemented!() + // If we haven't matched at this point, produce a token marked as "Unknown". 
+ // The unsafe is fine -- we know from above that there are remaining characters. + let unknown_char = unsafe { self.remaining.chars().next().unwrap_unchecked() }; + return Some(self.split_token(unknown_char.len_utf8(), TokenTy::Unknown)); + } +} + +/// Lexers can be considered token iterators. +impl<'src> Iterator for Lexer<'src> { + type Item = Token<'src>; + + fn next(&mut self) -> Option { + self.next_token() + } + + fn size_hint(&self) -> (usize, Option) { + // Lexers cannot return multiple tokens for a single byte. + (0, Some(self.bytes_remaining())) } } +// Lexers are fused -- they cannot generate tokens infinitely. +impl<'src> FusedIterator for Lexer<'src> {} + #[cfg(test)] mod tests { use super::Lexer; From 82187d3659cc446ae9c2aef16f1f3a98e7904b02 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sun, 18 Feb 2024 02:31:10 -0500 Subject: [PATCH 23/60] cargo fmt --- wright/src/bin/wright.rs | 29 +++++++++++++++++++---------- wright/src/filemap.rs | 2 +- wright/src/parser.rs | 2 +- wright/src/parser/ast.rs | 4 +--- wright/src/parser/lexer.rs | 10 +++++----- 5 files changed, 27 insertions(+), 20 deletions(-) diff --git a/wright/src/bin/wright.rs b/wright/src/bin/wright.rs index 8ed5c89b..61ce6bc3 100644 --- a/wright/src/bin/wright.rs +++ b/wright/src/bin/wright.rs @@ -4,7 +4,11 @@ use anyhow::Result; use clap::{Parser, Subcommand}; use codespan_reporting::files::Files; use std::path::PathBuf; -use wright::{filemap::{FileId, FileMap}, parser::lexer::{Lexer, Token}, repl}; +use wright::{ + filemap::{FileId, FileMap}, + parser::lexer::{Lexer, Token}, + repl, +}; /// The wright cli. #[derive(Parser, Debug)] @@ -50,21 +54,26 @@ fn main() -> Result<()> { // Start an interactive repl. Some(Commands::Repl) => repl::start(), - // Print all the tokens for a given file. - Some(Commands::Debug { command: DebugCommands::Tokens { file, pretty: false } }) => { + // Print all the tokens for a given file. + Some(Commands::Debug { + command: + DebugCommands::Tokens { + file, + pretty: false, + }, + }) => { let mut file_map: FileMap = FileMap::new(); - // Add the given file to the file map. + // Add the given file to the file map. let file_id: FileId = file_map.add_file(file)?; - // Make a lexer over the entirety of the given file. - // Use unwrap here, since we know we just added the file. + // Make a lexer over the entirety of the given file. + // Use unwrap here, since we know we just added the file. let lexer: Lexer = Lexer::new(file_map.source(file_id).unwrap()); - // Get all the tokens from the lexer and print them each. + // Get all the tokens from the lexer and print them each. lexer.for_each(|token: Token| println!("{token:?}")); // Return ok. Ok(()) - }, + } - - _ => unimplemented!() + _ => unimplemented!(), } } diff --git a/wright/src/filemap.rs b/wright/src/filemap.rs index d26d100e..5c5612f8 100644 --- a/wright/src/filemap.rs +++ b/wright/src/filemap.rs @@ -65,7 +65,7 @@ pub struct FileMap<'src> { inner: Vec>>, } -/// File Identifier used to refer to files. +/// File Identifier used to refer to files. 
pub type FileId = as Files<'static>>::FileId; impl<'src> FileMap<'src> { diff --git a/wright/src/parser.rs b/wright/src/parser.rs index 142bca6b..6f03f9b7 100644 --- a/wright/src/parser.rs +++ b/wright/src/parser.rs @@ -5,6 +5,6 @@ // pub mod state; // pub mod util; +pub mod ast; pub mod fragment; pub mod lexer; -pub mod ast; diff --git a/wright/src/parser/ast.rs b/wright/src/parser/ast.rs index ca1652f8..9b7bc8c1 100644 --- a/wright/src/parser/ast.rs +++ b/wright/src/parser/ast.rs @@ -1,3 +1 @@ -//! Abstract syntax tree representation for Wright source code. - - +//! Abstract syntax tree representation for Wright source code. diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 7c14b9e8..07518a29 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -4,11 +4,11 @@ //! defined for tokens. use super::fragment::Fragment; +use derive_more::Display; use std::iter::FusedIterator; use std::str::Chars; use std::{iter::Peekable, ptr}; use unicode_ident::{is_xid_continue, is_xid_start}; -use derive_more::Display; /// Constant table of single character tokens and the characters that match them. pub const SINGLE_CHAR_TOKENS: &[(char, TokenTy)] = &[ @@ -431,13 +431,13 @@ impl<'src> Lexer<'src> { } // If we haven't matched at this point, produce a token marked as "Unknown". - // The unsafe is fine -- we know from above that there are remaining characters. + // The unsafe is fine -- we know from above that there are remaining characters. let unknown_char = unsafe { self.remaining.chars().next().unwrap_unchecked() }; return Some(self.split_token(unknown_char.len_utf8(), TokenTy::Unknown)); } } -/// Lexers can be considered token iterators. +/// Lexers can be considered token iterators. impl<'src> Iterator for Lexer<'src> { type Item = Token<'src>; @@ -446,12 +446,12 @@ impl<'src> Iterator for Lexer<'src> { } fn size_hint(&self) -> (usize, Option) { - // Lexers cannot return multiple tokens for a single byte. + // Lexers cannot return multiple tokens for a single byte. (0, Some(self.bytes_remaining())) } } -// Lexers are fused -- they cannot generate tokens infinitely. +// Lexers are fused -- they cannot generate tokens infinitely. impl<'src> FusedIterator for Lexer<'src> {} #[cfg(test)] From c9a3740b8e6ae71117fc13575f9f530eb8605f56 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Mon, 19 Feb 2024 01:55:06 -0500 Subject: [PATCH 24/60] Ignore single line comments in lexer, print add command-line arg to print time elapsed. --- wright/src/bin/wright.rs | 22 +++++++++++++++++----- wright/src/parser/lexer.rs | 29 +++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 5 deletions(-) diff --git a/wright/src/bin/wright.rs b/wright/src/bin/wright.rs index 61ce6bc3..5a43cf81 100644 --- a/wright/src/bin/wright.rs +++ b/wright/src/bin/wright.rs @@ -3,7 +3,7 @@ use anyhow::Result; use clap::{Parser, Subcommand}; use codespan_reporting::files::Files; -use std::path::PathBuf; +use std::{path::PathBuf, time::Instant}; use wright::{ filemap::{FileId, FileMap}, parser::lexer::{Lexer, Token}, @@ -17,6 +17,10 @@ struct Cli { /// The subcommand passed to the wright cli. #[command(subcommand)] command: Option, + + /// Output elapsed timing information at the end of the command. + #[arg(short, long)] + timed: bool, } /// Different sub-commands that the wright cli supports. @@ -48,11 +52,14 @@ enum DebugCommands { } fn main() -> Result<()> { - let cli = Cli::parse(); + // Parse the command line arguments. 
+ let cli: Cli = Cli::parse(); + // Get the start time to track duration if asked. + let start: Instant = Instant::now(); match cli.command { // Start an interactive repl. - Some(Commands::Repl) => repl::start(), + Some(Commands::Repl) => { repl::start()?; }, // Print all the tokens for a given file. Some(Commands::Debug { @@ -70,10 +77,15 @@ fn main() -> Result<()> { let lexer: Lexer = Lexer::new(file_map.source(file_id).unwrap()); // Get all the tokens from the lexer and print them each. lexer.for_each(|token: Token| println!("{token:?}")); - // Return ok. - Ok(()) } _ => unimplemented!(), } + + // Handle timing info. + if cli.timed { + println!("\nTime elapsed: {:?}", Instant::now() - start); + } + + Ok(()) } diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 07518a29..b62b9c92 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -338,6 +338,35 @@ impl<'src> Lexer<'src> { } } + // Discard any single-line comment at the start of this lexer and then re-run this function if there was one. + // Note that this will not detect doc comments or multi-line comments. + { + if let Some(without_comment_prefix) = self.remaining.inner.strip_prefix("//") { + // If the next character is not a slash or exclamation, indicating a doc comment. + if !without_comment_prefix.starts_with(&['/', '!']) { + // Get the number of bytes between the start of the comment and the newline, or end of file. + // Do not include bytes of whitespace at or past the newline -- those are handled above. + let line_bytes: usize = without_comment_prefix + // Make an iterator over the lines after this `//`. + .lines() + // Get only the first line. + .next() + // Map to the length of the line string. + .map(str::len) + // If there is no line after the start of this comment we have zero bytes to read. + .unwrap_or(0); + + // Split this number of bytes from the string and ignore them. + let (_, new_remaining) = without_comment_prefix.split_at(line_bytes); + // Put the split off string in a Fragment, and consider this fragment to be the + // remaining Fragment for this lexer. + self.remaining = Fragment { inner: new_remaining }; + // Restart this function. + return self.next_token(); + } + } + } + // To attempt to match a token from the prefix table, make a char iterator // and get two chars from it to test equality. None of the tokens start with a // null character so use that as a single of an unavailable char. From 9476c53424aae57deb90f2ff41c5f8d42c307020 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Mon, 19 Feb 2024 01:55:36 -0500 Subject: [PATCH 25/60] clippy --- wright/src/parser/lexer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index b62b9c92..f4f00070 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -343,7 +343,7 @@ impl<'src> Lexer<'src> { { if let Some(without_comment_prefix) = self.remaining.inner.strip_prefix("//") { // If the next character is not a slash or exclamation, indicating a doc comment. - if !without_comment_prefix.starts_with(&['/', '!']) { + if !without_comment_prefix.starts_with(['/', '!']) { // Get the number of bytes between the start of the comment and the newline, or end of file. // Do not include bytes of whitespace at or past the newline -- those are handled above. 
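// (Illustrative aside, not part of the patch itself: if the remaining fragment is
// "// note\nconst", stripping the "//" leaves " note\nconst", whose first line " note" is
// five bytes long; those bytes are discarded below and the recursive call then trims the
// newline as ordinary whitespace before returning a KwConst token for `const`.)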
let line_bytes: usize = without_comment_prefix From 3fcf26e5a8eea16b28f3d03a43e4ade85d0225e2 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Mon, 19 Feb 2024 01:56:03 -0500 Subject: [PATCH 26/60] cargo fmt --- wright/src/bin/wright.rs | 6 ++++-- wright/src/parser/lexer.rs | 21 ++++++++++++--------- 2 files changed, 16 insertions(+), 11 deletions(-) diff --git a/wright/src/bin/wright.rs b/wright/src/bin/wright.rs index 5a43cf81..1469488d 100644 --- a/wright/src/bin/wright.rs +++ b/wright/src/bin/wright.rs @@ -54,12 +54,14 @@ enum DebugCommands { fn main() -> Result<()> { // Parse the command line arguments. let cli: Cli = Cli::parse(); - // Get the start time to track duration if asked. + // Get the start time to track duration if asked. let start: Instant = Instant::now(); match cli.command { // Start an interactive repl. - Some(Commands::Repl) => { repl::start()?; }, + Some(Commands::Repl) => { + repl::start()?; + } // Print all the tokens for a given file. Some(Commands::Debug { diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index f4f00070..29d26080 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -338,8 +338,8 @@ impl<'src> Lexer<'src> { } } - // Discard any single-line comment at the start of this lexer and then re-run this function if there was one. - // Note that this will not detect doc comments or multi-line comments. + // Discard any single-line comment at the start of this lexer and then re-run this function if there was one. + // Note that this will not detect doc comments or multi-line comments. { if let Some(without_comment_prefix) = self.remaining.inner.strip_prefix("//") { // If the next character is not a slash or exclamation, indicating a doc comment. @@ -347,21 +347,24 @@ impl<'src> Lexer<'src> { // Get the number of bytes between the start of the comment and the newline, or end of file. // Do not include bytes of whitespace at or past the newline -- those are handled above. let line_bytes: usize = without_comment_prefix - // Make an iterator over the lines after this `//`. + // Make an iterator over the lines after this `//`. .lines() - // Get only the first line. + // Get only the first line. .next() // Map to the length of the line string. .map(str::len) // If there is no line after the start of this comment we have zero bytes to read. .unwrap_or(0); - - // Split this number of bytes from the string and ignore them. + + // Split this number of bytes from the string and ignore them. let (_, new_remaining) = without_comment_prefix.split_at(line_bytes); // Put the split off string in a Fragment, and consider this fragment to be the - // remaining Fragment for this lexer. - self.remaining = Fragment { inner: new_remaining }; - // Restart this function. + // remaining Fragment for this lexer. + self.remaining = Fragment { + inner: new_remaining, + }; + + // Restart this function. return self.next_token(); } } From 71bab84627df829dd76ce257400ab2584a06e910 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Mon, 19 Feb 2024 01:56:17 -0500 Subject: [PATCH 27/60] cargo fmt --- wright/src/parser/lexer.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 29d26080..01864fde 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -363,7 +363,7 @@ impl<'src> Lexer<'src> { self.remaining = Fragment { inner: new_remaining, }; - + // Restart this function. 
return self.next_token(); } From 05570fe97431e34bb31f1378b2f36b1ddf109e12 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Mon, 19 Feb 2024 02:06:09 -0500 Subject: [PATCH 28/60] Remove num dependency for now --- wright/Cargo.toml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wright/Cargo.toml b/wright/Cargo.toml index df699765..42e13fc8 100644 --- a/wright/Cargo.toml +++ b/wright/Cargo.toml @@ -63,9 +63,9 @@ version = "0.11.1" [dependencies.termcolor] version = "1.2.0" -# Big Integers -[dependencies.num] -version = "0.4" +# # Big Integers +# [dependencies.num] +# version = "0.4" # Portable (windows, mac, linux) file locking [dependencies.fs4] From 70fbeb53dd5fd5e52392455cdc581e6aad23d04d Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Mon, 19 Feb 2024 02:09:33 -0500 Subject: [PATCH 29/60] Tweak timing message --- wright/src/bin/wright.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wright/src/bin/wright.rs b/wright/src/bin/wright.rs index 1469488d..a445a556 100644 --- a/wright/src/bin/wright.rs +++ b/wright/src/bin/wright.rs @@ -86,7 +86,7 @@ fn main() -> Result<()> { // Handle timing info. if cli.timed { - println!("\nTime elapsed: {:?}", Instant::now() - start); + println!("\nTotal time elapsed since parsing arguments: {:?}", Instant::now() - start); } Ok(()) From 48a96e2d01e95890d48a03b44356685cb3632763 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Thu, 22 Feb 2024 21:09:18 -0500 Subject: [PATCH 30/60] Start multi-line comments --- wright/src/parser/lexer.rs | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 01864fde..93015cf3 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -339,7 +339,7 @@ impl<'src> Lexer<'src> { } // Discard any single-line comment at the start of this lexer and then re-run this function if there was one. - // Note that this will not detect doc comments or multi-line comments. + // Note that this will not detect/discard doc comments or multi-line comments. { if let Some(without_comment_prefix) = self.remaining.inner.strip_prefix("//") { // If the next character is not a slash or exclamation, indicating a doc comment. @@ -370,6 +370,11 @@ impl<'src> Lexer<'src> { } } + // Discard any multi-line comments we encounter, sparing doc comments. + { + + } + // To attempt to match a token from the prefix table, make a char iterator // and get two chars from it to test equality. None of the tokens start with a // null character so use that as a single of an unavailable char. From 6d9001ac2d375a4176a3014f1859fd1fd02b3a88 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Thu, 22 Feb 2024 23:35:22 -0500 Subject: [PATCH 31/60] Refactor single line comment and whitespace lexing --- wright/src/parser/fragment.rs | 29 ++++++ wright/src/parser/lexer.rs | 191 ++++++++++++++++++++++++++-------- 2 files changed, 177 insertions(+), 43 deletions(-) diff --git a/wright/src/parser/fragment.rs b/wright/src/parser/fragment.rs index 6f159ec4..ddc37006 100644 --- a/wright/src/parser/fragment.rs +++ b/wright/src/parser/fragment.rs @@ -30,6 +30,12 @@ impl<'src> Fragment<'src> { (start, unsafe { start.add(self.len()) }) } + /// Return true if both of these [`Fragment`]s point to the exact same slice of source code. + pub fn ptr_eq(&self, other: &Self) -> bool { + // Since std::ptr::eq works for fat pointers, we can use it here. 
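+        // For `&str` (a fat pointer) this compares both the data address and the length, so two
+        // fragments only compare equal when they cover the exact same bytes of the source.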
+ std::ptr::eq(self.inner, other.inner) + } + /// Return true if this fragment overlaps at all with the other (either one contains the start of the other, /// by pointer). pub fn overlaps(&self, other: &Self) -> bool { @@ -64,6 +70,29 @@ impl<'src> Fragment<'src> { pub fn chars(&self) -> Chars<'src> { self.inner.chars() } + + /// Get the number of bytes between the beginning of [`origin`] and the beginning of [`self`]. + /// + /// # Panics: + /// - Panics if [`self`] is not a fragment within [`origin`] according to [`Fragment::contains`]. + pub fn offset_from(&self, origin: &Self) -> usize { + if !origin.contains(self) { + panic!("This fragment must be contained in the original fragment"); + } + + // Get a pointer to the start of the original fragment. + let start: *const u8 = origin.inner.as_ptr(); + // Do the same for the subslice. + let subslice_start: *const u8 = self.inner.as_ptr(); + + // SAFETY: Since the subslice is contained (by pointer) by the origin slice, both of them + // necessarily satisfy the safety requirements of offset_from to be pointers to the same + // allocation. + // + // We can always cast to a usize since this should always be a positive offset, as long + // as the subslice is contained in the origin fragment. + unsafe { subslice_start.offset_from(start) as usize } + } } #[cfg(test)] diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 93015cf3..15b4b4a5 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -164,8 +164,11 @@ pub const PREFIX_TABLE: [PrefixToToken; PREFIX_TABLE_ROWS] = { table }; +/// The pattern that begins any single line comments (including doc comments). +pub const SINGLE_LINE_COMMENT_PREFIX: &str = "//"; + /// The lexical analyser for wright. This produces a series of tokens that make up the larger elements of the language. -#[derive(Debug)] +#[derive(Debug, Clone, Copy)] pub struct Lexer<'src> { /// The remaining source code that has not been processed and returned as a token from the iterator yet. pub remaining: Fragment<'src>, @@ -317,57 +320,152 @@ impl<'src> Lexer<'src> { } } - /// Get the next token from the lexer. - pub fn next_token(&mut self) -> Option> { - // If the remaining input is empty, there is no token. - if self.remaining.is_empty() { - return None; + /// "Fork" this lexer, creating a new [`Lexer`] at the same position as this one that can be used for + /// failable parsing. This can be compared to the original lexer it was forked from using [Fragment::offset_from] + /// on the underlying `remaining` fragments. + fn fork(&self) -> Self { + *self + } + + /// Remove and ignore any whitespace at the start of the remaining fragment. + fn ignore_whitespace(&mut self) { + // Get a reference to the slice of the string past any whitespace at the start. + let without_whitespace: &str = self.remaining.inner.trim_start(); + + // If the references aren't equal, update the remaining fragment. + if !ptr::eq(without_whitespace, self.remaining.inner) { + self.remaining.inner = without_whitespace; + } + } + + /// Check if a pattern matches at the start of the remaining fragment, and if so return the number of bytes. + fn matches(&self, pattern: &str) -> bool { + self.remaining.inner.starts_with(pattern) + } + + /// If the remaining fragment starts with the given `pattern`, strip it from the remaining fragment and return + /// true. Otherwise return false. 
+ fn consume(&mut self, pattern: &str) -> bool { + if let Some(stripped) = self.remaining.inner.strip_prefix(pattern) { + self.remaining.inner = stripped; + true + } else { + false } + } - // Use blocks heavily in this function as we don't want to re-use iterators or variables - // after we check them in most cases. + /// Remove a character from the start of the `remaining` [`Fragment`], return the character + /// consumed if there was a character available to consume. + fn consume_any(&mut self) -> Option { + // Make a character iterator. + let mut chars: Chars = self.remaining.chars(); + + if let Some(c) = chars.next() { + // Consumed a char, update the remaining fragment of this lexer. + let char_bytes: usize = c.len_utf8(); + // SAFETY: we know that this is not on a char boundary and does not exceed the length of the slice, + // since we just pulled it from a `Chars` iterator. + self.remaining.inner = unsafe { self.remaining.inner.get_unchecked(char_bytes..) }; + // Return the character. + Some(c) + } else { + // No characters available, return nothing. + None + } + } - // If there is whitespace at the start of the remaining fragment, strip it and re-run this - // function to get the next token. - { - let without_whitespace: &str = self.remaining.inner.trim_start(); + // /// Consume characters from the lexer until given pattern matches. Do not consume the pattern or + // /// any characters in it. This will consumed to the end of the lexer if the pattern is not found. + // fn consume_until(&mut self, pattern: &str) { + // while !self.remaining.is_empty() && !self.matches(pattern) { + // self.consume_any(); + // } + // } + + /// Attempt to read/handle a single line comment from the start of the + /// remaining fragment. If there's a doc-style single line comment, return a [`Token`], + /// otherwise return [`None`]. + /// + /// Generally I'm trying to follow the [rust comment spec] here. + /// + /// [rust comment spec]: https://doc.rust-lang.org/reference/comments.html + fn handle_single_line_comment(&mut self) -> Option> { + // Fork the lexer to attempt to consume a single line comment. + let mut fork: Self = self.fork(); + + // Try to consume the single line comment prefix from the fork. + if fork.consume(SINGLE_LINE_COMMENT_PREFIX) { + // We consumed it successfully, read through a newline or the end of the forked lexer if we get there. + + // First determine if this is a doc comment of some kind. + let is_inner_doc_comment: bool = fork.matches("/") && !fork.matches("//"); + let is_outer_doc_comment: bool = fork.matches("!"); - if !ptr::eq(without_whitespace, self.remaining.inner) { - self.remaining.inner = without_whitespace; - return self.next_token(); + // The consume until a newline, carraige return, or the end of the source fragment. + while !fork.remaining.is_empty() && !fork.matches("\r") && !fork.matches("\n") { + fork.consume_any(); } + + // Determine the kind of token to produce (if any). + let variant: Option; + + if is_inner_doc_comment { + variant = Some(TokenTy::InnerDocComment); + } + else if is_outer_doc_comment { + variant = Some(TokenTy::OuterDocComment); + } + else { + variant = None; + } + + // Map the variant to a token to return. + let token: Option = variant.map(|kind| { + // Get the number of bytes we have consumed using `offset_from`. + let bytes_consumed: usize = fork.remaining.offset_from(&self.remaining); + // Split this token from `self` rather than `fork` since self is still in an unmodified position. 
+ self.split_token(bytes_consumed, kind) + }); + + // Update this lexer to match the state of the forked lexer. + *self = fork; + // Consume any outstanding whitespace. + self.ignore_whitespace(); + // Return any token produced. + return token; } - // Discard any single-line comment at the start of this lexer and then re-run this function if there was one. - // Note that this will not detect/discard doc comments or multi-line comments. - { - if let Some(without_comment_prefix) = self.remaining.inner.strip_prefix("//") { - // If the next character is not a slash or exclamation, indicating a doc comment. - if !without_comment_prefix.starts_with(['/', '!']) { - // Get the number of bytes between the start of the comment and the newline, or end of file. - // Do not include bytes of whitespace at or past the newline -- those are handled above. - let line_bytes: usize = without_comment_prefix - // Make an iterator over the lines after this `//`. - .lines() - // Get only the first line. - .next() - // Map to the length of the line string. - .map(str::len) - // If there is no line after the start of this comment we have zero bytes to read. - .unwrap_or(0); - - // Split this number of bytes from the string and ignore them. - let (_, new_remaining) = without_comment_prefix.split_at(line_bytes); - // Put the split off string in a Fragment, and consider this fragment to be the - // remaining Fragment for this lexer. - self.remaining = Fragment { - inner: new_remaining, - }; - - // Restart this function. + // If there was no comment prefix, there is no comment immediately available. + None + } + + /// Get the next token from the lexer. + pub fn next_token(&mut self) -> Option> { + // Ignore any whitespace at the start of the lexer. + self.ignore_whitespace(); + + // Attempt to parse a single line comment. Return it if it's documentation. + // Rerun this function if there was a comment and it was ignored successfully. + let initial_lexer: Self = self.fork(); + match self.handle_single_line_comment() { + // There was a single line comment ignored or no single line comment. + None => { + // Check if the remaining fragment changed. + if !self.remaining.ptr_eq(&initial_lexer.remaining) { + // If so, re-run this function. return self.next_token(); } - } + + // If the lexer was unchanged, then there was no comment -- keep trying to match tokens. + }, + + // If there was some token, return it. + token => return token, + } + + // If the remaining input is empty, there is no token. + if self.remaining.is_empty() { + return None; } // Discard any multi-line comments we encounter, sparing doc comments. 
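[Illustration, not from the patch: the `Copy` lexer together with `Fragment::ptr_eq` and
`Fragment::offset_from` makes lexer progress cheap to observe. A minimal sketch using only the
public items introduced in this patch series; the input string is made up for the example.]

    let mut lexer = Lexer::new("// skipped comment\n+= rest");
    let before = lexer;                  // `Lexer` is `Copy`, so this is a free snapshot.
    let _plus_eq = lexer.next_token();   // Silently skips the comment, then lexes "+=".
    // Total bytes consumed so far, measured with the new `Fragment::offset_from`.
    assert_eq!(lexer.remaining.offset_from(&before.remaining), "// skipped comment\n+=".len());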
@@ -544,4 +642,11 @@ mod tests { assert_eq!(token.fragment.inner, "123_456_789"); assert_eq!(token.variant, TokenTy::IntegerLiteral); } + + #[test] + fn ignored_single_line_comment() { + let mut lexer = Lexer::new("// test comment "); + assert!(lexer.next_token().is_none()); + assert_eq!(lexer.remaining.len(), 0); + } } From 25b72007ff4c0fd0612c9f86b526acb5b6e59bf3 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Thu, 22 Feb 2024 23:36:00 -0500 Subject: [PATCH 32/60] cargo fmt --- wright/src/parser/fragment.rs | 22 ++++----- wright/src/parser/lexer.rs | 86 +++++++++++++++++------------------ 2 files changed, 52 insertions(+), 56 deletions(-) diff --git a/wright/src/parser/fragment.rs b/wright/src/parser/fragment.rs index ddc37006..2bf718a0 100644 --- a/wright/src/parser/fragment.rs +++ b/wright/src/parser/fragment.rs @@ -30,11 +30,11 @@ impl<'src> Fragment<'src> { (start, unsafe { start.add(self.len()) }) } - /// Return true if both of these [`Fragment`]s point to the exact same slice of source code. + /// Return true if both of these [`Fragment`]s point to the exact same slice of source code. pub fn ptr_eq(&self, other: &Self) -> bool { // Since std::ptr::eq works for fat pointers, we can use it here. std::ptr::eq(self.inner, other.inner) - } + } /// Return true if this fragment overlaps at all with the other (either one contains the start of the other, /// by pointer). @@ -71,8 +71,8 @@ impl<'src> Fragment<'src> { self.inner.chars() } - /// Get the number of bytes between the beginning of [`origin`] and the beginning of [`self`]. - /// + /// Get the number of bytes between the beginning of [`origin`] and the beginning of [`self`]. + /// /// # Panics: /// - Panics if [`self`] is not a fragment within [`origin`] according to [`Fragment::contains`]. pub fn offset_from(&self, origin: &Self) -> usize { @@ -85,13 +85,13 @@ impl<'src> Fragment<'src> { // Do the same for the subslice. let subslice_start: *const u8 = self.inner.as_ptr(); - // SAFETY: Since the subslice is contained (by pointer) by the origin slice, both of them - // necessarily satisfy the safety requirements of offset_from to be pointers to the same - // allocation. - // - // We can always cast to a usize since this should always be a positive offset, as long - // as the subslice is contained in the origin fragment. - unsafe { subslice_start.offset_from(start) as usize } + // SAFETY: Since the subslice is contained (by pointer) by the origin slice, both of them + // necessarily satisfy the safety requirements of offset_from to be pointers to the same + // allocation. + // + // We can always cast to a usize since this should always be a positive offset, as long + // as the subslice is contained in the origin fragment. + unsafe { subslice_start.offset_from(start) as usize } } } diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 15b4b4a5..04296b81 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -164,7 +164,7 @@ pub const PREFIX_TABLE: [PrefixToToken; PREFIX_TABLE_ROWS] = { table }; -/// The pattern that begins any single line comments (including doc comments). +/// The pattern that begins any single line comments (including doc comments). pub const SINGLE_LINE_COMMENT_PREFIX: &str = "//"; /// The lexical analyser for wright. This produces a series of tokens that make up the larger elements of the language. 
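[Illustration, not from the patch: with the single-line comment handling above, plain `//`
comments are skipped entirely, while doc-style prefixes produce tokens under this lexer's
mapping (`//!` becomes `OuterDocComment`, `///` becomes `InnerDocComment`). A small sketch
with a made-up input:]

    let mut lexer = Lexer::new("// ignored\n//! documented\ncode");
    // The plain comment is skipped; the next token covers the whole "//! documented" line.
    let _doc_token = lexer.next_token().expect("`//!` should yield a doc-comment token");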
@@ -320,19 +320,19 @@ impl<'src> Lexer<'src> { } } - /// "Fork" this lexer, creating a new [`Lexer`] at the same position as this one that can be used for + /// "Fork" this lexer, creating a new [`Lexer`] at the same position as this one that can be used for /// failable parsing. This can be compared to the original lexer it was forked from using [Fragment::offset_from] - /// on the underlying `remaining` fragments. + /// on the underlying `remaining` fragments. fn fork(&self) -> Self { *self } - /// Remove and ignore any whitespace at the start of the remaining fragment. + /// Remove and ignore any whitespace at the start of the remaining fragment. fn ignore_whitespace(&mut self) { - // Get a reference to the slice of the string past any whitespace at the start. + // Get a reference to the slice of the string past any whitespace at the start. let without_whitespace: &str = self.remaining.inner.trim_start(); - // If the references aren't equal, update the remaining fragment. + // If the references aren't equal, update the remaining fragment. if !ptr::eq(without_whitespace, self.remaining.inner) { self.remaining.inner = without_whitespace; } @@ -343,8 +343,8 @@ impl<'src> Lexer<'src> { self.remaining.inner.starts_with(pattern) } - /// If the remaining fragment starts with the given `pattern`, strip it from the remaining fragment and return - /// true. Otherwise return false. + /// If the remaining fragment starts with the given `pattern`, strip it from the remaining fragment and return + /// true. Otherwise return false. fn consume(&mut self, pattern: &str) -> bool { if let Some(stripped) = self.remaining.inner.strip_prefix(pattern) { self.remaining.inner = stripped; @@ -354,88 +354,86 @@ impl<'src> Lexer<'src> { } } - /// Remove a character from the start of the `remaining` [`Fragment`], return the character - /// consumed if there was a character available to consume. + /// Remove a character from the start of the `remaining` [`Fragment`], return the character + /// consumed if there was a character available to consume. fn consume_any(&mut self) -> Option { - // Make a character iterator. + // Make a character iterator. let mut chars: Chars = self.remaining.chars(); if let Some(c) = chars.next() { // Consumed a char, update the remaining fragment of this lexer. let char_bytes: usize = c.len_utf8(); // SAFETY: we know that this is not on a char boundary and does not exceed the length of the slice, - // since we just pulled it from a `Chars` iterator. + // since we just pulled it from a `Chars` iterator. self.remaining.inner = unsafe { self.remaining.inner.get_unchecked(char_bytes..) }; - // Return the character. + // Return the character. Some(c) } else { - // No characters available, return nothing. + // No characters available, return nothing. None } } // /// Consume characters from the lexer until given pattern matches. Do not consume the pattern or - // /// any characters in it. This will consumed to the end of the lexer if the pattern is not found. + // /// any characters in it. This will consumed to the end of the lexer if the pattern is not found. // fn consume_until(&mut self, pattern: &str) { // while !self.remaining.is_empty() && !self.matches(pattern) { // self.consume_any(); // } // } - /// Attempt to read/handle a single line comment from the start of the + /// Attempt to read/handle a single line comment from the start of the /// remaining fragment. If there's a doc-style single line comment, return a [`Token`], - /// otherwise return [`None`]. 
- /// + /// otherwise return [`None`]. + /// /// Generally I'm trying to follow the [rust comment spec] here. /// - /// [rust comment spec]: https://doc.rust-lang.org/reference/comments.html + /// [rust comment spec]: https://doc.rust-lang.org/reference/comments.html fn handle_single_line_comment(&mut self) -> Option> { - // Fork the lexer to attempt to consume a single line comment. + // Fork the lexer to attempt to consume a single line comment. let mut fork: Self = self.fork(); - // Try to consume the single line comment prefix from the fork. + // Try to consume the single line comment prefix from the fork. if fork.consume(SINGLE_LINE_COMMENT_PREFIX) { - // We consumed it successfully, read through a newline or the end of the forked lexer if we get there. - - // First determine if this is a doc comment of some kind. + // We consumed it successfully, read through a newline or the end of the forked lexer if we get there. + + // First determine if this is a doc comment of some kind. let is_inner_doc_comment: bool = fork.matches("/") && !fork.matches("//"); let is_outer_doc_comment: bool = fork.matches("!"); - // The consume until a newline, carraige return, or the end of the source fragment. + // The consume until a newline, carraige return, or the end of the source fragment. while !fork.remaining.is_empty() && !fork.matches("\r") && !fork.matches("\n") { fork.consume_any(); } // Determine the kind of token to produce (if any). - let variant: Option; - - if is_inner_doc_comment { + let variant: Option; + + if is_inner_doc_comment { variant = Some(TokenTy::InnerDocComment); - } - else if is_outer_doc_comment { + } else if is_outer_doc_comment { variant = Some(TokenTy::OuterDocComment); - } - else { + } else { variant = None; } - // Map the variant to a token to return. + // Map the variant to a token to return. let token: Option = variant.map(|kind| { - // Get the number of bytes we have consumed using `offset_from`. + // Get the number of bytes we have consumed using `offset_from`. let bytes_consumed: usize = fork.remaining.offset_from(&self.remaining); // Split this token from `self` rather than `fork` since self is still in an unmodified position. self.split_token(bytes_consumed, kind) }); - // Update this lexer to match the state of the forked lexer. + // Update this lexer to match the state of the forked lexer. *self = fork; - // Consume any outstanding whitespace. + // Consume any outstanding whitespace. self.ignore_whitespace(); // Return any token produced. return token; } - // If there was no comment prefix, there is no comment immediately available. + // If there was no comment prefix, there is no comment immediately available. None } @@ -448,18 +446,18 @@ impl<'src> Lexer<'src> { // Rerun this function if there was a comment and it was ignored successfully. let initial_lexer: Self = self.fork(); match self.handle_single_line_comment() { - // There was a single line comment ignored or no single line comment. + // There was a single line comment ignored or no single line comment. None => { // Check if the remaining fragment changed. if !self.remaining.ptr_eq(&initial_lexer.remaining) { - // If so, re-run this function. + // If so, re-run this function. return self.next_token(); } - + // If the lexer was unchanged, then there was no comment -- keep trying to match tokens. - }, + } - // If there was some token, return it. + // If there was some token, return it. 
token => return token, } @@ -469,9 +467,7 @@ impl<'src> Lexer<'src> { } // Discard any multi-line comments we encounter, sparing doc comments. - { - - } + {} // To attempt to match a token from the prefix table, make a char iterator // and get two chars from it to test equality. None of the tokens start with a From 1267d35664c72d7832caa643d62e1975059aca41 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Fri, 23 Feb 2024 23:56:50 -0500 Subject: [PATCH 33/60] Multi-line comment handling --- wright/src/bin/wright.rs | 11 ++- wright/src/parser/fragment.rs | 22 +++++- wright/src/parser/lexer.rs | 133 ++++++++++++++++++++++++++++------ 3 files changed, 136 insertions(+), 30 deletions(-) diff --git a/wright/src/bin/wright.rs b/wright/src/bin/wright.rs index a445a556..ab022fda 100644 --- a/wright/src/bin/wright.rs +++ b/wright/src/bin/wright.rs @@ -44,10 +44,10 @@ enum DebugCommands { /// A file of wright source code. file: PathBuf, - /// Pretty print the source code with the tokens lined under them. - /// If not used, a list of tokens will be printed with their metadata. - #[arg(short, long)] - pretty: bool, + // /// Pretty print the source code with the tokens lined under them. + // /// If not used, a list of tokens will be printed with their metadata. + // #[arg(short, long)] + // pretty: bool, }, } @@ -68,7 +68,6 @@ fn main() -> Result<()> { command: DebugCommands::Tokens { file, - pretty: false, }, }) => { let mut file_map: FileMap = FileMap::new(); @@ -78,7 +77,7 @@ fn main() -> Result<()> { // Use unwrap here, since we know we just added the file. let lexer: Lexer = Lexer::new(file_map.source(file_id).unwrap()); // Get all the tokens from the lexer and print them each. - lexer.for_each(|token: Token| println!("{token:?}")); + lexer.for_each(|token: Token| println!("{token}")); } _ => unimplemented!(), diff --git a/wright/src/parser/fragment.rs b/wright/src/parser/fragment.rs index 2bf718a0..a49561d8 100644 --- a/wright/src/parser/fragment.rs +++ b/wright/src/parser/fragment.rs @@ -22,7 +22,7 @@ impl<'src> Fragment<'src> { /// Get a pair of pointers, the first one being at the beginning of the fragment, the second one pointing /// to the byte after the end of the fragment. - const fn start_and_end(&self) -> (*const u8, *const u8) { + pub const fn start_and_end(&self) -> (*const u8, *const u8) { // Get the pointer to the start of the fragment. let start: *const u8 = self.inner.as_ptr(); // Get a pointer just past the end of the string. @@ -74,7 +74,7 @@ impl<'src> Fragment<'src> { /// Get the number of bytes between the beginning of [`origin`] and the beginning of [`self`]. /// /// # Panics: - /// - Panics if [`self`] is not a fragment within [`origin`] according to [`Fragment::contains`]. + /// - Panics if [`self`] is not a fragment within `origin` according to [`Fragment::contains`]. 
pub fn offset_from(&self, origin: &Self) -> usize { if !origin.contains(self) { panic!("This fragment must be contained in the original fragment"); @@ -130,4 +130,22 @@ mod tests { assert_eq!(left.inner, "+"); assert_eq!(right.inner, ""); } + + #[test] + fn test_offset_from() { + let a = Fragment { inner: "abcde" }; + let (b, c) = a.split(2); + assert_eq!(b.offset_from(&a), 0); + assert_eq!(c.offset_from(&a), 2); + } + + #[test] + #[should_panic] + fn test_offset_panics() { + let a = Fragment { inner: "abc" }; + let b = Fragment { inner: "def" }; + a.offset_from(&b); + } + + } diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 04296b81..337885d2 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -167,6 +167,12 @@ pub const PREFIX_TABLE: [PrefixToToken; PREFIX_TABLE_ROWS] = { /// The pattern that begins any single line comments (including doc comments). pub const SINGLE_LINE_COMMENT_PREFIX: &str = "//"; +/// The pattern that starts any multi-line comments (including doc comments). +pub const MULTI_LINE_COMMENT_START: &str = "/*"; + +/// The pattern that ends any multi-line comments (including doc comments). +pub const MULTI_LINE_COMMENT_END: &str = "*/"; + /// The lexical analyser for wright. This produces a series of tokens that make up the larger elements of the language. #[derive(Debug, Clone, Copy)] pub struct Lexer<'src> { @@ -224,6 +230,10 @@ pub enum TokenTy { OuterDocComment, OuterBlockDocComment, InnerDocComment, InnerBlockDocComment, + + /// Indicates a block style comment without termination. + UnterminatedBlockComment, + KwRecord, KwType, @@ -374,14 +384,6 @@ impl<'src> Lexer<'src> { } } - // /// Consume characters from the lexer until given pattern matches. Do not consume the pattern or - // /// any characters in it. This will consumed to the end of the lexer if the pattern is not found. - // fn consume_until(&mut self, pattern: &str) { - // while !self.remaining.is_empty() && !self.matches(pattern) { - // self.consume_any(); - // } - // } - /// Attempt to read/handle a single line comment from the start of the /// remaining fragment. If there's a doc-style single line comment, return a [`Token`], /// otherwise return [`None`]. @@ -398,8 +400,8 @@ impl<'src> Lexer<'src> { // We consumed it successfully, read through a newline or the end of the forked lexer if we get there. // First determine if this is a doc comment of some kind. - let is_inner_doc_comment: bool = fork.matches("/") && !fork.matches("//"); - let is_outer_doc_comment: bool = fork.matches("!"); + let is_inner_doc: bool = fork.matches("/") && !fork.matches("//"); + let is_outer_doc: bool = fork.matches("!"); // The consume until a newline, carraige return, or the end of the source fragment. while !fork.remaining.is_empty() && !fork.matches("\r") && !fork.matches("\n") { @@ -407,15 +409,12 @@ impl<'src> Lexer<'src> { } // Determine the kind of token to produce (if any). - let variant: Option; - - if is_inner_doc_comment { - variant = Some(TokenTy::InnerDocComment); - } else if is_outer_doc_comment { - variant = Some(TokenTy::OuterDocComment); - } else { - variant = None; - } + let variant: Option = match (is_inner_doc, is_outer_doc) { + (true, false) => Some(TokenTy::InnerDocComment), + (false, true) => Some(TokenTy::OuterDocComment), + (false, false) => None, + (true, true) => unreachable!("Lexer should not match multiple comment types at once."), + }; // Map the variant to a token to return. 
let token: Option = variant.map(|kind| { @@ -437,14 +436,92 @@ impl<'src> Lexer<'src> { None } + /// Attempt to read/consume a multi-line comment from the start of the `remaining` fragment. + fn handle_multi_line_comment(&mut self) -> Option> { + // Handle corner cases here so we don't have to below. + // These are both considered empty non-documenting comments. + if self.consume("/***/") { + return None; + } + + if self.consume("/**/") { + return None; + } + + // Make a fork of the lexer to avoid modifying this lexer if we fail to parse. + let mut fork: Self = self.fork(); + + // Try to parse the start of a multi-line comment. + if fork.consume(MULTI_LINE_COMMENT_START) { + // Check if this is a doc comment. + let is_outer_doc: bool = fork.matches("!"); + // Use this to indicate that more than one following asterix is not a doc comment. + let is_inner_doc: bool = fork.matches("*") && !fork.matches("**"); + + // Consume until we see the end of the doc comment. If we run out of characters, consider the + // comment unterminated. + while !fork.matches(MULTI_LINE_COMMENT_END) { + // Handle nested comments here: + if fork.matches(MULTI_LINE_COMMENT_START) { + // Discard the output -- don't care about doc comments in other comments. + fork.handle_multi_line_comment(); + continue; + } + + // Handle unterminated comments here. + if fork.remaining.is_empty() { + // If we have not hit a "*/" before the end of the input, return an unterminated block comment. + let bytes_consumed: usize = fork.remaining.offset_from(&self.remaining); + // Split the token and return it. + return Some(self.split_token(bytes_consumed, TokenTy::UnterminatedBlockComment)); + } + + // If there's still input, and not a nested comment, consume it. + fork.consume_any(); + } + + // If we get here, the comment was terminated. Consume the terminating characters, and return. + // Use debug assert here to make sure that the comment is actually terminated. + debug_assert!(fork.consume(MULTI_LINE_COMMENT_END), "comment is actually terminated"); + + // Determine the kind of token to produce (if any). + let variant: Option = match (is_inner_doc, is_outer_doc) { + (true, false) => Some(TokenTy::InnerBlockDocComment), + (false, true) => Some(TokenTy::OuterBlockDocComment), + (false, false) => None, + (true, true) => unreachable!("Lexer should not match multiple comment types at once."), + }; + + // Make the token to return. + let token: Option = variant.map(|kind| { + // Get the number of bytes we have consumed using `offset_from`. + let bytes_consumed: usize = fork.remaining.offset_from(&self.remaining); + // Split this token from `self` rather than `fork` since self is still in an unmodified position. + self.split_token(bytes_consumed, kind) + }); + + // Update this lexer to match the state of the fork. + *self = fork; + // Return token if there was one. + return token; + } + + // If the fork did not consume a multi-line comment start, return None and do + // not update this lexer. + None + } + /// Get the next token from the lexer. pub fn next_token(&mut self) -> Option> { // Ignore any whitespace at the start of the lexer. self.ignore_whitespace(); + // Grab a copy of the initial lexer to compare and check when progress has been made. + let initial_lexer: Self = self.fork(); + // Attempt to parse a single line comment. Return it if it's documentation. // Rerun this function if there was a comment and it was ignored successfully. 
- let initial_lexer: Self = self.fork(); + match self.handle_single_line_comment() { // There was a single line comment ignored or no single line comment. None => { @@ -466,8 +543,20 @@ impl<'src> Lexer<'src> { return None; } - // Discard any multi-line comments we encounter, sparing doc comments. - {} + // Try to handle a multi-line comment if there is one. + match self.handle_multi_line_comment() { + // There was an ignored comment or no comment. + None => { + // If the lexer was changed, restart this function. + if !self.remaining.ptr_eq(&initial_lexer.remaining) { + return self.next_token(); + } + } + + // If there was a block style doc-comment, or an unterminated multi-line comment + // return. + token => return token, + } // To attempt to match a token from the prefix table, make a char iterator // and get two chars from it to test equality. None of the tokens start with a From c92b3cb2f1c6929fad3c1c30c54e6c627d845bbc Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 24 Feb 2024 00:04:46 -0500 Subject: [PATCH 34/60] Add block-style doc-comment benchmark --- wright/benches/lexer.rs | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/wright/benches/lexer.rs b/wright/benches/lexer.rs index 35f6a8ee..3ec4dfc8 100644 --- a/wright/benches/lexer.rs +++ b/wright/benches/lexer.rs @@ -9,7 +9,7 @@ fn bench_symbol_tokens(c: &mut Criterion) { // Function to make a lexer and get a token from it. fn make_lexer_and_get_token(b: &mut Bencher, input: &str) { - b.iter(|| Lexer::new(black_box(input)).next_token()); + b.iter(|| black_box(Lexer::new(input).next_token())); } let inputs = ["+", "+=", "*", "@", "?"]; @@ -19,5 +19,13 @@ fn bench_symbol_tokens(c: &mut Criterion) { } } -criterion_group!(benches, bench_symbol_tokens); +fn bench_block_doc_comment(c: &mut Criterion) { + c.bench_function("lexer block style doc comment", move |b: &mut Bencher| { + b.iter(move || { + black_box(Lexer::new("/*! \n this is a block-style comment \n\n */").next_token()) + }); + }); +} + +criterion_group!(benches, bench_symbol_tokens, bench_block_doc_comment); criterion_main!(benches); From ec2b1f9764efcab5ae7f4daf0674d54e99215e63 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 24 Feb 2024 00:59:28 -0500 Subject: [PATCH 35/60] Simplify lexing of trivial tokens --- wright/Cargo.toml | 12 +- wright/src/parser/lexer.rs | 269 ++++++++++++------------------------- 2 files changed, 88 insertions(+), 193 deletions(-) diff --git a/wright/Cargo.toml b/wright/Cargo.toml index 225dcd5c..8f19ac51 100644 --- a/wright/Cargo.toml +++ b/wright/Cargo.toml @@ -87,16 +87,12 @@ features = ["strict-versioning", "force-static"] version = "0.3" features = ["llvm17-0"] -# # Fast parsing for integers and floats from source code. -# [dependencies.lexical-core] -# version = "0.8" - # TEST DEPENDENCIES -# Rayon to speed up brute-force testing in some cases. -[dev-dependencies.rayon] -version = "1.8.0" - # Criterion is used for benchmarking. [dev-dependencies.criterion] version = "0.5.1" + +# Rayon is used to do various brute-force tests in parallel +[dev-dependencies.rayon] +version = "1.8.0" diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 337885d2..5d8e4ac7 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -4,166 +4,71 @@ //! defined for tokens. 
use super::fragment::Fragment; -use derive_more::Display; use std::iter::FusedIterator; use std::str::Chars; use std::{iter::Peekable, ptr}; +use derive_more::Display; use unicode_ident::{is_xid_continue, is_xid_start}; -/// Constant table of single character tokens and the characters that match them. -pub const SINGLE_CHAR_TOKENS: &[(char, TokenTy)] = &[ - ('(', TokenTy::LeftParen), - (')', TokenTy::RightParen), - ('[', TokenTy::LeftBracket), - (']', TokenTy::RightBracket), - ('{', TokenTy::LeftCurly), - ('}', TokenTy::RightCurly), - ('@', TokenTy::At), - (';', TokenTy::Semi), - ('?', TokenTy::Question), - (',', TokenTy::Comma), - ('#', TokenTy::Hash), - ('$', TokenTy::Dollar), -]; - -/// Tokens that can be either a single character or upgraded with an -/// equals sign. -pub const POSSIBLE_EQ_UPGRADE_TOKENS: &[(char, TokenTy, TokenTy)] = &[ - ('!', TokenTy::Bang, TokenTy::BangEq), - ('%', TokenTy::Mod, TokenTy::ModEq), - ('^', TokenTy::Xor, TokenTy::XorEq), - ('*', TokenTy::Star, TokenTy::StarEq), - ('+', TokenTy::Plus, TokenTy::PlusEq), - ('/', TokenTy::Div, TokenTy::DivEq), -]; - -/// Characters that can produce different tokens when followed by an equals sign or themselves. -pub const POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS: &[(char, TokenTy, TokenTy, TokenTy)] = &[ - ('&', TokenTy::And, TokenTy::AndEq, TokenTy::AndAnd), - ('|', TokenTy::Or, TokenTy::OrEq, TokenTy::OrOr), - ('<', TokenTy::Lt, TokenTy::LtEq, TokenTy::LtLt), - ('>', TokenTy::Gt, TokenTy::GtEq, TokenTy::GtGt), - (':', TokenTy::Colon, TokenTy::ColonEq, TokenTy::ColonColon), +/// Trivial tokens that are two ASCII characters and can be matched directly +/// against the input source code. +pub const TWO_ASCII_TRIVIAL_TOKENS: &[(&[u8; 2], TokenTy)] = &[ + (b"->", TokenTy::SingleArrow), + (b"-=", TokenTy::MinusEq), + + (b"=>", TokenTy::DoubleArrow), + (b"==", TokenTy::EqEq), + + (b"&&", TokenTy::AndAnd), + (b"||", TokenTy::OrOr), + (b"<<", TokenTy::LtLt), + (b">>", TokenTy::GtGt), + (b"::", TokenTy::ColonColon), + + (b"|=", TokenTy::OrEq), + (b"&=", TokenTy::AndEq), + (b":=", TokenTy::ColonEq), + (b">=", TokenTy::GtEq), + (b"<=", TokenTy::LtEq), + (b"!=", TokenTy::BangEq), + (b"%=", TokenTy::ModEq), + (b"^=", TokenTy::XorEq), + (b"*=", TokenTy::StarEq), + (b"+=", TokenTy::PlusEq), + (b"/=", TokenTy::DivEq), ]; -/// Characters that can produce different tokens when followed by an equals sign or -/// a `>` for arrows. -pub const POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS: &[(char, TokenTy, TokenTy, TokenTy)] = &[ - ('-', TokenTy::Minus, TokenTy::MinusEq, TokenTy::SingleArrow), - ('=', TokenTy::Eq, TokenTy::EqEq, TokenTy::DoubleArrow), +/// Single ASCII character trivial tokens that can be matched directly against +/// the source code. +pub const SINGLE_ASCII_CHAR_TRIVIAL_TOKENS: &[(u8, TokenTy)] = &[ + (b'(', TokenTy::LeftParen), + (b')', TokenTy::RightParen), + (b'[', TokenTy::LeftBracket), + (b']', TokenTy::RightBracket), + (b'{', TokenTy::LeftCurly), + (b'}', TokenTy::RightCurly), + (b'@', TokenTy::At), + (b';', TokenTy::Semi), + (b'?', TokenTy::Question), + (b',', TokenTy::Comma), + (b'#', TokenTy::Hash), + (b'$', TokenTy::Dollar), + + (b'>', TokenTy::Gt), + (b'<', TokenTy::Lt), + (b'-', TokenTy::Minus), + (b':', TokenTy::Colon), + (b'!', TokenTy::Bang), + (b'=', TokenTy::Eq), + (b'&', TokenTy::And), + (b'|', TokenTy::Or), + (b'/', TokenTy::Div), + (b'+', TokenTy::Plus), + (b'^', TokenTy::Xor), + (b'*', TokenTy::Star), + (b'%', TokenTy::Mod), ]; -/// The number of rows of the generated prefix table. 
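// NOTE: when lexing, the two-byte table above must be consulted before the single-byte one, so
// that e.g. ">=" becomes a single `GtEq` token instead of `Gt` followed by `Eq`; the greedy
// matching loops later in this patch rely on that ordering.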
-pub const PREFIX_TABLE_ROWS: usize = { - SINGLE_CHAR_TOKENS.len() - + 2 * POSSIBLE_EQ_UPGRADE_TOKENS.len() - + 3 * POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS.len() - + 3 * POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS.len() -}; - -/// A relationship between a prefix and the token that should be generated when that prefix matches. -#[derive(Copy, Clone, Debug)] -pub struct PrefixToToken { - /// An array of two chars. In single char tokens, the second one should be a null character (`'\0'`). - /// the char_length field will be used to slice this buffer to get the actual prefix. - pub char_buffer: [char; 2], - /// The byte length of this prefix and all generated tokens by this prefix. - pub byte_len: usize, - /// The kind of [Token] generated when this prefix matches. - pub kind: TokenTy, -} - -impl PrefixToToken { - /// Convenience function to construct a [`PrefixToToken`] by calculating the length of both chars - /// (and ignoring the second one if it's null). - pub const fn new(chars: [char; 2], kind: TokenTy) -> Self { - PrefixToToken { - char_buffer: chars, - - byte_len: if chars[1] == '\0' { - chars[0].len_utf8() - } else { - chars[0].len_utf8() + chars[1].len_utf8() - }, - - kind, - } - } -} - -/// A full table generated at compile time using all the token tables -/// ([SINGLE_CHAR_TOKENS], [POSSIBLE_EQ_UPGRADE_TOKENS], [POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS], -/// [POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS]). -/// -/// This table can be iterated on in order when trying to match a token at the start of a fragment of source code. -#[rustfmt::skip] -pub const PREFIX_TABLE: [PrefixToToken; PREFIX_TABLE_ROWS] = { - // Make a mutable table with dummy values to replace with actual values. - let mut table: [PrefixToToken; PREFIX_TABLE_ROWS] = - [PrefixToToken { char_buffer: ['\0'; 2], byte_len: 0, kind: TokenTy::Unknown }; PREFIX_TABLE_ROWS]; - - // Current index to insert into table at. - let mut write_index: usize = 0; - - // Index used for reading from various tables. - let mut read_index: usize = 0; - - // Iterate first over all the single char tokens. - while read_index < SINGLE_CHAR_TOKENS.len() { - // Get row from source table. - let (c, token_kind) = SINGLE_CHAR_TOKENS[read_index]; - - // Put row in destination table. - table[write_index] = PrefixToToken::new([c, '\0'], token_kind); - - // Increment both indices. - read_index += 1; - write_index += 1; - } - - // Then do all the tokens that can be upgraded with an equals sign. - // Add the row for the token with the equals sign first so that when we iterate over this table in order, - // the version without the equals sign does not match prematurely. - read_index = 0; - while read_index < POSSIBLE_EQ_UPGRADE_TOKENS.len() { - let (c, without_eq, with_eq) = POSSIBLE_EQ_UPGRADE_TOKENS[read_index]; - - table[write_index] = PrefixToToken::new([c, '='], with_eq); - table[write_index + 1] = PrefixToToken::new([c, '\0'], without_eq); - - read_index += 1; - write_index += 2; - } - - // Do the same for the tokens that can be upgraded with an equals sign or doubled. - read_index = 0; - while read_index < POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS.len() { - let (c, without_eq, with_eq, doubled) = POSSIBLE_EQ_OR_DOUBLED_UPGRADE_TOKENS[read_index]; - - table[write_index] = PrefixToToken::new([c, c], doubled); - table[write_index + 1] = PrefixToToken::new([c, '='], with_eq); - table[write_index + 2] = PrefixToToken::new([c, '\0'], without_eq); - - read_index += 1; - write_index += 3; - } - - // Do the same for possible eq or arrow upgrades. 
- read_index = 0; - while read_index < POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS.len() { - let (c, without_eq, with_eq, with_arrow) = POSSIBLE_EQ_OR_ARROW_UPGRADE_TOKENS[read_index]; - - table[write_index] = PrefixToToken::new([c, '>'], with_arrow); - table[write_index + 1] = PrefixToToken::new([c, '='], with_eq); - table[write_index + 2] = PrefixToToken::new([c, '\0'], without_eq); - - read_index += 1; - write_index += 3; - } - - table -}; - /// The pattern that begins any single line comments (including doc comments). pub const SINGLE_LINE_COMMENT_PREFIX: &str = "//"; @@ -516,12 +421,16 @@ impl<'src> Lexer<'src> { // Ignore any whitespace at the start of the lexer. self.ignore_whitespace(); + // If the remaining input is empty, there is no token. + if self.remaining.is_empty() { + return None; + } + // Grab a copy of the initial lexer to compare and check when progress has been made. let initial_lexer: Self = self.fork(); // Attempt to parse a single line comment. Return it if it's documentation. // Rerun this function if there was a comment and it was ignored successfully. - match self.handle_single_line_comment() { // There was a single line comment ignored or no single line comment. None => { @@ -538,11 +447,6 @@ impl<'src> Lexer<'src> { token => return token, } - // If the remaining input is empty, there is no token. - if self.remaining.is_empty() { - return None; - } - // Try to handle a multi-line comment if there is one. match self.handle_multi_line_comment() { // There was an ignored comment or no comment. @@ -558,28 +462,31 @@ impl<'src> Lexer<'src> { token => return token, } - // To attempt to match a token from the prefix table, make a char iterator - // and get two chars from it to test equality. None of the tokens start with a - // null character so use that as a single of an unavailable char. - { - let mut char_iter: Chars = self.remaining.chars(); - let char_array: [char; 2] = [ - // Unchecked unwrap here since we know there's at least one char. - unsafe { char_iter.next().unwrap_unchecked() }, - char_iter.next().unwrap_or('\0'), - ]; - - // Next iterate through the prefix table to try to get any tokens that are covered there. - for prefix_meta in PREFIX_TABLE.iter() { - // If it's a single char comparison, only compare the first chars. - if prefix_meta.char_buffer[1] == '\0' && prefix_meta.char_buffer[0] == char_array[0] - { - return Some(self.split_token(prefix_meta.byte_len, prefix_meta.kind)); + // Do all trivial matching after matching comments to avoid matching "/" for "//". + + // Attempt to match any two-byte ASCII trivial tokens. + // This must be done before single-ascii byte tokens since matching is greedy. + if self.remaining.len() >= 2 { + // Get the first two bytes of the remaining fragment. + // SAFETY: We just checked length. + let bytes: &[u8] = unsafe { self.remaining.inner.as_bytes().get_unchecked(0..2) }; + // Match against each possible token pattern. + for (pattern, kind) in TWO_ASCII_TRIVIAL_TOKENS { + if bytes == *pattern { + return Some(self.split_token(2, *kind)); } + } + } + + // Do the same for single byte patterns. + { + // We can assume there is at least one more byte since we check above if the fragment + // is empty and return early if not. + let byte: &u8 = unsafe { self.remaining.inner.as_bytes().get_unchecked(0) }; - // Otherwise compare the whole slices. 
- if prefix_meta.char_buffer == char_array { - return Some(self.split_token(prefix_meta.byte_len, prefix_meta.kind)); + for (pattern, kind) in SINGLE_ASCII_CHAR_TRIVIAL_TOKENS { + if byte == pattern { + return Some(self.split_token(1, *kind)); } } } @@ -677,16 +584,8 @@ impl<'src> FusedIterator for Lexer<'src> {} #[cfg(test)] mod tests { use super::Lexer; - use super::PREFIX_TABLE; use crate::parser::lexer::TokenTy; - #[test] - #[ignore = "this test is just used for debugging the prefix table"] - /// Run this with `cargo test manual_debug_prefix_table -- --nocapture --ignored`. - fn manual_debug_prefix_table() { - dbg!(PREFIX_TABLE); - } - #[test] fn plus_and_plus_eq_tokens() { let mut plus = Lexer::new("+"); From f9b9a9892e9b8dce9580caa1da27a72f9f60f49e Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 24 Feb 2024 01:03:20 -0500 Subject: [PATCH 36/60] Docs --- wright/src/parser/fragment.rs | 3 ++- wright/src/parser/lexer.rs | 4 +++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/wright/src/parser/fragment.rs b/wright/src/parser/fragment.rs index a49561d8..3afd68d7 100644 --- a/wright/src/parser/fragment.rs +++ b/wright/src/parser/fragment.rs @@ -58,7 +58,8 @@ impl<'src> Fragment<'src> { /// Split this fragment into two sub fragments, with the first one being `bytes` long and the second containing the /// rest of this fragment. /// - /// Panics if the byte index is not in the fragment, or if it's on a char boundary. + /// # Panics: + /// - Panics if the byte index is not in the fragment, or if it's on a char boundary. pub fn split(&self, bytes: usize) -> (Self, Self) { // Use str's split_at. let (left, right) = self.inner.split_at(bytes); diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 5d8e4ac7..f5c0b035 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -53,7 +53,6 @@ pub const SINGLE_ASCII_CHAR_TRIVIAL_TOKENS: &[(u8, TokenTy)] = &[ (b',', TokenTy::Comma), (b'#', TokenTy::Hash), (b'$', TokenTy::Dollar), - (b'>', TokenTy::Gt), (b'<', TokenTy::Lt), (b'-', TokenTy::Minus), @@ -225,6 +224,9 @@ impl<'src> Lexer<'src> { /// Make a token by splitting a given number of bytes off of the `self.remaining` fragment /// and labeling them with the given kind. + /// + /// # Panics: + /// - Panics if the number of bytes lands out of bounds or in the middle of a character. fn split_token(&mut self, bytes: usize, kind: TokenTy) -> Token<'src> { let (token_fragment, new_remaining_fragment) = self.remaining.split(bytes); self.remaining = new_remaining_fragment; From 13807c78dac2235d073c0f4927705ede82287922 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 24 Feb 2024 01:04:15 -0500 Subject: [PATCH 37/60] rustfmt --- wright/src/bin/wright.rs | 6 +-- wright/src/parser/fragment.rs | 2 - wright/src/parser/lexer.rs | 91 ++++++++++++++++++----------------- 3 files changed, 48 insertions(+), 51 deletions(-) diff --git a/wright/src/bin/wright.rs b/wright/src/bin/wright.rs index ab022fda..cde9308b 100644 --- a/wright/src/bin/wright.rs +++ b/wright/src/bin/wright.rs @@ -43,7 +43,6 @@ enum DebugCommands { Tokens { /// A file of wright source code. file: PathBuf, - // /// Pretty print the source code with the tokens lined under them. // /// If not used, a list of tokens will be printed with their metadata. // #[arg(short, long)] @@ -65,10 +64,7 @@ fn main() -> Result<()> { // Print all the tokens for a given file. 
Some(Commands::Debug { - command: - DebugCommands::Tokens { - file, - }, + command: DebugCommands::Tokens { file }, }) => { let mut file_map: FileMap = FileMap::new(); // Add the given file to the file map. diff --git a/wright/src/parser/fragment.rs b/wright/src/parser/fragment.rs index 3afd68d7..fbec7851 100644 --- a/wright/src/parser/fragment.rs +++ b/wright/src/parser/fragment.rs @@ -147,6 +147,4 @@ mod tests { let b = Fragment { inner: "def" }; a.offset_from(&b); } - - } diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index f5c0b035..edd65cb6 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -4,27 +4,24 @@ //! defined for tokens. use super::fragment::Fragment; +use derive_more::Display; use std::iter::FusedIterator; use std::str::Chars; use std::{iter::Peekable, ptr}; -use derive_more::Display; use unicode_ident::{is_xid_continue, is_xid_start}; -/// Trivial tokens that are two ASCII characters and can be matched directly -/// against the input source code. +/// Trivial tokens that are two ASCII characters and can be matched directly +/// against the input source code. pub const TWO_ASCII_TRIVIAL_TOKENS: &[(&[u8; 2], TokenTy)] = &[ (b"->", TokenTy::SingleArrow), (b"-=", TokenTy::MinusEq), - (b"=>", TokenTy::DoubleArrow), (b"==", TokenTy::EqEq), - (b"&&", TokenTy::AndAnd), (b"||", TokenTy::OrOr), (b"<<", TokenTy::LtLt), (b">>", TokenTy::GtGt), (b"::", TokenTy::ColonColon), - (b"|=", TokenTy::OrEq), (b"&=", TokenTy::AndEq), (b":=", TokenTy::ColonEq), @@ -38,8 +35,8 @@ pub const TWO_ASCII_TRIVIAL_TOKENS: &[(&[u8; 2], TokenTy)] = &[ (b"/=", TokenTy::DivEq), ]; -/// Single ASCII character trivial tokens that can be matched directly against -/// the source code. +/// Single ASCII character trivial tokens that can be matched directly against +/// the source code. pub const SINGLE_ASCII_CHAR_TRIVIAL_TOKENS: &[(u8, TokenTy)] = &[ (b'(', TokenTy::LeftParen), (b')', TokenTy::RightParen), @@ -224,9 +221,9 @@ impl<'src> Lexer<'src> { /// Make a token by splitting a given number of bytes off of the `self.remaining` fragment /// and labeling them with the given kind. - /// + /// /// # Panics: - /// - Panics if the number of bytes lands out of bounds or in the middle of a character. + /// - Panics if the number of bytes lands out of bounds or in the middle of a character. fn split_token(&mut self, bytes: usize, kind: TokenTy) -> Token<'src> { let (token_fragment, new_remaining_fragment) = self.remaining.split(bytes); self.remaining = new_remaining_fragment; @@ -320,7 +317,9 @@ impl<'src> Lexer<'src> { (true, false) => Some(TokenTy::InnerDocComment), (false, true) => Some(TokenTy::OuterDocComment), (false, false) => None, - (true, true) => unreachable!("Lexer should not match multiple comment types at once."), + (true, true) => { + unreachable!("Lexer should not match multiple comment types at once.") + } }; // Map the variant to a token to return. @@ -343,9 +342,9 @@ impl<'src> Lexer<'src> { None } - /// Attempt to read/consume a multi-line comment from the start of the `remaining` fragment. + /// Attempt to read/consume a multi-line comment from the start of the `remaining` fragment. fn handle_multi_line_comment(&mut self) -> Option> { - // Handle corner cases here so we don't have to below. + // Handle corner cases here so we don't have to below. // These are both considered empty non-documenting comments. 
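        // Without these two guards, "/**/" would fall through below and be mis-read as an empty
        // inner block doc comment, since after consuming "/*" the next character is '*'. Treating
        // "/**/" and "/***/" as plain empty comments mirrors the Rust comment grammar cited above.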
if self.consume("/***/") { return None; @@ -355,40 +354,42 @@ impl<'src> Lexer<'src> { return None; } - // Make a fork of the lexer to avoid modifying this lexer if we fail to parse. + // Make a fork of the lexer to avoid modifying this lexer if we fail to parse. let mut fork: Self = self.fork(); - // Try to parse the start of a multi-line comment. + // Try to parse the start of a multi-line comment. if fork.consume(MULTI_LINE_COMMENT_START) { - // Check if this is a doc comment. + // Check if this is a doc comment. let is_outer_doc: bool = fork.matches("!"); - // Use this to indicate that more than one following asterix is not a doc comment. + // Use this to indicate that more than one following asterix is not a doc comment. let is_inner_doc: bool = fork.matches("*") && !fork.matches("**"); - // Consume until we see the end of the doc comment. If we run out of characters, consider the - // comment unterminated. + // Consume until we see the end of the doc comment. If we run out of characters, consider the + // comment unterminated. while !fork.matches(MULTI_LINE_COMMENT_END) { - // Handle nested comments here: - if fork.matches(MULTI_LINE_COMMENT_START) { - // Discard the output -- don't care about doc comments in other comments. + // Handle nested comments here: + if fork.matches(MULTI_LINE_COMMENT_START) { + // Discard the output -- don't care about doc comments in other comments. fork.handle_multi_line_comment(); continue; } // Handle unterminated comments here. if fork.remaining.is_empty() { - // If we have not hit a "*/" before the end of the input, return an unterminated block comment. + // If we have not hit a "*/" before the end of the input, return an unterminated block comment. let bytes_consumed: usize = fork.remaining.offset_from(&self.remaining); - // Split the token and return it. - return Some(self.split_token(bytes_consumed, TokenTy::UnterminatedBlockComment)); + // Split the token and return it. + return Some( + self.split_token(bytes_consumed, TokenTy::UnterminatedBlockComment), + ); } - - // If there's still input, and not a nested comment, consume it. + + // If there's still input, and not a nested comment, consume it. fork.consume_any(); } - // If we get here, the comment was terminated. Consume the terminating characters, and return. - // Use debug assert here to make sure that the comment is actually terminated. + // If we get here, the comment was terminated. Consume the terminating characters, and return. + // Use debug assert here to make sure that the comment is actually terminated. debug_assert!(fork.consume(MULTI_LINE_COMMENT_END), "comment is actually terminated"); // Determine the kind of token to produce (if any). @@ -396,10 +397,12 @@ impl<'src> Lexer<'src> { (true, false) => Some(TokenTy::InnerBlockDocComment), (false, true) => Some(TokenTy::OuterBlockDocComment), (false, false) => None, - (true, true) => unreachable!("Lexer should not match multiple comment types at once."), + (true, true) => { + unreachable!("Lexer should not match multiple comment types at once.") + } }; - // Make the token to return. + // Make the token to return. let token: Option = variant.map(|kind| { // Get the number of bytes we have consumed using `offset_from`. let bytes_consumed: usize = fork.remaining.offset_from(&self.remaining); @@ -413,8 +416,8 @@ impl<'src> Lexer<'src> { return token; } - // If the fork did not consume a multi-line comment start, return None and do - // not update this lexer. 
+ // If the fork did not consume a multi-line comment start, return None and do + // not update this lexer. None } @@ -449,28 +452,28 @@ impl<'src> Lexer<'src> { token => return token, } - // Try to handle a multi-line comment if there is one. + // Try to handle a multi-line comment if there is one. match self.handle_multi_line_comment() { - // There was an ignored comment or no comment. + // There was an ignored comment or no comment. None => { - // If the lexer was changed, restart this function. + // If the lexer was changed, restart this function. if !self.remaining.ptr_eq(&initial_lexer.remaining) { return self.next_token(); } } // If there was a block style doc-comment, or an unterminated multi-line comment - // return. + // return. token => return token, } // Do all trivial matching after matching comments to avoid matching "/" for "//". - - // Attempt to match any two-byte ASCII trivial tokens. - // This must be done before single-ascii byte tokens since matching is greedy. + + // Attempt to match any two-byte ASCII trivial tokens. + // This must be done before single-ascii byte tokens since matching is greedy. if self.remaining.len() >= 2 { - // Get the first two bytes of the remaining fragment. - // SAFETY: We just checked length. + // Get the first two bytes of the remaining fragment. + // SAFETY: We just checked length. let bytes: &[u8] = unsafe { self.remaining.inner.as_bytes().get_unchecked(0..2) }; // Match against each possible token pattern. for (pattern, kind) in TWO_ASCII_TRIVIAL_TOKENS { @@ -481,9 +484,9 @@ impl<'src> Lexer<'src> { } // Do the same for single byte patterns. - { + { // We can assume there is at least one more byte since we check above if the fragment - // is empty and return early if not. + // is empty and return early if not. let byte: &u8 = unsafe { self.remaining.inner.as_bytes().get_unchecked(0) }; for (pattern, kind) in SINGLE_ASCII_CHAR_TRIVIAL_TOKENS { From 450daa34ab241da43cfe225df70714c8b19e0be6 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 24 Feb 2024 04:03:04 -0500 Subject: [PATCH 38/60] Refactor parts of the lexer --- wright/src/bin/wright.rs | 2 +- wright/src/parser/fragment.rs | 23 +++- wright/src/parser/lexer.rs | 188 +++-------------------------- wright/src/parser/lexer/token.rs | 90 ++++++++++++++ wright/src/parser/lexer/trivial.rs | 103 ++++++++++++++++ 5 files changed, 232 insertions(+), 174 deletions(-) create mode 100644 wright/src/parser/lexer/token.rs create mode 100644 wright/src/parser/lexer/trivial.rs diff --git a/wright/src/bin/wright.rs b/wright/src/bin/wright.rs index cde9308b..669a3afb 100644 --- a/wright/src/bin/wright.rs +++ b/wright/src/bin/wright.rs @@ -6,7 +6,7 @@ use codespan_reporting::files::Files; use std::{path::PathBuf, time::Instant}; use wright::{ filemap::{FileId, FileMap}, - parser::lexer::{Lexer, Token}, + parser::lexer::{Lexer, token::Token}, repl, }; diff --git a/wright/src/parser/fragment.rs b/wright/src/parser/fragment.rs index fbec7851..3e2bce43 100644 --- a/wright/src/parser/fragment.rs +++ b/wright/src/parser/fragment.rs @@ -60,19 +60,34 @@ impl<'src> Fragment<'src> { /// /// # Panics: /// - Panics if the byte index is not in the fragment, or if it's on a char boundary. - pub fn split(&self, bytes: usize) -> (Self, Self) { + pub fn split_at(&self, bytes: usize) -> (Self, Self) { // Use str's split_at. let (left, right) = self.inner.split_at(bytes); (Self { inner: left }, Self { inner: right }) } + /// Unsafe version of [`Fragment::split_at`]. 
Splits this [Fragment] into two subfragments, + /// where the left one contains the first `bytes` bytes of the fragment, and the right one + /// contains the rest. + /// + /// # Safety: + /// - Undefined Behavior occurs if `bytes` is greater than the length of the [Fragment]. + /// - Undefined Behavior occurs if `bytes` is not on a UTF-8 character boundary. + /// - See [str::get_unchecked] for more details. + pub unsafe fn split_at_unchecked(&self, bytes: usize) -> (Self, Self) { + let left: &str = self.inner.get_unchecked(..bytes); + let right: &str = self.inner.get_unchecked(bytes..); + + (Fragment { inner: left }, Fragment { inner: right }) + } + /// Get an iterator over the characters in this fragment. pub fn chars(&self) -> Chars<'src> { self.inner.chars() } - /// Get the number of bytes between the beginning of [`origin`] and the beginning of [`self`]. + /// Get the number of bytes between the beginning of `origin` and the beginning of [`self`]. /// /// # Panics: /// - Panics if [`self`] is not a fragment within `origin` according to [`Fragment::contains`]. @@ -127,7 +142,7 @@ mod tests { #[test] fn test_split_single() { let a = Fragment { inner: "+" }; - let (left, right) = a.split(1); + let (left, right) = a.split_at(1); assert_eq!(left.inner, "+"); assert_eq!(right.inner, ""); } @@ -135,7 +150,7 @@ mod tests { #[test] fn test_offset_from() { let a = Fragment { inner: "abcde" }; - let (b, c) = a.split(2); + let (b, c) = a.split_at(2); assert_eq!(b.offset_from(&a), 0); assert_eq!(c.offset_from(&a), 2); } diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index edd65cb6..35f4e9a9 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -4,66 +4,14 @@ //! defined for tokens. use super::fragment::Fragment; -use derive_more::Display; use std::iter::FusedIterator; use std::str::Chars; use std::{iter::Peekable, ptr}; use unicode_ident::{is_xid_continue, is_xid_start}; +use token::{Token, TokenTy}; -/// Trivial tokens that are two ASCII characters and can be matched directly -/// against the input source code. -pub const TWO_ASCII_TRIVIAL_TOKENS: &[(&[u8; 2], TokenTy)] = &[ - (b"->", TokenTy::SingleArrow), - (b"-=", TokenTy::MinusEq), - (b"=>", TokenTy::DoubleArrow), - (b"==", TokenTy::EqEq), - (b"&&", TokenTy::AndAnd), - (b"||", TokenTy::OrOr), - (b"<<", TokenTy::LtLt), - (b">>", TokenTy::GtGt), - (b"::", TokenTy::ColonColon), - (b"|=", TokenTy::OrEq), - (b"&=", TokenTy::AndEq), - (b":=", TokenTy::ColonEq), - (b">=", TokenTy::GtEq), - (b"<=", TokenTy::LtEq), - (b"!=", TokenTy::BangEq), - (b"%=", TokenTy::ModEq), - (b"^=", TokenTy::XorEq), - (b"*=", TokenTy::StarEq), - (b"+=", TokenTy::PlusEq), - (b"/=", TokenTy::DivEq), -]; - -/// Single ASCII character trivial tokens that can be matched directly against -/// the source code. 
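// A standalone sketch (not part of this patch) of the invariant `Fragment::split_at` and
// `Fragment::offset_from` are built around: `split_at` defers to `str::split_at`, so the right
// half always begins exactly `bytes` bytes past the start of the original fragment. Plain
// `&str` values and pointer arithmetic stand in for the `Fragment` type here.
fn main() {
    let original: &str = "abcde";
    let (left, right) = original.split_at(2);

    // Mirrors the `test_offset_from` case above: the left half sits at offset 0 from the
    // original, the right half at offset 2.
    let left_offset = left.as_ptr() as usize - original.as_ptr() as usize;
    let right_offset = right.as_ptr() as usize - original.as_ptr() as usize;

    assert_eq!((left, right), ("ab", "cde"));
    assert_eq!(left_offset, 0);
    assert_eq!(right_offset, 2);
}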
-pub const SINGLE_ASCII_CHAR_TRIVIAL_TOKENS: &[(u8, TokenTy)] = &[ - (b'(', TokenTy::LeftParen), - (b')', TokenTy::RightParen), - (b'[', TokenTy::LeftBracket), - (b']', TokenTy::RightBracket), - (b'{', TokenTy::LeftCurly), - (b'}', TokenTy::RightCurly), - (b'@', TokenTy::At), - (b';', TokenTy::Semi), - (b'?', TokenTy::Question), - (b',', TokenTy::Comma), - (b'#', TokenTy::Hash), - (b'$', TokenTy::Dollar), - (b'>', TokenTy::Gt), - (b'<', TokenTy::Lt), - (b'-', TokenTy::Minus), - (b':', TokenTy::Colon), - (b'!', TokenTy::Bang), - (b'=', TokenTy::Eq), - (b'&', TokenTy::And), - (b'|', TokenTy::Or), - (b'/', TokenTy::Div), - (b'+', TokenTy::Plus), - (b'^', TokenTy::Xor), - (b'*', TokenTy::Star), - (b'%', TokenTy::Mod), -]; +pub mod token; +pub mod trivial; /// The pattern that begins any single line comments (including doc comments). pub const SINGLE_LINE_COMMENT_PREFIX: &str = "//"; @@ -81,91 +29,6 @@ pub struct Lexer<'src> { pub remaining: Fragment<'src>, } -/// A token in wright source code. -#[derive(Debug, Display)] -#[display(fmt = "\"{}\" ({:?})", "fragment.inner", variant)] -pub struct Token<'src> { - /// What type of token this is. - pub variant: TokenTy, - /// The matching fragment of source code -- this contains the location and length data for the token. - pub fragment: Fragment<'src>, -} - -/// The different types of tokens in wright source. -#[rustfmt::skip] // Turn off auto reformat. -#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub enum TokenTy { - LeftCurly, RightCurly, - LeftBracket, RightBracket, - LeftParen, RightParen, - - Plus, PlusEq, - Star, StarEq, - Div, DivEq, - Xor, XorEq, - Mod, ModEq, - Bang, BangEq, - - Minus, MinusEq, SingleArrow, - Eq, EqEq, DoubleArrow, - - Lt, LtEq, LtLt, - Gt, GtEq, GtGt, - And, AndEq, AndAnd, - Or, OrEq, OrOr, - Colon, ColonEq, ColonColon, - - At, - Tilde, - Semi, - Dot, - Comma, - Hash, - Question, - Dollar, - - // Not in the same group as the other ones there since it can be used at the start of identifiers. - Underscore, - - Identifier, - - OuterDocComment, OuterBlockDocComment, - InnerDocComment, InnerBlockDocComment, - - /// Indicates a block style comment without termination. - UnterminatedBlockComment, - - - KwRecord, - KwType, - KwEnum, - KwUnion, - KwFunc, - KwRepr, - KwImpl, - KwConstraint, - KwReferences, - KwTrait, - KwUse, - KwAs, - KwConst, - KwMod, - KwIf, - KwElse, - KwFor, - KwIn, - KwWhile, - KwTrue, - KwFalse, - KwLoop, - KwWhere, - - IntegerLiteral, - - /// Unknown character in lexer fragment. - Unknown -} - impl<'src> Lexer<'src> { /// Get the number of bytes remaining that we need to transform into tokens. pub const fn bytes_remaining(&self) -> usize { @@ -225,7 +88,7 @@ impl<'src> Lexer<'src> { /// # Panics: /// - Panics if the number of bytes lands out of bounds or in the middle of a character. fn split_token(&mut self, bytes: usize, kind: TokenTy) -> Token<'src> { - let (token_fragment, new_remaining_fragment) = self.remaining.split(bytes); + let (token_fragment, new_remaining_fragment) = self.remaining.split_at(bytes); self.remaining = new_remaining_fragment; Token { @@ -234,6 +97,17 @@ impl<'src> Lexer<'src> { } } + /// Unsafe version of [Lexer::split_token]. + /// + /// # Safety: + /// - This function matches the safety guarantees of [Fragment::split_at_unchecked]. 
+ unsafe fn split_token_unchecked(&mut self, bytes: usize, kind: TokenTy) -> Token<'src> { + let (token_fragment, new_remaining_fragment) = self.remaining.split_at_unchecked(bytes); + self.remaining = new_remaining_fragment; + + Token { variant: kind, fragment: token_fragment } + } + /// "Fork" this lexer, creating a new [`Lexer`] at the same position as this one that can be used for /// failable parsing. This can be compared to the original lexer it was forked from using [Fragment::offset_from] /// on the underlying `remaining` fragments. @@ -467,33 +341,9 @@ impl<'src> Lexer<'src> { token => return token, } - // Do all trivial matching after matching comments to avoid matching "/" for "//". - - // Attempt to match any two-byte ASCII trivial tokens. - // This must be done before single-ascii byte tokens since matching is greedy. - if self.remaining.len() >= 2 { - // Get the first two bytes of the remaining fragment. - // SAFETY: We just checked length. - let bytes: &[u8] = unsafe { self.remaining.inner.as_bytes().get_unchecked(0..2) }; - // Match against each possible token pattern. - for (pattern, kind) in TWO_ASCII_TRIVIAL_TOKENS { - if bytes == *pattern { - return Some(self.split_token(2, *kind)); - } - } - } - - // Do the same for single byte patterns. - { - // We can assume there is at least one more byte since we check above if the fragment - // is empty and return early if not. - let byte: &u8 = unsafe { self.remaining.inner.as_bytes().get_unchecked(0) }; - - for (pattern, kind) in SINGLE_ASCII_CHAR_TRIVIAL_TOKENS { - if byte == pattern { - return Some(self.split_token(1, *kind)); - } - } + // Handle a trivial token if there is one. + if let Some(token) = trivial::try_consume_trivial_token(self) { + return Some(token); } // Next attempt to match a keyword or identifier. @@ -512,7 +362,7 @@ impl<'src> Lexer<'src> { .sum::(); // Split the number of bytes we consumed. - let (ident_frag, new_remaining) = self.remaining.split(bytes_consumed); + let (ident_frag, new_remaining) = self.remaining.split_at(bytes_consumed); // Get the token kind to produce for this fragment. let variant = Lexer::identifier_or_keyword(ident_frag); // Update the lexers remaining fragment. diff --git a/wright/src/parser/lexer/token.rs b/wright/src/parser/lexer/token.rs new file mode 100644 index 00000000..e30b6851 --- /dev/null +++ b/wright/src/parser/lexer/token.rs @@ -0,0 +1,90 @@ +//! Token models. + +use derive_more::Display; +use crate::parser::fragment::Fragment; + +/// A token in wright source code. +#[derive(Debug, Display)] +#[display(fmt = "\"{}\" ({:?})", "fragment.inner", variant)] +pub struct Token<'src> { + /// What type of token this is. + pub variant: TokenTy, + /// The matching fragment of source code -- this contains the location and length data for the token. + pub fragment: Fragment<'src>, +} + +/// The different types of tokens in wright source. +#[rustfmt::skip] // Turn off auto reformat. +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum TokenTy { + LeftCurly, RightCurly, + LeftBracket, RightBracket, + LeftParen, RightParen, + + Plus, PlusEq, + Star, StarEq, + Div, DivEq, + Xor, XorEq, + Mod, ModEq, + Bang, BangEq, + + Minus, MinusEq, SingleArrow, + Eq, EqEq, DoubleArrow, + + Lt, LtEq, LtLt, + Gt, GtEq, GtGt, + And, AndEq, AndAnd, + Or, OrEq, OrOr, + Colon, ColonEq, ColonColon, + + At, + Tilde, + Semi, + Dot, + Comma, + Hash, + Question, + Dollar, + + // Not in the same group as the other ones there since it can be used at the start of identifiers. 
+ Underscore, + + Identifier, + + OuterDocComment, OuterBlockDocComment, + InnerDocComment, InnerBlockDocComment, + + /// Indicates a block style comment without termination. + UnterminatedBlockComment, + + KwRecord, + KwType, + KwEnum, + KwUnion, + KwFunc, + KwRepr, + KwImpl, + KwConstraint, + KwReferences, + KwTrait, + KwUse, + KwAs, + KwConst, + KwMod, + KwIf, + KwElse, + KwFor, + KwIn, + KwWhile, + KwTrue, + KwFalse, + KwLoop, + KwWhere, + + IntegerLiteral, + StringLiteral, + CharLiteral, + + /// Unknown character in lexer fragment. + Unknown +} diff --git a/wright/src/parser/lexer/trivial.rs b/wright/src/parser/lexer/trivial.rs new file mode 100644 index 00000000..7fb9284d --- /dev/null +++ b/wright/src/parser/lexer/trivial.rs @@ -0,0 +1,103 @@ +//! Trivial tokens and their implementation. + +use super::{token::{Token, TokenTy}, Lexer}; + +/// Trivial tokens that are two ASCII characters and can be matched directly +/// against the input source code. +pub const TWO_ASCII_TRIVIAL_TOKENS: &[(&[u8; 2], TokenTy)] = &[ + (b"->", TokenTy::SingleArrow), + (b"-=", TokenTy::MinusEq), + (b"=>", TokenTy::DoubleArrow), + (b"==", TokenTy::EqEq), + (b"&&", TokenTy::AndAnd), + (b"||", TokenTy::OrOr), + (b"<<", TokenTy::LtLt), + (b">>", TokenTy::GtGt), + (b"::", TokenTy::ColonColon), + (b"|=", TokenTy::OrEq), + (b"&=", TokenTy::AndEq), + (b":=", TokenTy::ColonEq), + (b">=", TokenTy::GtEq), + (b"<=", TokenTy::LtEq), + (b"!=", TokenTy::BangEq), + (b"%=", TokenTy::ModEq), + (b"^=", TokenTy::XorEq), + (b"*=", TokenTy::StarEq), + (b"+=", TokenTy::PlusEq), + (b"/=", TokenTy::DivEq), +]; + +/// Single ASCII character trivial tokens that can be matched directly against +/// the source code. +pub const SINGLE_ASCII_CHAR_TRIVIAL_TOKENS: &[(u8, TokenTy)] = &[ + (b'(', TokenTy::LeftParen), + (b')', TokenTy::RightParen), + (b'[', TokenTy::LeftBracket), + (b']', TokenTy::RightBracket), + (b'{', TokenTy::LeftCurly), + (b'}', TokenTy::RightCurly), + (b'@', TokenTy::At), + (b';', TokenTy::Semi), + (b'?', TokenTy::Question), + (b',', TokenTy::Comma), + (b'#', TokenTy::Hash), + (b'$', TokenTy::Dollar), + (b'>', TokenTy::Gt), + (b'<', TokenTy::Lt), + (b'-', TokenTy::Minus), + (b':', TokenTy::Colon), + (b'!', TokenTy::Bang), + (b'=', TokenTy::Eq), + (b'&', TokenTy::And), + (b'|', TokenTy::Or), + (b'/', TokenTy::Div), + (b'+', TokenTy::Plus), + (b'^', TokenTy::Xor), + (b'*', TokenTy::Star), + (b'%', TokenTy::Mod), +]; + + +/// Attempt to consume a "trivial" token from the start of the [Lexer]'s [Lexer::remaining] fragment. +/// +/// Leave the lexer unmodified if one is not available. +#[inline] +pub fn try_consume_trivial_token<'src>(lexer: &mut Lexer<'src>) -> Option> { + // Get the number of bytes remaining, since we need at least 1 to parse anything. + let bytes_remaining: usize = lexer.bytes_remaining(); + + // No token if there are no bytes of source left. + if bytes_remaining == 0 { return None; } + + // Attempt to match any two-byte ASCII trivial tokens. + // This must be done before single-ascii byte tokens since matching is greedy. + if bytes_remaining >= 2 { + // Get the first two bytes of the remaining fragment. + // SAFETY: We just checked length. + let bytes: &[u8] = unsafe { lexer.remaining.inner.as_bytes().get_unchecked(0..2) }; + + // Match against each possible token pattern. + for (pattern, kind) in TWO_ASCII_TRIVIAL_TOKENS { + if bytes == *pattern { + // SAFETY: We have already done bounds checking, and this cannot be a character + // boundary since we just matched against ASCII characters. 
+ return Some(unsafe { lexer.split_token_unchecked(2, *kind) }); + } + } + } + + // Do the same for single byte patterns. + // SAFETY: We checked that the number of bytes remaining is not 0 above. + let byte: &u8 = unsafe { lexer.remaining.inner.as_bytes().get_unchecked(0) }; + + for (pattern, kind) in SINGLE_ASCII_CHAR_TRIVIAL_TOKENS { + if byte == pattern { + // SAFETTY: If we matched, then the first byte is ASCII, and therefor we don't have to worry + // about bounds or unicode boundaries. + return Some(unsafe { lexer.split_token_unchecked(1, *kind) }); + } + } + + // If nothing else has matched, there is no trivial token available. + None +} From 7f8919dd5cd61fd251389b3d1246e9ac798d8222 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 24 Feb 2024 04:03:30 -0500 Subject: [PATCH 39/60] clippy --- wright/src/parser/fragment.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/wright/src/parser/fragment.rs b/wright/src/parser/fragment.rs index 3e2bce43..21ffa775 100644 --- a/wright/src/parser/fragment.rs +++ b/wright/src/parser/fragment.rs @@ -71,7 +71,7 @@ impl<'src> Fragment<'src> { /// where the left one contains the first `bytes` bytes of the fragment, and the right one /// contains the rest. /// - /// # Safety: + /// # Safety /// - Undefined Behavior occurs if `bytes` is greater than the length of the [Fragment]. /// - Undefined Behavior occurs if `bytes` is not on a UTF-8 character boundary. /// - See [str::get_unchecked] for more details. From bea8f047c7dd5b95ece27018d2a407e0e6c38b8c Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 24 Feb 2024 04:04:19 -0500 Subject: [PATCH 40/60] fmt --- wright/src/bin/wright.rs | 2 +- wright/src/parser/fragment.rs | 10 +++++----- wright/src/parser/lexer.rs | 13 ++++++++----- wright/src/parser/lexer/token.rs | 4 ++-- wright/src/parser/lexer/trivial.rs | 28 ++++++++++++++++------------ 5 files changed, 32 insertions(+), 25 deletions(-) diff --git a/wright/src/bin/wright.rs b/wright/src/bin/wright.rs index 669a3afb..16e6ef90 100644 --- a/wright/src/bin/wright.rs +++ b/wright/src/bin/wright.rs @@ -6,7 +6,7 @@ use codespan_reporting::files::Files; use std::{path::PathBuf, time::Instant}; use wright::{ filemap::{FileId, FileMap}, - parser::lexer::{Lexer, token::Token}, + parser::lexer::{token::Token, Lexer}, repl, }; diff --git a/wright/src/parser/fragment.rs b/wright/src/parser/fragment.rs index 21ffa775..01b3eedf 100644 --- a/wright/src/parser/fragment.rs +++ b/wright/src/parser/fragment.rs @@ -68,13 +68,13 @@ impl<'src> Fragment<'src> { } /// Unsafe version of [`Fragment::split_at`]. Splits this [Fragment] into two subfragments, - /// where the left one contains the first `bytes` bytes of the fragment, and the right one - /// contains the rest. - /// + /// where the left one contains the first `bytes` bytes of the fragment, and the right one + /// contains the rest. + /// /// # Safety /// - Undefined Behavior occurs if `bytes` is greater than the length of the [Fragment]. - /// - Undefined Behavior occurs if `bytes` is not on a UTF-8 character boundary. - /// - See [str::get_unchecked] for more details. + /// - Undefined Behavior occurs if `bytes` is not on a UTF-8 character boundary. + /// - See [str::get_unchecked] for more details. 
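// A standalone sketch (not part of this patch) of why the SAFETY comments above hold: an ASCII
// byte is always a complete one-byte UTF-8 character, so the index just past a matched one- or
// two-byte ASCII pattern is necessarily a character boundary, even when multi-byte characters
// follow it. The example string is illustrative.
fn main() {
    let source = "+=π"; // two ASCII bytes followed by a two-byte character

    // The candidate split point sits immediately after the matched ASCII pattern.
    assert!(source.as_bytes().starts_with(b"+="));
    assert!(source.is_char_boundary(2));

    // A checked split at that index therefore cannot panic, which is what makes the
    // unchecked variant sound to call there.
    let (token_text, rest) = source.split_at(2);
    assert_eq!(token_text, "+=");
    assert_eq!(rest, "π");
}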
pub unsafe fn split_at_unchecked(&self, bytes: usize) -> (Self, Self) { let left: &str = self.inner.get_unchecked(..bytes); let right: &str = self.inner.get_unchecked(bytes..); diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 35f4e9a9..f7b8dc55 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -7,8 +7,8 @@ use super::fragment::Fragment; use std::iter::FusedIterator; use std::str::Chars; use std::{iter::Peekable, ptr}; -use unicode_ident::{is_xid_continue, is_xid_start}; use token::{Token, TokenTy}; +use unicode_ident::{is_xid_continue, is_xid_start}; pub mod token; pub mod trivial; @@ -97,15 +97,18 @@ impl<'src> Lexer<'src> { } } - /// Unsafe version of [Lexer::split_token]. - /// + /// Unsafe version of [Lexer::split_token]. + /// /// # Safety: /// - This function matches the safety guarantees of [Fragment::split_at_unchecked]. unsafe fn split_token_unchecked(&mut self, bytes: usize, kind: TokenTy) -> Token<'src> { let (token_fragment, new_remaining_fragment) = self.remaining.split_at_unchecked(bytes); self.remaining = new_remaining_fragment; - Token { variant: kind, fragment: token_fragment } + Token { + variant: kind, + fragment: token_fragment, + } } /// "Fork" this lexer, creating a new [`Lexer`] at the same position as this one that can be used for @@ -341,7 +344,7 @@ impl<'src> Lexer<'src> { token => return token, } - // Handle a trivial token if there is one. + // Handle a trivial token if there is one. if let Some(token) = trivial::try_consume_trivial_token(self) { return Some(token); } diff --git a/wright/src/parser/lexer/token.rs b/wright/src/parser/lexer/token.rs index e30b6851..f6a9f9e8 100644 --- a/wright/src/parser/lexer/token.rs +++ b/wright/src/parser/lexer/token.rs @@ -1,7 +1,7 @@ -//! Token models. +//! Token models. -use derive_more::Display; use crate::parser::fragment::Fragment; +use derive_more::Display; /// A token in wright source code. #[derive(Debug, Display)] diff --git a/wright/src/parser/lexer/trivial.rs b/wright/src/parser/lexer/trivial.rs index 7fb9284d..1e2f52a1 100644 --- a/wright/src/parser/lexer/trivial.rs +++ b/wright/src/parser/lexer/trivial.rs @@ -1,6 +1,9 @@ //! Trivial tokens and their implementation. -use super::{token::{Token, TokenTy}, Lexer}; +use super::{ + token::{Token, TokenTy}, + Lexer, +}; /// Trivial tokens that are two ASCII characters and can be matched directly /// against the input source code. @@ -57,17 +60,18 @@ pub const SINGLE_ASCII_CHAR_TRIVIAL_TOKENS: &[(u8, TokenTy)] = &[ (b'%', TokenTy::Mod), ]; - -/// Attempt to consume a "trivial" token from the start of the [Lexer]'s [Lexer::remaining] fragment. -/// -/// Leave the lexer unmodified if one is not available. +/// Attempt to consume a "trivial" token from the start of the [Lexer]'s [Lexer::remaining] fragment. +/// +/// Leave the lexer unmodified if one is not available. #[inline] pub fn try_consume_trivial_token<'src>(lexer: &mut Lexer<'src>) -> Option> { // Get the number of bytes remaining, since we need at least 1 to parse anything. let bytes_remaining: usize = lexer.bytes_remaining(); - // No token if there are no bytes of source left. - if bytes_remaining == 0 { return None; } + // No token if there are no bytes of source left. + if bytes_remaining == 0 { + return None; + } // Attempt to match any two-byte ASCII trivial tokens. // This must be done before single-ascii byte tokens since matching is greedy. 
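// A minimal standalone sketch (not part of this patch) of why the two-byte table has to be
// consulted before the one-byte table: matching is greedy, so "+=" must become a single PlusEq
// token rather than Plus followed by Eq. The function and the tiny pattern tables here are
// illustrative stand-ins for `try_consume_trivial_token`.
fn longest_trivial_match(input: &str) -> Option<(&'static str, &str)> {
    const TWO_BYTE: &[&str] = &["+=", "->", "=="];
    const ONE_BYTE: &[&str] = &["+", "-", "="];

    // Longer patterns first: the first hit wins, so the checking order encodes greediness.
    for pattern in TWO_BYTE.iter().chain(ONE_BYTE.iter()) {
        if let Some(rest) = input.strip_prefix(*pattern) {
            return Some((*pattern, rest));
        }
    }

    None
}

fn main() {
    // "+=" is taken as one token, leaving "1" for the next lexing step.
    assert_eq!(longest_trivial_match("+=1"), Some(("+=", "1")));
    // "+1" falls through to the one-byte pattern.
    assert_eq!(longest_trivial_match("+1"), Some(("+", "1")));
    // No trivial token at all: the caller moves on to other token rules.
    assert_eq!(longest_trivial_match("abc"), None);
}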
@@ -79,25 +83,25 @@ pub fn try_consume_trivial_token<'src>(lexer: &mut Lexer<'src>) -> Option Date: Mon, 26 Feb 2024 01:19:44 -0500 Subject: [PATCH 41/60] refactor comments out to their own file --- wright/src/parser/lexer.rs | 220 +++++++--------------------- wright/src/parser/lexer/comments.rs | 133 +++++++++++++++++ 2 files changed, 184 insertions(+), 169 deletions(-) create mode 100644 wright/src/parser/lexer/comments.rs diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index f7b8dc55..601d8233 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -3,6 +3,8 @@ //! Note that this will strip out comments and whitespace, returning only fragments that match one of the paterns //! defined for tokens. +use self::comments::{try_match_block_comment, try_match_single_line_comment}; + use super::fragment::Fragment; use std::iter::FusedIterator; use std::str::Chars; @@ -12,15 +14,7 @@ use unicode_ident::{is_xid_continue, is_xid_start}; pub mod token; pub mod trivial; - -/// The pattern that begins any single line comments (including doc comments). -pub const SINGLE_LINE_COMMENT_PREFIX: &str = "//"; - -/// The pattern that starts any multi-line comments (including doc comments). -pub const MULTI_LINE_COMMENT_START: &str = "/*"; - -/// The pattern that ends any multi-line comments (including doc comments). -pub const MULTI_LINE_COMMENT_END: &str = "*/"; +pub mod comments; /// The lexical analyser for wright. This produces a series of tokens that make up the larger elements of the language. #[derive(Debug, Clone, Copy)] @@ -112,12 +106,24 @@ impl<'src> Lexer<'src> { } /// "Fork" this lexer, creating a new [`Lexer`] at the same position as this one that can be used for - /// failable parsing. This can be compared to the original lexer it was forked from using [Fragment::offset_from] + /// failable parsing. This can be compared to the original lexer it was forked from using [Lexer::offset_from] /// on the underlying `remaining` fragments. fn fork(&self) -> Self { *self } + /// Get the number of bytes between the origin's [remaining](Lexer::remaining) and + /// this [Lexer]'s [remaining](Lexer::remaining) using [`Fragment::offset_from`]. + /// + /// # Panics + /// - This function panics under the same conditions as [`Fragment::offset_from`]. + /// - Generally the best way to avoid panics is to only call this function on + /// [Lexer]s created using [Lexer::fork] on the `origin` lexer. + #[inline] + fn offset_from(&self, origin: &Self) -> usize { + self.remaining.offset_from(&origin.remaining) + } + /// Remove and ignore any whitespace at the start of the remaining fragment. fn ignore_whitespace(&mut self) { // Get a reference to the slice of the string past any whitespace at the start. @@ -165,137 +171,25 @@ impl<'src> Lexer<'src> { } } - /// Attempt to read/handle a single line comment from the start of the - /// remaining fragment. If there's a doc-style single line comment, return a [`Token`], - /// otherwise return [`None`]. - /// - /// Generally I'm trying to follow the [rust comment spec] here. - /// - /// [rust comment spec]: https://doc.rust-lang.org/reference/comments.html - fn handle_single_line_comment(&mut self) -> Option> { - // Fork the lexer to attempt to consume a single line comment. - let mut fork: Self = self.fork(); - - // Try to consume the single line comment prefix from the fork. - if fork.consume(SINGLE_LINE_COMMENT_PREFIX) { - // We consumed it successfully, read through a newline or the end of the forked lexer if we get there. 
- - // First determine if this is a doc comment of some kind. - let is_inner_doc: bool = fork.matches("/") && !fork.matches("//"); - let is_outer_doc: bool = fork.matches("!"); - - // The consume until a newline, carraige return, or the end of the source fragment. - while !fork.remaining.is_empty() && !fork.matches("\r") && !fork.matches("\n") { - fork.consume_any(); - } - - // Determine the kind of token to produce (if any). - let variant: Option = match (is_inner_doc, is_outer_doc) { - (true, false) => Some(TokenTy::InnerDocComment), - (false, true) => Some(TokenTy::OuterDocComment), - (false, false) => None, - (true, true) => { - unreachable!("Lexer should not match multiple comment types at once.") - } - }; - - // Map the variant to a token to return. - let token: Option = variant.map(|kind| { - // Get the number of bytes we have consumed using `offset_from`. - let bytes_consumed: usize = fork.remaining.offset_from(&self.remaining); - // Split this token from `self` rather than `fork` since self is still in an unmodified position. - self.split_token(bytes_consumed, kind) - }); - - // Update this lexer to match the state of the forked lexer. - *self = fork; - // Consume any outstanding whitespace. - self.ignore_whitespace(); - // Return any token produced. - return token; - } - - // If there was no comment prefix, there is no comment immediately available. - None + /// Advance this lexer by the specified number of bytes. + /// + /// # Panics + /// - If the lexer is not on a unicode character boundary after advancing. + /// - If the number of bytes is greater than the length of the [remaining](Lexer::remaining) fragment. + fn advance(&mut self, bytes: usize) { + self.remaining.inner = &self.remaining.inner[bytes..]; } - /// Attempt to read/consume a multi-line comment from the start of the `remaining` fragment. - fn handle_multi_line_comment(&mut self) -> Option> { - // Handle corner cases here so we don't have to below. - // These are both considered empty non-documenting comments. - if self.consume("/***/") { - return None; - } - - if self.consume("/**/") { - return None; - } - - // Make a fork of the lexer to avoid modifying this lexer if we fail to parse. - let mut fork: Self = self.fork(); - - // Try to parse the start of a multi-line comment. - if fork.consume(MULTI_LINE_COMMENT_START) { - // Check if this is a doc comment. - let is_outer_doc: bool = fork.matches("!"); - // Use this to indicate that more than one following asterix is not a doc comment. - let is_inner_doc: bool = fork.matches("*") && !fork.matches("**"); - - // Consume until we see the end of the doc comment. If we run out of characters, consider the - // comment unterminated. - while !fork.matches(MULTI_LINE_COMMENT_END) { - // Handle nested comments here: - if fork.matches(MULTI_LINE_COMMENT_START) { - // Discard the output -- don't care about doc comments in other comments. - fork.handle_multi_line_comment(); - continue; - } - - // Handle unterminated comments here. - if fork.remaining.is_empty() { - // If we have not hit a "*/" before the end of the input, return an unterminated block comment. - let bytes_consumed: usize = fork.remaining.offset_from(&self.remaining); - // Split the token and return it. - return Some( - self.split_token(bytes_consumed, TokenTy::UnterminatedBlockComment), - ); - } - - // If there's still input, and not a nested comment, consume it. - fork.consume_any(); - } - - // If we get here, the comment was terminated. Consume the terminating characters, and return. 
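// A standalone sketch (not part of this patch) of the prefix checks behind the doc-comment
// classification being moved around here: once "//" has been consumed, a lone third '/' marks
// one documentation style, a '!' marks the other, and anything else (including "////") is an
// ordinary ignored comment. The enum and function names are illustrative, not the crate's.
#[derive(Debug, PartialEq)]
enum LineCommentKind {
    SlashDoc, // "///...", but not "////..."
    BangDoc,  // "//!..."
    Plain,    // any other "//..." comment
}

fn classify_line_comment(after_slashes: &str) -> LineCommentKind {
    // Same two checks as the lexer: `matches("/") && !matches("//")` and `matches("!")`.
    let slash_doc = after_slashes.starts_with('/') && !after_slashes.starts_with("//");
    let bang_doc = after_slashes.starts_with('!');

    match (slash_doc, bang_doc) {
        (true, false) => LineCommentKind::SlashDoc,
        (false, true) => LineCommentKind::BangDoc,
        // The (true, true) case cannot occur: one character cannot be both '/' and '!'.
        _ => LineCommentKind::Plain,
    }
}

fn main() {
    // The argument is the text remaining after the leading "//" has been consumed.
    assert_eq!(classify_line_comment("/ doc text"), LineCommentKind::SlashDoc);
    assert_eq!(classify_line_comment("! module docs"), LineCommentKind::BangDoc);
    assert_eq!(classify_line_comment(" plain comment"), LineCommentKind::Plain);
    assert_eq!(classify_line_comment("// four slashes total"), LineCommentKind::Plain);
}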
- // Use debug assert here to make sure that the comment is actually terminated. - debug_assert!(fork.consume(MULTI_LINE_COMMENT_END), "comment is actually terminated"); - - // Determine the kind of token to produce (if any). - let variant: Option = match (is_inner_doc, is_outer_doc) { - (true, false) => Some(TokenTy::InnerBlockDocComment), - (false, true) => Some(TokenTy::OuterBlockDocComment), - (false, false) => None, - (true, true) => { - unreachable!("Lexer should not match multiple comment types at once.") - } - }; - - // Make the token to return. - let token: Option = variant.map(|kind| { - // Get the number of bytes we have consumed using `offset_from`. - let bytes_consumed: usize = fork.remaining.offset_from(&self.remaining); - // Split this token from `self` rather than `fork` since self is still in an unmodified position. - self.split_token(bytes_consumed, kind) - }); - - // Update this lexer to match the state of the fork. - *self = fork; - // Return token if there was one. - return token; - } - - // If the fork did not consume a multi-line comment start, return None and do - // not update this lexer. - None + /// Unsafe version of [Lexer::advance]. + /// Advances this lexer by the specified number of bytes. + /// + /// # Safety + /// - This lexer will be left in an invalid/undefined state if the number of bytes is greater than the length + /// of the [Lexer::remaining] fragment. + /// - This lexer will be left in an invalid/undefined state if after advancing, the next byte in the + /// [Lexer::remaining] fragment is not the start of a unicode code point. + unsafe fn advance_unchecked(&mut self, bytes: usize) { + self.remaining.inner = self.remaining.inner.get_unchecked(bytes..); } /// Get the next token from the lexer. @@ -308,40 +202,28 @@ impl<'src> Lexer<'src> { return None; } - // Grab a copy of the initial lexer to compare and check when progress has been made. - let initial_lexer: Self = self.fork(); - - // Attempt to parse a single line comment. Return it if it's documentation. - // Rerun this function if there was a comment and it was ignored successfully. - match self.handle_single_line_comment() { - // There was a single line comment ignored or no single line comment. - None => { - // Check if the remaining fragment changed. - if !self.remaining.ptr_eq(&initial_lexer.remaining) { - // If so, re-run this function. + // Attempt to parse a single line comment and then attempt a multi-line comment. + for comment_match_fn in [try_match_single_line_comment, try_match_block_comment] { + // Attempt to parse a comment using the given match function. Return it if it's documentation or unterminated. + // Get a new token and return that if there was a comment and it was ignored successfully. + match (comment_match_fn)(self) { + // A comment was parsed, consume and return it. + (bytes, Some(comment_variant)) => { + // Split the token. + let token: Token = self.split_token(bytes, comment_variant); + // Return it. + return Some(token); + }, + + // There was a comment, advance the lexer and ignore it. Re-start this function. + (bytes @ 1.., None) => { + self.advance(bytes); return self.next_token(); } - // If the lexer was unchanged, then there was no comment -- keep trying to match tokens. - } - - // If there was some token, return it. - token => return token, - } - - // Try to handle a multi-line comment if there is one. - match self.handle_multi_line_comment() { - // There was an ignored comment or no comment. - None => { - // If the lexer was changed, restart this function. 
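// A standalone sketch (not part of this patch) of the fork-and-measure pattern that
// `Lexer::fork` and the new `Lexer::offset_from` support: copy the cursor, consume
// speculatively on the copy, measure how many bytes the copy moved, and only then commit that
// many bytes on the original. `MiniCursor` is an illustrative stand-in for the real lexer,
// using lengths rather than pointers to measure the offset.
#[derive(Clone, Copy)]
struct MiniCursor<'src> {
    remaining: &'src str,
}

impl<'src> MiniCursor<'src> {
    fn fork(&self) -> Self {
        *self
    }

    // Byte distance this cursor has advanced relative to the cursor it was forked from.
    fn offset_from(&self, origin: &Self) -> usize {
        origin.remaining.len() - self.remaining.len()
    }
}

fn main() {
    let mut cursor = MiniCursor { remaining: "// comment\nrest" };

    // Speculatively consume a line comment on the fork only.
    let mut fork = cursor.fork();
    while !fork.remaining.is_empty() && !fork.remaining.starts_with('\n') {
        let ch = fork.remaining.chars().next().unwrap();
        fork.remaining = &fork.remaining[ch.len_utf8()..];
    }

    // Commit by advancing the original cursor by the measured number of bytes.
    let consumed = fork.offset_from(&cursor);
    cursor.remaining = &cursor.remaining[consumed..];

    assert_eq!(consumed, "// comment".len());
    assert_eq!(cursor.remaining, "\nrest");
}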
- if !self.remaining.ptr_eq(&initial_lexer.remaining) { - return self.next_token(); - } + // There was no comment, keep trying to match other tokens. + (0, None) => {}, } - - // If there was a block style doc-comment, or an unterminated multi-line comment - // return. - token => return token, } // Handle a trivial token if there is one. diff --git a/wright/src/parser/lexer/comments.rs b/wright/src/parser/lexer/comments.rs new file mode 100644 index 00000000..3045ba28 --- /dev/null +++ b/wright/src/parser/lexer/comments.rs @@ -0,0 +1,133 @@ +//! Implementation of comment token lexing. + +use super::{token::TokenTy, Lexer}; + +/// The pattern that begins any single line comments (including doc comments). +pub const SINGLE_LINE_COMMENT_PREFIX: &str = "//"; + +/// The pattern that starts any multi-line comments (including doc comments). +pub const MULTI_LINE_COMMENT_START: &str = "/*"; + +/// The pattern that ends any multi-line comments (including doc comments). +pub const MULTI_LINE_COMMENT_END: &str = "*/"; + +/// Attempt to match a sinlgle line comment from the start of the [Lexer::remaining] fragment. +/// Return a [usize] and optionally a [TokenTy]. The [usize] indicates how many bytes were in the comment. +/// The [TokenTy] (if it's not [None]) should be either [TokenTy::InnerDocComment] or [TokenTy::OuterDocComment]. +/// +/// If the [TokenTy] is not [None], the lexer should consume the specified number of bytes (by the [usize]) and +/// Produce a token with the [variant](super::token::Token::variant) from this function. +/// +/// Generally I'm trying to follow the [rust comment spec] here. +/// +/// [rust comment spec]: https://doc.rust-lang.org/reference/comments.html +pub fn try_match_single_line_comment(lexer: &Lexer) -> (usize, Option) { + // Fork the lexer so we can do all the parsing on the fork without worrying about modifying the original + // unnecessarily. + let mut fork: Lexer = lexer.fork(); + + // Try to consume the single line comment prefix from the fork. + if fork.consume(SINGLE_LINE_COMMENT_PREFIX) { + // We consumed it successfully, read through a newline or the end of the forked lexer if we get there. + + // First determine if this is a doc comment of some kind. + let is_inner_doc: bool = fork.matches("/") && !fork.matches("//"); + let is_outer_doc: bool = fork.matches("!"); + + // The consume until a newline, carraige return, or the end of the source fragment. + while !fork.remaining.is_empty() && !fork.matches("\r") && !fork.matches("\n") { + fork.consume_any(); + } + + // Determine the kind of token to produce (if any). + let variant: Option = match (is_inner_doc, is_outer_doc) { + (true, false) => Some(TokenTy::InnerDocComment), + (false, true) => Some(TokenTy::OuterDocComment), + (false, false) => None, + (true, true) => unreachable!("It is impossible for the `remaining` fragment to start with an `!` and a `/` simultaneously.") + }; + + // Return the number of bytes consumed and the type of token to + // produce if any. + return (fork.offset_from(lexer), variant); + } + + // If the single line comment prefix was not immediately available, there is no comment. + (0, None) +} + +/// Attempt to match a block comment from the start of the [Lexer::remaining] fragment. +/// Return a [usize] and optionally a [TokenTy]. The [usize] indicates how many bytes were in the comment. +/// The [TokenTy] (if it's not [None]) should be [TokenTy::InnerBlockDocComment], [TokenTy::OuterBlockDocComment], or +/// [TokenTy::UnterminatedBlockComment]. 
+/// +/// If the [TokenTy] is not [None], the lexer should consume the specified number of bytes (by the [usize]) and +/// Produce a token with the [variant](super::token::Token::variant) from this function. +pub fn try_match_block_comment(lexer: &Lexer) -> (usize, Option) { + // Handle corner cases here so we don't have to below. + // These are both considered empty non-documenting comments. + if lexer.matches("/***/") { + return (5, None); + } + + if lexer.matches("/**/") { + return (4, None); + } + + // Make a fork of the lexer to avoid modifying this lexer if we fail to parse. + let mut fork: Lexer = lexer.fork(); + + // Try to parse the start of a multi-line comment. + if fork.consume(MULTI_LINE_COMMENT_START) { + // Check if this is a doc comment. + let is_outer_doc: bool = fork.matches("!"); + // Use this to indicate that more than one following asterix is not a doc comment. + let is_inner_doc: bool = fork.matches("*") && !fork.matches("**"); + + // Consume until we see the end of the doc comment. If we run out of characters, consider the + // comment unterminated. + while !fork.matches(MULTI_LINE_COMMENT_END) { + // Handle nested comments here: + if fork.matches(MULTI_LINE_COMMENT_START) { + // Discard the output -- don't care about doc comments in other comments. + let (nested_comment_bytes, _) = try_match_block_comment(&fork); + + // SAFETY: the return from this function should never be on a char boundary or out of bounds. + // This is because the return value is always either 0 or calculated using `offset_from`. + unsafe { fork.advance_unchecked(nested_comment_bytes) }; + + // Restart the loop to keep consuming this comment. + continue; + } + + // Handle unterminated comments here. + if fork.remaining.is_empty() { + // If we have not hit a "*/" before the end of the input, return an unterminated block comment. + let bytes_consumed: usize = fork.offset_from(lexer); + return (bytes_consumed, Some(TokenTy::UnterminatedBlockComment)); + } + + // If there's still input, and not a nested comment, consume it. + fork.consume_any(); + } + + // If we get here, the comment was terminated. Consume the terminating characters, and return. + // Use debug assert here to make sure that the comment is actually terminated. + let consumed_comment_terminator: bool = fork.consume(MULTI_LINE_COMMENT_END); + debug_assert!(consumed_comment_terminator, "comment is actually terminated"); + + // Determine the kind of token to produce (if any). 
+ let variant: Option = match (is_inner_doc, is_outer_doc) { + (true, false) => Some(TokenTy::InnerBlockDocComment), + (false, true) => Some(TokenTy::OuterBlockDocComment), + (false, false) => None, + (true, true) => { + unreachable!("Lexer should not match multiple comment types at once.") + } + }; + + return (fork.offset_from(lexer), variant); + } + + (0, None) +} From 99f19c81cea9d222fa65f817eff8f9f594ae722e Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Mon, 26 Feb 2024 01:22:13 -0500 Subject: [PATCH 42/60] cargo fmt --- wright/src/parser/lexer.rs | 40 ++++++++++++++--------------- wright/src/parser/lexer/comments.rs | 4 +-- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 601d8233..baa354a2 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -12,9 +12,9 @@ use std::{iter::Peekable, ptr}; use token::{Token, TokenTy}; use unicode_ident::{is_xid_continue, is_xid_start}; +pub mod comments; pub mod token; pub mod trivial; -pub mod comments; /// The lexical analyser for wright. This produces a series of tokens that make up the larger elements of the language. #[derive(Debug, Clone, Copy)] @@ -112,13 +112,13 @@ impl<'src> Lexer<'src> { *self } - /// Get the number of bytes between the origin's [remaining](Lexer::remaining) and - /// this [Lexer]'s [remaining](Lexer::remaining) using [`Fragment::offset_from`]. - /// + /// Get the number of bytes between the origin's [remaining](Lexer::remaining) and + /// this [Lexer]'s [remaining](Lexer::remaining) using [`Fragment::offset_from`]. + /// /// # Panics /// - This function panics under the same conditions as [`Fragment::offset_from`]. - /// - Generally the best way to avoid panics is to only call this function on - /// [Lexer]s created using [Lexer::fork] on the `origin` lexer. + /// - Generally the best way to avoid panics is to only call this function on + /// [Lexer]s created using [Lexer::fork] on the `origin` lexer. #[inline] fn offset_from(&self, origin: &Self) -> usize { self.remaining.offset_from(&origin.remaining) @@ -171,23 +171,23 @@ impl<'src> Lexer<'src> { } } - /// Advance this lexer by the specified number of bytes. - /// + /// Advance this lexer by the specified number of bytes. + /// /// # Panics - /// - If the lexer is not on a unicode character boundary after advancing. - /// - If the number of bytes is greater than the length of the [remaining](Lexer::remaining) fragment. + /// - If the lexer is not on a unicode character boundary after advancing. + /// - If the number of bytes is greater than the length of the [remaining](Lexer::remaining) fragment. fn advance(&mut self, bytes: usize) { self.remaining.inner = &self.remaining.inner[bytes..]; } - /// Unsafe version of [Lexer::advance]. + /// Unsafe version of [Lexer::advance]. /// Advances this lexer by the specified number of bytes. - /// + /// /// # Safety /// - This lexer will be left in an invalid/undefined state if the number of bytes is greater than the length /// of the [Lexer::remaining] fragment. - /// - This lexer will be left in an invalid/undefined state if after advancing, the next byte in the - /// [Lexer::remaining] fragment is not the start of a unicode code point. + /// - This lexer will be left in an invalid/undefined state if after advancing, the next byte in the + /// [Lexer::remaining] fragment is not the start of a unicode code point. 
unsafe fn advance_unchecked(&mut self, bytes: usize) { self.remaining.inner = self.remaining.inner.get_unchecked(bytes..); } @@ -202,27 +202,27 @@ impl<'src> Lexer<'src> { return None; } - // Attempt to parse a single line comment and then attempt a multi-line comment. + // Attempt to parse a single line comment and then attempt a multi-line comment. for comment_match_fn in [try_match_single_line_comment, try_match_block_comment] { // Attempt to parse a comment using the given match function. Return it if it's documentation or unterminated. // Get a new token and return that if there was a comment and it was ignored successfully. match (comment_match_fn)(self) { - // A comment was parsed, consume and return it. + // A comment was parsed, consume and return it. (bytes, Some(comment_variant)) => { // Split the token. let token: Token = self.split_token(bytes, comment_variant); // Return it. return Some(token); - }, + } - // There was a comment, advance the lexer and ignore it. Re-start this function. + // There was a comment, advance the lexer and ignore it. Re-start this function. (bytes @ 1.., None) => { self.advance(bytes); return self.next_token(); } - // There was no comment, keep trying to match other tokens. - (0, None) => {}, + // There was no comment, keep trying to match other tokens. + (0, None) => {} } } diff --git a/wright/src/parser/lexer/comments.rs b/wright/src/parser/lexer/comments.rs index 3045ba28..dd89856a 100644 --- a/wright/src/parser/lexer/comments.rs +++ b/wright/src/parser/lexer/comments.rs @@ -17,7 +17,7 @@ pub const MULTI_LINE_COMMENT_END: &str = "*/"; /// /// If the [TokenTy] is not [None], the lexer should consume the specified number of bytes (by the [usize]) and /// Produce a token with the [variant](super::token::Token::variant) from this function. -/// +/// /// Generally I'm trying to follow the [rust comment spec] here. /// /// [rust comment spec]: https://doc.rust-lang.org/reference/comments.html @@ -93,7 +93,7 @@ pub fn try_match_block_comment(lexer: &Lexer) -> (usize, Option) { let (nested_comment_bytes, _) = try_match_block_comment(&fork); // SAFETY: the return from this function should never be on a char boundary or out of bounds. - // This is because the return value is always either 0 or calculated using `offset_from`. + // This is because the return value is always either 0 or calculated using `offset_from`. unsafe { fork.advance_unchecked(nested_comment_bytes) }; // Restart the loop to keep consuming this comment. From 92b5d867c966d3ab06d2caa736eb878b3317f0d2 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Wed, 13 Mar 2024 00:36:17 -0400 Subject: [PATCH 43/60] Refactor identifier/keywords to their own file --- wright/src/parser/lexer.rs | 74 ++----------------------- wright/src/parser/lexer/identifier.rs | 78 +++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 69 deletions(-) create mode 100644 wright/src/parser/lexer/identifier.rs diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index baa354a2..e9c69019 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -10,11 +10,11 @@ use std::iter::FusedIterator; use std::str::Chars; use std::{iter::Peekable, ptr}; use token::{Token, TokenTy}; -use unicode_ident::{is_xid_continue, is_xid_start}; pub mod comments; pub mod token; pub mod trivial; +pub mod identifier; /// The lexical analyser for wright. This produces a series of tokens that make up the larger elements of the language. 
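// A standalone sketch (not part of this patch) of the `(usize, Option<TokenTy>)` protocol the
// new comment matchers hand back to `next_token`: the usize says how many bytes the comment
// occupied, and the optional variant says whether those bytes become a token (doc comment or
// unterminated block comment) or are simply skipped. `FakeTokenTy` and `fake_match_comment`
// are illustrative stand-ins.
#[derive(Debug, PartialEq)]
enum FakeTokenTy {
    DocComment,
}

// Pretend matcher: "///..." yields a doc-comment token, a plain "//..." is skipped, and
// anything else matches zero bytes.
fn fake_match_comment(remaining: &str) -> (usize, Option<FakeTokenTy>) {
    if remaining.starts_with("///") {
        (remaining.len(), Some(FakeTokenTy::DocComment))
    } else if remaining.starts_with("//") {
        (remaining.len(), None)
    } else {
        (0, None)
    }
}

fn main() {
    // Doc comment: consume the bytes and produce a token from them.
    assert_eq!(fake_match_comment("/// docs"), (8, Some(FakeTokenTy::DocComment)));
    // Plain comment: bytes were consumed, but no token is produced -- the lexer just advances
    // and restarts `next_token`.
    assert_eq!(fake_match_comment("// plain"), (8, None));
    // No comment at all: zero bytes, so the caller falls through to the other token rules.
    assert_eq!(fake_match_comment("+"), (0, None));
}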
#[derive(Debug, Clone, Copy)] @@ -36,46 +36,6 @@ impl<'src> Lexer<'src> { } } - /// Try to match a fragment recognized to be an identifier or keyword to - /// a keyword or return [TokenTy::Identifier]. - fn identifier_or_keyword(fragment: Fragment<'src>) -> TokenTy { - use TokenTy::*; - - match fragment.inner { - "record" => KwRecord, - "type" => KwType, - "enum" => KwEnum, - "union" => KwUnion, - "func" => KwFunc, - "repr" => KwRepr, - "impl" => KwImpl, - "constraint" => KwConstraint, - "references" => KwReferences, - "trait" => KwTrait, - "const" => KwConst, - "where" => KwWhere, - - "use" => KwUse, - "as" => KwAs, - "mod" => KwMod, - - "if" => KwIf, - "else" => KwElse, - - "for" => KwFor, - "in" => KwIn, - "while" => KwWhile, - "loop" => KwLoop, - - "true" => KwTrue, - "false" => KwFalse, - - "_" => Underscore, - - _ => Identifier, - } - } - /// Make a token by splitting a given number of bytes off of the `self.remaining` fragment /// and labeling them with the given kind. /// @@ -93,7 +53,7 @@ impl<'src> Lexer<'src> { /// Unsafe version of [Lexer::split_token]. /// - /// # Safety: + /// # Safety /// - This function matches the safety guarantees of [Fragment::split_at_unchecked]. unsafe fn split_token_unchecked(&mut self, bytes: usize, kind: TokenTy) -> Token<'src> { let (token_fragment, new_remaining_fragment) = self.remaining.split_at_unchecked(bytes); @@ -232,32 +192,8 @@ impl<'src> Lexer<'src> { } // Next attempt to match a keyword or identifier. - { - let mut chars: Chars = self.remaining.chars(); - // The unsafe is fine here -- we've established that this lexer has bytes remaining. - let next: char = unsafe { chars.next().unwrap_unchecked() }; - - if is_xid_start(next) || next == '_' { - let mut bytes_consumed: usize = next.len_utf8(); - - // Take remaining chars and add to sum. - bytes_consumed += chars - .take_while(|c| is_xid_continue(*c)) - .map(char::len_utf8) - .sum::(); - - // Split the number of bytes we consumed. - let (ident_frag, new_remaining) = self.remaining.split_at(bytes_consumed); - // Get the token kind to produce for this fragment. - let variant = Lexer::identifier_or_keyword(ident_frag); - // Update the lexers remaining fragment. - self.remaining = new_remaining; - // Return the identifier, keyword, or underscore. - return Some(Token { - variant, - fragment: ident_frag, - }); - } + if let Some(token) = identifier::try_consume_keyword_or_identifier(self) { + return Some(token); } // Next attempt to parse a numerical literal. @@ -313,7 +249,7 @@ impl<'src> Iterator for Lexer<'src> { } fn size_hint(&self) -> (usize, Option) { - // Lexers cannot return multiple tokens for a single byte. + // Lexers should not return multiple tokens for a single byte. (0, Some(self.bytes_remaining())) } } diff --git a/wright/src/parser/lexer/identifier.rs b/wright/src/parser/lexer/identifier.rs new file mode 100644 index 00000000..d950e062 --- /dev/null +++ b/wright/src/parser/lexer/identifier.rs @@ -0,0 +1,78 @@ +//! Implementation related to parsing keywords and identifiers. + +use std::str::Chars; +use unicode_ident::{is_xid_continue, is_xid_start}; +use crate::parser::fragment::Fragment; +use super::{token::Token, Lexer, token::TokenTy}; + +/// Try to match a fragment recognized to be an identifier or keyword to +/// a keyword or return [TokenTy::Identifier]. 
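// A usage sketch (not part of this patch) of the lexer after this refactor, assuming the
// public paths already used by the `wright` binary above (`wright::parser::lexer::Lexer` and
// `token::Token`) and that the `Iterator` impl yields `Token` values as `next_token` does.
// The source string is made up for illustration.
use wright::parser::lexer::{token::Token, Lexer};

fn main() {
    let source = "const x := 0xFF; // trailing comment";

    // Whitespace and the plain trailing comment are skipped; everything else becomes a token.
    let tokens: Vec<Token<'_>> = Lexer::new(source).collect();

    for token in &tokens {
        // The derived Display impl prints the fragment and its variant, e.g. "\"const\" (KwConst)".
        println!("{}", token);
    }
}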
+fn identifier_or_keyword<'src>(fragment: Fragment<'src>) -> TokenTy { + use TokenTy::*; + + match fragment.inner { + "record" => KwRecord, + "type" => KwType, + "enum" => KwEnum, + "union" => KwUnion, + "func" => KwFunc, + "repr" => KwRepr, + "impl" => KwImpl, + "constraint" => KwConstraint, + "references" => KwReferences, + "trait" => KwTrait, + "const" => KwConst, + "where" => KwWhere, + + "use" => KwUse, + "as" => KwAs, + "mod" => KwMod, + + "if" => KwIf, + "else" => KwElse, + + "for" => KwFor, + "in" => KwIn, + "while" => KwWhile, + "loop" => KwLoop, + + "true" => KwTrue, + "false" => KwFalse, + + "_" => Underscore, + + _ => Identifier, + } +} + +/// Attempt to consume a keyword/[identifier](TokenTy::Identifier)/[underscore](TokenTy::Underscore) from the lexer. +pub fn try_consume_keyword_or_identifier<'src>(lexer: &mut Lexer<'src>) -> Option> { + // Get a character iterator that we can pull from. + let mut chars: Chars = lexer.remaining.chars(); + // Get the next character from the iterator, consider it the first char of any potential match. + // Make sure it's a valid identifier start (includes start to all keywords) or is an underscore. + // If it does not exist or match predicates, return None. + let next: char = chars.next().filter(|c| is_xid_start(*c) || *c == '_')?; + // Store/track the number of bytes consumed so far. + let mut bytes_consumed: usize = next.len_utf8(); + + // Take remaining chars and add to sum. + bytes_consumed += chars + .take_while(|c| is_xid_continue(*c)) + .map(char::len_utf8) + .sum::(); + + // Split the token and the new remaining fragment. + // SAFETY: The character iterator should guaruntee that we land on a valid character boundary within the bounds + // of the fragment. + let (token_fragment, new_remaining): (Fragment, Fragment) = unsafe { + lexer.remaining.split_at_unchecked(bytes_consumed) + }; + + // Get the variant of token to produce. + let variant: TokenTy = identifier_or_keyword(token_fragment); + // Update the lexer's remaining fragment. + lexer.remaining = new_remaining; + // Return the token. + return Some(Token { variant, fragment: token_fragment }); +} From 161e660c2b13e80ae1dfe3db25dc87b9cd234681 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Wed, 13 Mar 2024 00:54:51 -0400 Subject: [PATCH 44/60] Refactor integer literals and tests --- wright/src/parser/lexer.rs | 96 ++-------------------- wright/src/parser/lexer/comments.rs | 12 +++ wright/src/parser/lexer/identifier.rs | 13 +++ wright/src/parser/lexer/integer_literal.rs | 56 +++++++++++++ wright/src/parser/lexer/trivial.rs | 28 +++++++ 5 files changed, 115 insertions(+), 90 deletions(-) create mode 100644 wright/src/parser/lexer/integer_literal.rs diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index e9c69019..e9967c11 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -4,17 +4,19 @@ //! defined for tokens. use self::comments::{try_match_block_comment, try_match_single_line_comment}; +use self::integer_literal::try_consume_integer_literal; use super::fragment::Fragment; use std::iter::FusedIterator; use std::str::Chars; -use std::{iter::Peekable, ptr}; +use std::ptr; use token::{Token, TokenTy}; pub mod comments; pub mod token; pub mod trivial; pub mod identifier; +pub mod integer_literal; /// The lexical analyser for wright. This produces a series of tokens that make up the larger elements of the language. 
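// A standalone sketch (not part of this patch) of the scan-then-classify approach
// `try_consume_keyword_or_identifier` takes: accept one XID_Start character (or '_'), extend
// through XID_Continue characters, then look the whole word up in a keyword table. It uses the
// same `unicode-ident` crate the lexer already depends on; `scan_word`, `classify`, and the
// shortened keyword table are illustrative.
use unicode_ident::{is_xid_continue, is_xid_start};

fn scan_word(input: &str) -> Option<&str> {
    let mut chars = input.chars();
    let first = chars.next().filter(|c| is_xid_start(*c) || *c == '_')?;

    // Count bytes so the returned slice always ends on a character boundary.
    let mut len = first.len_utf8();
    len += chars
        .take_while(|c| is_xid_continue(*c))
        .map(char::len_utf8)
        .sum::<usize>();

    Some(&input[..len])
}

fn classify(word: &str) -> &'static str {
    match word {
        "const" => "KwConst",
        "true" => "KwTrue",
        "_" => "Underscore",
        _ => "Identifier",
    }
}

fn main() {
    // "const" scans as one word and the table maps it to a keyword variant.
    let word = scan_word("const TEST").expect("starts with an XID_Start character");
    assert_eq!(word, "const");
    assert_eq!(classify(word), "KwConst");

    // An unknown word falls through to the identifier case.
    assert_eq!(classify("TEST"), "Identifier");

    // A leading digit is not XID_Start, so no identifier or keyword starts here.
    assert_eq!(scan_word("1abc"), None);
}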
#[derive(Debug, Clone, Copy)] @@ -196,41 +198,9 @@ impl<'src> Lexer<'src> { return Some(token); } - // Next attempt to parse a numerical literal. - { - let mut chars: Peekable = self.remaining.chars().peekable(); - // The unsafe is fine here -- we've established that this lexer has bytes remaining. - let next: char = unsafe { chars.next().unwrap_unchecked() }; - - if next.is_ascii_digit() { - // Accumulate the number of bytes consumed in the numeric literal. - let mut acc: usize = 1; - // Track the radix - let mut radix: u32 = 10; - - // Change the radix if necessary - if next == '0' { - if let Some(prefix) = chars.next_if(|x| ['x', 'o', 'b', 'X', 'B'].contains(x)) { - // All the possible prefix chars are 1 byte ascii characters. - acc += 1; - - radix = match prefix { - 'x' | 'X' => 16, - 'b' | 'B' => 2, - 'o' => 8, - _ => unreachable!("the prefix byte is checked above"), - }; - } - } - - // Add the rest of the integer literal. - acc += chars - .take_while(|c| c.is_digit(radix) || *c == '_') - .map(char::len_utf8) - .sum::(); - - return Some(self.split_token(acc, TokenTy::IntegerLiteral)); - } + // Next attempt to parse an integer literal. + if let Some(integer_lit) = try_consume_integer_literal(self) { + return Some(integer_lit); } // If we haven't matched at this point, produce a token marked as "Unknown". @@ -256,57 +226,3 @@ impl<'src> Iterator for Lexer<'src> { // Lexers are fused -- they cannot generate tokens infinitely. impl<'src> FusedIterator for Lexer<'src> {} - -#[cfg(test)] -mod tests { - use super::Lexer; - use crate::parser::lexer::TokenTy; - - #[test] - fn plus_and_plus_eq_tokens() { - let mut plus = Lexer::new("+"); - let mut plus_eq = Lexer::new("+="); - - let plus_token = plus.next_token().unwrap(); - let plus_eq_token = plus_eq.next_token().unwrap(); - - assert_eq!(plus.bytes_remaining(), 0); - assert_eq!(plus_eq.bytes_remaining(), 0); - assert_eq!(plus_token.variant, TokenTy::Plus); - assert_eq!(plus_eq_token.variant, TokenTy::PlusEq); - } - - #[test] - fn plus_one_token() { - let mut plus_one = Lexer::new("+1"); - let plus_token = plus_one.next_token().unwrap(); - assert_eq!(plus_one.bytes_remaining(), 1); - assert_eq!(plus_token.variant, TokenTy::Plus); - assert_eq!(plus_token.fragment.len(), 1); - } - - #[test] - fn identifiers_and_keywords() { - let mut lexer = Lexer::new("const TEST"); - - assert_eq!(lexer.next_token().unwrap().variant, TokenTy::KwConst); - assert_eq!(lexer.next_token().unwrap().variant, TokenTy::Identifier); - } - - #[test] - fn intger_literal() { - let mut lexer = Lexer::new("123_456_789."); - - let token = lexer.next_token().unwrap(); - - assert_eq!(token.fragment.inner, "123_456_789"); - assert_eq!(token.variant, TokenTy::IntegerLiteral); - } - - #[test] - fn ignored_single_line_comment() { - let mut lexer = Lexer::new("// test comment "); - assert!(lexer.next_token().is_none()); - assert_eq!(lexer.remaining.len(), 0); - } -} diff --git a/wright/src/parser/lexer/comments.rs b/wright/src/parser/lexer/comments.rs index dd89856a..bca7e23d 100644 --- a/wright/src/parser/lexer/comments.rs +++ b/wright/src/parser/lexer/comments.rs @@ -131,3 +131,15 @@ pub fn try_match_block_comment(lexer: &Lexer) -> (usize, Option) { (0, None) } + +#[cfg(test)] +mod tests { + use super::Lexer; + + #[test] + fn ignored_single_line_comment() { + let mut lexer = Lexer::new("// test comment "); + assert!(lexer.next_token().is_none()); + assert_eq!(lexer.remaining.len(), 0); + } +} diff --git a/wright/src/parser/lexer/identifier.rs b/wright/src/parser/lexer/identifier.rs 
index d950e062..d3fa29d1 100644 --- a/wright/src/parser/lexer/identifier.rs +++ b/wright/src/parser/lexer/identifier.rs @@ -76,3 +76,16 @@ pub fn try_consume_keyword_or_identifier<'src>(lexer: &mut Lexer<'src>) -> Optio // Return the token. return Some(Token { variant, fragment: token_fragment }); } + +#[cfg(test)] +mod tests { + use super::{Lexer, TokenTy}; + + #[test] + fn identifiers_and_keywords() { + let mut lexer = Lexer::new("const TEST"); + + assert_eq!(lexer.next_token().unwrap().variant, TokenTy::KwConst); + assert_eq!(lexer.next_token().unwrap().variant, TokenTy::Identifier); + } +} diff --git a/wright/src/parser/lexer/integer_literal.rs b/wright/src/parser/lexer/integer_literal.rs new file mode 100644 index 00000000..2479565a --- /dev/null +++ b/wright/src/parser/lexer/integer_literal.rs @@ -0,0 +1,56 @@ +//! Implementation for lexing integer literals. + +use std::{iter::Peekable, str::Chars}; +use super::{token::{Token, TokenTy}, Lexer}; + +/// Attempt to lex and consume an [TokenTy::IntegerLiteral] from the lexer. +pub fn try_consume_integer_literal<'src>(lexer: &mut Lexer<'src>) -> Option> { + // Make a peekable character iterator. + let mut chars: Peekable = lexer.remaining.chars().peekable(); + // Get the first character from the iterator. We can only continue lexing if one exists and is an ascii + // decimal digit. + let next: char = chars.next().filter(char::is_ascii_digit)?; + // Track the number of bytes consumed. We use the length of the parsed first char here but we could probably + // assume it to be 1. + let mut bytes_consumed: usize = next.len_utf8(); + // Track the radix + let mut radix: u32 = 10; + + // Change the radix if necessary + if next == '0' { + if let Some(prefix) = chars.next_if(|x| ['x', 'o', 'b', 'X', 'B'].contains(x)) { + // All the possible prefix chars are 1 byte ascii characters. + bytes_consumed += 1; + + radix = match prefix { + 'x' | 'X' => 16, + 'b' | 'B' => 2, + 'o' => 8, + _ => unreachable!("the prefix byte is checked above"), + }; + } + } + + // Add the rest of the integer literal. + bytes_consumed += chars + .take_while(|c| c.is_digit(radix) || *c == '_') + .map(char::len_utf8) + .sum::(); + + return Some(lexer.split_token(bytes_consumed, TokenTy::IntegerLiteral)); +} + +#[cfg(test)] +mod tests { + use super::{TokenTy, Lexer}; + + #[test] + fn integer_literal() { + let mut lexer = Lexer::new("123_456_789."); + + let token = lexer.next_token().unwrap(); + + assert_eq!(token.fragment.inner, "123_456_789"); + assert_eq!(token.variant, TokenTy::IntegerLiteral); + } +} diff --git a/wright/src/parser/lexer/trivial.rs b/wright/src/parser/lexer/trivial.rs index 1e2f52a1..a0c2445f 100644 --- a/wright/src/parser/lexer/trivial.rs +++ b/wright/src/parser/lexer/trivial.rs @@ -105,3 +105,31 @@ pub fn try_consume_trivial_token<'src>(lexer: &mut Lexer<'src>) -> Option Date: Wed, 13 Mar 2024 00:55:28 -0400 Subject: [PATCH 45/60] satisfy clippy --- wright/src/parser/lexer/identifier.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wright/src/parser/lexer/identifier.rs b/wright/src/parser/lexer/identifier.rs index d3fa29d1..fc0cffdc 100644 --- a/wright/src/parser/lexer/identifier.rs +++ b/wright/src/parser/lexer/identifier.rs @@ -7,7 +7,7 @@ use super::{token::Token, Lexer, token::TokenTy}; /// Try to match a fragment recognized to be an identifier or keyword to /// a keyword or return [TokenTy::Identifier]. 
-fn identifier_or_keyword<'src>(fragment: Fragment<'src>) -> TokenTy { +fn identifier_or_keyword(fragment: Fragment) -> TokenTy { use TokenTy::*; match fragment.inner { @@ -74,7 +74,7 @@ pub fn try_consume_keyword_or_identifier<'src>(lexer: &mut Lexer<'src>) -> Optio // Update the lexer's remaining fragment. lexer.remaining = new_remaining; // Return the token. - return Some(Token { variant, fragment: token_fragment }); + Some(Token { variant, fragment: token_fragment }) } #[cfg(test)] From 3ce43b9bc8380bf0989fc45c4482f1285744fed9 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Wed, 13 Mar 2024 00:56:10 -0400 Subject: [PATCH 46/60] cargo fmt --- wright/src/parser/lexer.rs | 6 ++-- wright/src/parser/lexer/comments.rs | 2 +- wright/src/parser/lexer/identifier.rs | 34 ++++++++++++---------- wright/src/parser/lexer/integer_literal.rs | 19 +++++++----- 4 files changed, 33 insertions(+), 28 deletions(-) diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index e9967c11..dd5ddf1c 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -8,15 +8,15 @@ use self::integer_literal::try_consume_integer_literal; use super::fragment::Fragment; use std::iter::FusedIterator; -use std::str::Chars; use std::ptr; +use std::str::Chars; use token::{Token, TokenTy}; pub mod comments; -pub mod token; -pub mod trivial; pub mod identifier; pub mod integer_literal; +pub mod token; +pub mod trivial; /// The lexical analyser for wright. This produces a series of tokens that make up the larger elements of the language. #[derive(Debug, Clone, Copy)] diff --git a/wright/src/parser/lexer/comments.rs b/wright/src/parser/lexer/comments.rs index bca7e23d..cd1559f4 100644 --- a/wright/src/parser/lexer/comments.rs +++ b/wright/src/parser/lexer/comments.rs @@ -135,7 +135,7 @@ pub fn try_match_block_comment(lexer: &Lexer) -> (usize, Option) { #[cfg(test)] mod tests { use super::Lexer; - + #[test] fn ignored_single_line_comment() { let mut lexer = Lexer::new("// test comment "); diff --git a/wright/src/parser/lexer/identifier.rs b/wright/src/parser/lexer/identifier.rs index fc0cffdc..b1df9533 100644 --- a/wright/src/parser/lexer/identifier.rs +++ b/wright/src/parser/lexer/identifier.rs @@ -1,9 +1,9 @@ -//! Implementation related to parsing keywords and identifiers. +//! Implementation related to parsing keywords and identifiers. +use super::{token::Token, token::TokenTy, Lexer}; +use crate::parser::fragment::Fragment; use std::str::Chars; use unicode_ident::{is_xid_continue, is_xid_start}; -use crate::parser::fragment::Fragment; -use super::{token::Token, Lexer, token::TokenTy}; /// Try to match a fragment recognized to be an identifier or keyword to /// a keyword or return [TokenTy::Identifier]. @@ -47,11 +47,11 @@ fn identifier_or_keyword(fragment: Fragment) -> TokenTy { /// Attempt to consume a keyword/[identifier](TokenTy::Identifier)/[underscore](TokenTy::Underscore) from the lexer. pub fn try_consume_keyword_or_identifier<'src>(lexer: &mut Lexer<'src>) -> Option> { - // Get a character iterator that we can pull from. + // Get a character iterator that we can pull from. let mut chars: Chars = lexer.remaining.chars(); // Get the next character from the iterator, consider it the first char of any potential match. - // Make sure it's a valid identifier start (includes start to all keywords) or is an underscore. - // If it does not exist or match predicates, return None. + // Make sure it's a valid identifier start (includes start to all keywords) or is an underscore. 
+ // If it does not exist or match predicates, return None. let next: char = chars.next().filter(|c| is_xid_start(*c) || *c == '_')?; // Store/track the number of bytes consumed so far. let mut bytes_consumed: usize = next.len_utf8(); @@ -62,19 +62,21 @@ pub fn try_consume_keyword_or_identifier<'src>(lexer: &mut Lexer<'src>) -> Optio .map(char::len_utf8) .sum::(); - // Split the token and the new remaining fragment. - // SAFETY: The character iterator should guarantee that we land on a valid character boundary within the bounds - // of the fragment. - let (token_fragment, new_remaining): (Fragment, Fragment) = unsafe { - lexer.remaining.split_at_unchecked(bytes_consumed) - }; + // Split the token and the new remaining fragment. + // SAFETY: The character iterator should guarantee that we land on a valid character boundary within the bounds + // of the fragment. + let (token_fragment, new_remaining): (Fragment, Fragment) = + unsafe { lexer.remaining.split_at_unchecked(bytes_consumed) }; - // Get the variant of token to produce. + // Get the variant of token to produce. let variant: TokenTy = identifier_or_keyword(token_fragment); - // Update the lexer's remaining fragment. + // Update the lexer's remaining fragment. lexer.remaining = new_remaining; - // Return the token. - Some(Token { variant, fragment: token_fragment }) + // Return the token. + Some(Token { + variant, + fragment: token_fragment, + }) } #[cfg(test)] diff --git a/wright/src/parser/lexer/integer_literal.rs b/wright/src/parser/lexer/integer_literal.rs index 2479565a..435bdd40 100644 --- a/wright/src/parser/lexer/integer_literal.rs +++ b/wright/src/parser/lexer/integer_literal.rs @@ -1,17 +1,20 @@ -//! Implementation for lexing integer literals. +//! Implementation for lexing integer literals. +use super::{ + token::{Token, TokenTy}, + Lexer, +}; use std::{iter::Peekable, str::Chars}; -use super::{token::{Token, TokenTy}, Lexer}; /// Attempt to lex and consume an [TokenTy::IntegerLiteral] from the lexer. pub fn try_consume_integer_literal<'src>(lexer: &mut Lexer<'src>) -> Option> { - // Make a peekable character iterator. + // Make a peekable character iterator. let mut chars: Peekable = lexer.remaining.chars().peekable(); - // Get the first character from the iterator. We can only continue lexing if one exists and is an ascii - // decimal digit. + // Get the first character from the iterator. We can only continue lexing if one exists and is an ascii + // decimal digit. let next: char = chars.next().filter(char::is_ascii_digit)?; - // Track the number of bytes consumed. We use the length of the parsed first char here but we could probably - // assume it to be 1. + // Track the number of bytes consumed. We use the length of the parsed first char here but we could probably + // assume it to be 1.
let mut bytes_consumed: usize = next.len_utf8(); // Track the radix let mut radix: u32 = 10; @@ -42,7 +45,7 @@ pub fn try_consume_integer_literal<'src>(lexer: &mut Lexer<'src>) -> Option Date: Wed, 13 Mar 2024 01:05:28 -0400 Subject: [PATCH 47/60] Add new token types and make some lexer fns pub --- wright/src/parser/lexer.rs | 15 +++++++-------- wright/src/parser/lexer/token.rs | 7 +++++-- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index dd5ddf1c..2e13436d 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -68,9 +68,8 @@ impl<'src> Lexer<'src> { } /// "Fork" this lexer, creating a new [`Lexer`] at the same position as this one that can be used for - /// failable parsing. This can be compared to the original lexer it was forked from using [Lexer::offset_from] - /// on the underlying `remaining` fragments. - fn fork(&self) -> Self { + /// failable parsing. This can be compared to the original lexer it was forked from using [Lexer::offset_from]. + pub fn fork(&self) -> Self { *self } @@ -82,12 +81,12 @@ impl<'src> Lexer<'src> { /// - Generally the best way to avoid panics is to only call this function on /// [Lexer]s created using [Lexer::fork] on the `origin` lexer. #[inline] - fn offset_from(&self, origin: &Self) -> usize { + pub fn offset_from(&self, origin: &Self) -> usize { self.remaining.offset_from(&origin.remaining) } - /// Remove and ignore any whitespace at the start of the remaining fragment. - fn ignore_whitespace(&mut self) { + /// Remove and ignore any whitespace at the start of the [Lexer::remaining] [Fragment]. + pub fn ignore_whitespace(&mut self) { // Get a reference to the slice of the string past any whitespace at the start. let without_whitespace: &str = self.remaining.inner.trim_start(); @@ -97,8 +96,8 @@ impl<'src> Lexer<'src> { } } - /// Check if a pattern matches at the start of the remaining fragment, and if so return the number of bytes. - fn matches(&self, pattern: &str) -> bool { + /// Check if a pattern matches at the start of the [Lexer::remaining] [Fragment]. + pub fn matches(&self, pattern: &str) -> bool { self.remaining.inner.starts_with(pattern) } diff --git a/wright/src/parser/lexer/token.rs b/wright/src/parser/lexer/token.rs index f6a9f9e8..6749aa84 100644 --- a/wright/src/parser/lexer/token.rs +++ b/wright/src/parser/lexer/token.rs @@ -55,6 +55,8 @@ pub enum TokenTy { InnerDocComment, InnerBlockDocComment, /// Indicates a block style comment without termination. + /// Separate from [TokenTy::InnerDocComment] and [TokenTy::OuterDocComment] to indicate that + /// unterminated comments will be handled differently (produce errors eventually). UnterminatedBlockComment, KwRecord, @@ -82,8 +84,9 @@ pub enum TokenTy { KwWhere, IntegerLiteral, - StringLiteral, - CharLiteral, + StringLiteral { terminated: bool }, + FormatStringLiteral { terminated: bool }, + CharLiteral { terminated: bool }, /// Unknown character in lexer fragment. 
Unknown From 07813bc2d017b8960f5da4f2abed7894aca756a6 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Wed, 13 Mar 2024 01:27:52 -0400 Subject: [PATCH 48/60] String/char literals --- wright/src/parser/lexer.rs | 7 ++++ wright/src/parser/lexer/quoted.rs | 65 +++++++++++++++++++++++++++++++ 2 files changed, 72 insertions(+) create mode 100644 wright/src/parser/lexer/quoted.rs diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 2e13436d..5d502f52 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -5,6 +5,7 @@ use self::comments::{try_match_block_comment, try_match_single_line_comment}; use self::integer_literal::try_consume_integer_literal; +use self::quoted::try_consume_quoted_literal; use super::fragment::Fragment; use std::iter::FusedIterator; @@ -17,6 +18,7 @@ pub mod identifier; pub mod integer_literal; pub mod token; pub mod trivial; +pub mod quoted; /// The lexical analyser for wright. This produces a series of tokens that make up the larger elements of the language. #[derive(Debug, Clone, Copy)] @@ -202,6 +204,11 @@ impl<'src> Lexer<'src> { return Some(integer_lit); } + // Next attempt to parse a quoted literal. + if let Some(quoted_lit) = try_consume_quoted_literal(self) { + return Some(quoted_lit); + } + // If we haven't matched at this point, produce a token marked as "Unknown". // The unsafe is fine -- we know from above that there are remaining characters. let unknown_char = unsafe { self.remaining.chars().next().unwrap_unchecked() }; diff --git a/wright/src/parser/lexer/quoted.rs b/wright/src/parser/lexer/quoted.rs new file mode 100644 index 00000000..2318e0db --- /dev/null +++ b/wright/src/parser/lexer/quoted.rs @@ -0,0 +1,65 @@ +//! Quoted literals. + +use std::str::Chars; +use super::{token::Token, Lexer, token::TokenTy}; + +/// Attempt to parse a quoted literal. This includes [TokenTy::StringLiteral], [TokenTy::CharLiteral], and +/// [TokenTy::FormatStringLiteral]. +pub fn try_consume_quoted_literal<'src>(lexer: &mut Lexer<'src>) -> Option> { + // Make a chars iterator to lex from. + let mut chars: Chars = lexer.remaining.chars(); + // Get the first char from the character iterator. + // Return none if the first character doesn't exist or is not one of the quote terminating characters. + let first: char = chars.next().filter(|c| ['\'', '"', '`'].contains(c))?; + // Track number of bytes consumed. + let mut bytes_consumed: usize = first.len_utf8(); + // Track whether the quoted literal is terminated. + let mut is_terminated: bool = false; + + // Consume from the iterator while possible. + while let Some(consumed) = chars.next() { + // Update the number of bytes consumed. + bytes_consumed += consumed.len_utf8(); + + // Check if the character matches the starting char. + // If so, record the literal as terminated and break this loop. + if consumed == first { + is_terminated = true; + break; + } + + // If the character we just consumed is a backslash. + // We only handle escaped terminators here, rather than parsing actual meaning. + // Consume the next character if there is one, regardless of what it is. + // This prevents an escaped terminator from ending the literal. + if consumed == '\\' { + // If there is no next char, do not add anything to the number of bytes consumed. + bytes_consumed += chars.next().map(char::len_utf8).unwrap_or(0); + } + } + + // Return when we have either reached a terminator or run out of characters. + // First determine the variant to return. 
+ let variant: TokenTy = match first { + '\'' => TokenTy::CharLiteral { terminated: is_terminated }, + '\"' => TokenTy::StringLiteral { terminated: is_terminated }, + '`' => TokenTy::FormatStringLiteral { terminated: is_terminated }, + _ => unreachable!("There are no other quoted literals"), + }; + + // SAFETY: Summing char lengths from the iterator should never give us an invalid or out of bounds index. + Some(unsafe { lexer.split_token_unchecked(bytes_consumed, variant) }) +} + +#[cfg(test)] +mod tests { + use crate::parser::lexer::{token::TokenTy, Lexer}; + + #[test] + fn string_literal() { + let mut lexer = Lexer::new(r#" "Test string literal" "#); + let token = lexer.next_token().unwrap(); + assert_eq!(token.variant, TokenTy::StringLiteral { terminated: true }); + assert_eq!(token.fragment.inner, "\"Test string literal\""); + } +} From 955411a54f588594c7f85991bd9776b7452122ff Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Wed, 13 Mar 2024 01:28:48 -0400 Subject: [PATCH 49/60] cargo fmt --- wright/src/parser/lexer.rs | 4 +-- wright/src/parser/lexer/quoted.rs | 55 ++++++++++++++++++------------- 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/wright/src/parser/lexer.rs b/wright/src/parser/lexer.rs index 5d502f52..387b6d40 100644 --- a/wright/src/parser/lexer.rs +++ b/wright/src/parser/lexer.rs @@ -16,9 +16,9 @@ use token::{Token, TokenTy}; pub mod comments; pub mod identifier; pub mod integer_literal; +pub mod quoted; pub mod token; pub mod trivial; -pub mod quoted; /// The lexical analyser for wright. This produces a series of tokens that make up the larger elements of the language. #[derive(Debug, Clone, Copy)] @@ -204,7 +204,7 @@ impl<'src> Lexer<'src> { return Some(integer_lit); } - // Next attempt to parse a quoted literal. + // Next attempt to parse a quoted literal. if let Some(quoted_lit) = try_consume_quoted_literal(self) { return Some(quoted_lit); } diff --git a/wright/src/parser/lexer/quoted.rs b/wright/src/parser/lexer/quoted.rs index 2318e0db..b26f0481 100644 --- a/wright/src/parser/lexer/quoted.rs +++ b/wright/src/parser/lexer/quoted.rs @@ -1,53 +1,62 @@ -//! Quoted literals. +//! Lexing implementation for quoted literals. +use super::{token::Token, token::TokenTy, Lexer}; use std::str::Chars; -use super::{token::Token, Lexer, token::TokenTy}; -/// Attempt to parse a quoted literal. This includes [TokenTy::StringLiteral], [TokenTy::CharLiteral], and -/// [TokenTy::FormatStringLiteral]. +/// Attempt to parse a quoted literal. This includes [TokenTy::StringLiteral], [TokenTy::CharLiteral], and +/// [TokenTy::FormatStringLiteral]. pub fn try_consume_quoted_literal<'src>(lexer: &mut Lexer<'src>) -> Option> { // Make a chars iterator to lex from. let mut chars: Chars = lexer.remaining.chars(); - // Get the first char from the character iterator. - // Return none if the first character doesn't exist or is not one of the quote terminating characters. + // Get the first char from the character iterator. + // Return none if the first character doesn't exist or is not one of the quote terminating characters. let first: char = chars.next().filter(|c| ['\'', '"', '`'].contains(c))?; // Track number of bytes consumed. let mut bytes_consumed: usize = first.len_utf8(); - // Track whether the quoted literal is terminated. + // Track whether the quoted literal is terminated. let mut is_terminated: bool = false; - // Consume from the iterator while possible. + // Consume from the iterator while possible. 
while let Some(consumed) = chars.next() { - // Update the number of bytes consumed. + // Update the number of bytes consumed. bytes_consumed += consumed.len_utf8(); - // Check if the character matches the starting char. - // If so, record the literal as terminated and break this loop. + // Check if the character matches the starting char. + // If so, record the literal as terminated and break this loop. if consumed == first { is_terminated = true; break; } - // If the character we just consumed is a backslash. - // We only handle escaped terminators here, rather than parsing actual meaning. - // Consume the next character if there is one, regardless of what it is. - // This prevents an escaped terminator from ending the literal. + // If the character we just consumed is a backslash. + // We only handle escaped terminators here, rather than parsing actual meaning. + // Consume the next character if there is one, regardless of what it is. + // This prevents an escaped terminator from ending the literal. if consumed == '\\' { - // If there is no next char, do not add anything to the number of bytes consumed. + // If there is no next char, do not add anything to the number of bytes consumed. bytes_consumed += chars.next().map(char::len_utf8).unwrap_or(0); } } - // Return when we have either reached a terminator or run out of characters. - // First determine the variant to return. + // Return when we have either reached a terminator or run out of characters. + // First determine the variant to return. let variant: TokenTy = match first { - '\'' => TokenTy::CharLiteral { terminated: is_terminated }, - '\"' => TokenTy::StringLiteral { terminated: is_terminated }, - '`' => TokenTy::FormatStringLiteral { terminated: is_terminated }, + '\'' => TokenTy::CharLiteral { + terminated: is_terminated, + }, + + '\"' => TokenTy::StringLiteral { + terminated: is_terminated, + }, + + '`' => TokenTy::FormatStringLiteral { + terminated: is_terminated, + }, + _ => unreachable!("There are no other quoted literals"), }; - // SAFETY: Summing char lengths from the iterator should never give us an invalid or out of bounds index. + // SAFETY: Summing char lengths from the iterator should never give us an invalid or out of bounds index. 
Some(unsafe { lexer.split_token_unchecked(bytes_consumed, variant) }) } @@ -55,7 +64,7 @@ pub fn try_consume_quoted_literal<'src>(lexer: &mut Lexer<'src>) -> Option Date: Sat, 16 Mar 2024 01:05:36 -0400 Subject: [PATCH 50/60] Add codecov.io --- .github/workflows/codecov-io.yml | 29 +++++++++++++++++++++++++++++ wright/src/parser/ast.rs | 1 + 2 files changed, 30 insertions(+) create mode 100644 .github/workflows/codecov-io.yml diff --git a/.github/workflows/codecov-io.yml b/.github/workflows/codecov-io.yml new file mode 100644 index 00000000..890cd8b8 --- /dev/null +++ b/.github/workflows/codecov-io.yml @@ -0,0 +1,29 @@ +on: ["push", "pull_request"] + +name: codecov.io Code Coverage +jobs: + coverage: + runs-on: ubuntu-22.04 + steps: + - uses: actions/checkout@v4 + - name: Install LLVM + # See: https://apt.llvm.org/ + # Last line: https://gitlab.com/taricorp/llvm-sys.rs/-/issues/13 + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 17 + sudo apt install libpolly-17-dev libz-dev + - uses: actions-rs/cargo@v1 + with: + command: test + args: --all-features --no-fail-fast + env: + RUSTFLAGS: '-C instrument-coverage' + LLVM_PROFILE_FILE: 'cargo-test-%p-%m.profraw' + - name: Upload coverage reports to Codecov + uses: codecov/codecov-action@v4.0.1 + with: + token: ${{ secrets.CODECOV_TOKEN }} + slug: vcfxb/wright-lang + diff --git a/wright/src/parser/ast.rs b/wright/src/parser/ast.rs index 9b7bc8c1..fdd36655 100644 --- a/wright/src/parser/ast.rs +++ b/wright/src/parser/ast.rs @@ -1 +1,2 @@ //! Abstract syntax tree representation for Wright source code. + From 839d1a74878231a96c34402be0022734433792bc Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 16 Mar 2024 01:17:10 -0400 Subject: [PATCH 51/60] Fix codecov.io --- .github/workflows/codecov-io.yml | 34 +++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/.github/workflows/codecov-io.yml b/.github/workflows/codecov-io.yml index 890cd8b8..c880abcb 100644 --- a/.github/workflows/codecov-io.yml +++ b/.github/workflows/codecov-io.yml @@ -1,4 +1,13 @@ -on: ["push", "pull_request"] +on: + push: + branches: + # https://stackoverflow.com/questions/64635032/github-actions-run-on-push-to-all-branches + - "**" + pull_request: + branches: + - "main" +env: + CARGO_TERM_COLOR: always name: codecov.io Code Coverage jobs: @@ -6,6 +15,7 @@ jobs: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v4 + - name: Install LLVM # See: https://apt.llvm.org/ # Last line: https://gitlab.com/taricorp/llvm-sys.rs/-/issues/13 @@ -14,16 +24,26 @@ jobs: chmod +x llvm.sh sudo ./llvm.sh 17 sudo apt install libpolly-17-dev libz-dev - - uses: actions-rs/cargo@v1 + + - uses: actions-rs/toolchain@v1 with: - command: test - args: --all-features --no-fail-fast + toolchain: nightly + override: true + + - name: Run tests + run: cargo test --verbose env: - RUSTFLAGS: '-C instrument-coverage' - LLVM_PROFILE_FILE: 'cargo-test-%p-%m.profraw' + CARGO_INCREMENTAL: '0' + RUSTFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' + RUSTDOCFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' + + - name: rust-grcov + # You may pin to the exact commit or the version. 
+ # uses: actions-rs/grcov@bb47b1ed7883a1502fa6875d562727ace2511248 + uses: actions-rs/grcov@v0.1 + - name: Upload coverage reports to Codecov uses: codecov/codecov-action@v4.0.1 with: token: ${{ secrets.CODECOV_TOKEN }} slug: vcfxb/wright-lang - From 754d6de59fbddd77b4f1dc0a8e6513149339974b Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 16 Mar 2024 01:18:24 -0400 Subject: [PATCH 52/60] Activate for non-main branches --- .github/workflows/codecov-io.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/codecov-io.yml b/.github/workflows/codecov-io.yml index c880abcb..c3e2c2e9 100644 --- a/.github/workflows/codecov-io.yml +++ b/.github/workflows/codecov-io.yml @@ -1,8 +1,6 @@ on: push: branches: - # https://stackoverflow.com/questions/64635032/github-actions-run-on-push-to-all-branches - - "**" pull_request: branches: - "main" From d5fabc2eae9b1f0da95196645d3bcc75a7ced255 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 16 Mar 2024 01:19:05 -0400 Subject: [PATCH 53/60] activate for all branches --- .github/workflows/codecov-io.yml | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/.github/workflows/codecov-io.yml b/.github/workflows/codecov-io.yml index c3e2c2e9..b69c0243 100644 --- a/.github/workflows/codecov-io.yml +++ b/.github/workflows/codecov-io.yml @@ -1,9 +1,5 @@ -on: - push: - branches: - pull_request: - branches: - - "main" +on: ["push", "pull_request"] + env: CARGO_TERM_COLOR: always From 0554dec01c830453753e30c99ba8a98c4abee199 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 16 Mar 2024 01:20:10 -0400 Subject: [PATCH 54/60] third times a charm --- .github/workflows/codecov-io.yml | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/.github/workflows/codecov-io.yml b/.github/workflows/codecov-io.yml index b69c0243..0efcda05 100644 --- a/.github/workflows/codecov-io.yml +++ b/.github/workflows/codecov-io.yml @@ -1,5 +1,9 @@ -on: ["push", "pull_request"] - +on: + push: + branches: + pull_request: + branches: + - "main" env: CARGO_TERM_COLOR: always @@ -18,8 +22,8 @@ jobs: chmod +x llvm.sh sudo ./llvm.sh 17 sudo apt install libpolly-17-dev libz-dev - - - uses: actions-rs/toolchain@v1 + + - uses: actions-rs/toolchain@v1 with: toolchain: nightly override: true From facde2ca7a32bb96e8650d4d0513a9fdee9aa728 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 16 Mar 2024 01:27:32 -0400 Subject: [PATCH 55/60] Add codecov badge to readme --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 23d24c05..00f10754 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,8 @@ | Cargo Check Status | ![Cargo Check status](https://github.com/vcfxb/wright-lang/actions/workflows/cargo-check.yml/badge.svg?branch=master) | | Cargo Test Status | ![Cargo Test status](https://github.com/vcfxb/wright-lang/actions/workflows/cargo-test.yml/badge.svg?branch=master) | | Cargo Clippy Status | ![Cargo Clippy status](https://github.com/vcfxb/wright-lang/actions/workflows/cargo-clippy.yml/badge.svg?branch=master) | -| Code Coverage | [![Coverage Status](https://coveralls.io/repos/github/vcfxb/wright-lang/badge.svg?branch=master&kill_cache=1)](https://coveralls.io/github/vcfxb/wright-lang?branch=master) | +| Code Coverage (Coveralls) | [![Coverage Status](https://coveralls.io/repos/github/vcfxb/wright-lang/badge.svg?branch=master&kill_cache=1)](https://coveralls.io/github/vcfxb/wright-lang?branch=master) | +| Code Coverage (Codecov.io) | 
[![codecov](https://codecov.io/github/vcfxb/wright-lang/graph/badge.svg?token=HO07JEYMIH)](https://codecov.io/github/vcfxb/wright-lang) | Docs.rs | [![Documentation](https://docs.rs/wright/badge.svg)](https://docs.rs/wright) | | Crates.io | [![Crates.io](https://img.shields.io/crates/v/wright.svg)](https://crates.io/crates/wright) | | GitHub release | [![GitHub release](https://img.shields.io/github/release/vcfxb/wright-lang.svg)](https://github.com/vcfxb/wright-lang/releases) | From 9d69484e503458478686456660a835cb9e374f34 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 16 Mar 2024 01:33:32 -0400 Subject: [PATCH 56/60] Update coveralls coverage CI --- .github/actions-rs/grcov.yml | 6 ------ .github/workflows/grcov.yml | 12 +++++------- 2 files changed, 5 insertions(+), 13 deletions(-) delete mode 100644 .github/actions-rs/grcov.yml diff --git a/.github/actions-rs/grcov.yml b/.github/actions-rs/grcov.yml deleted file mode 100644 index 97314290..00000000 --- a/.github/actions-rs/grcov.yml +++ /dev/null @@ -1,6 +0,0 @@ -branch: true -output-type: lcov -output-file: ./lcov.info -ignore-not-existing: true -ignore: - - "/*" diff --git a/.github/workflows/grcov.yml b/.github/workflows/grcov.yml index a098cbbe..e258cf08 100644 --- a/.github/workflows/grcov.yml +++ b/.github/workflows/grcov.yml @@ -1,6 +1,6 @@ on: ["push", "pull_request"] -name: Code Coverage +name: coveralls Code Coverage jobs: coverage: @@ -19,18 +19,16 @@ jobs: with: toolchain: nightly override: true - - uses: actions-rs/cargo@v1 - with: - command: test - args: --all-features --no-fail-fast + - name: Run tests + run: cargo test --verbose env: CARGO_INCREMENTAL: '0' RUSTFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' RUSTDOCFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' - - id: coverage + - name: rust-grcov uses: actions-rs/grcov@v0.1 - name: Coveralls upload uses: coverallsapp/github-action@master with: - github-token: ${{secrets.GITHUB_TOKEN}} + github-token: ${{ secrets.GITHUB_TOKEN }} path-to-lcov: ${{ steps.coverage.outputs.report }} From 000f8e06f2c9a04e2ef3fcb52b32c960defc6c63 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 16 Mar 2024 01:38:52 -0400 Subject: [PATCH 57/60] Needed id for coverage step --- .github/workflows/grcov.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/grcov.yml b/.github/workflows/grcov.yml index e258cf08..979fc7cb 100644 --- a/.github/workflows/grcov.yml +++ b/.github/workflows/grcov.yml @@ -26,6 +26,7 @@ jobs: RUSTFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' RUSTDOCFLAGS: '-Zprofile -Ccodegen-units=1 -Cinline-threshold=0 -Clink-dead-code -Coverflow-checks=off -Cpanic=abort -Zpanic_abort_tests' - name: rust-grcov + id: coverage uses: actions-rs/grcov@v0.1 - name: Coveralls upload uses: coverallsapp/github-action@master From 735cfd42fd8849821e1179d1a2e234fdb4048222 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 16 Mar 2024 01:43:04 -0400 Subject: [PATCH 58/60] Start stripping out old unused code --- README.md | 2 +- wright/src/parser/old/lexer.rs | 500 -------------------- wright/src/parser/old/lexer/definition.rs | 72 --- wright/src/parser/old/lexer/pretty_print.rs | 176 ------- wright/src/parser/old/lexer/tokens.rs | 189 -------- 5 files changed, 1 insertion(+), 938 deletions(-) delete 
mode 100644 wright/src/parser/old/lexer.rs delete mode 100644 wright/src/parser/old/lexer/definition.rs delete mode 100644 wright/src/parser/old/lexer/pretty_print.rs delete mode 100644 wright/src/parser/old/lexer/tokens.rs diff --git a/README.md b/README.md index 00f10754..e250037d 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ | Cargo Test Status | ![Cargo Test status](https://github.com/vcfxb/wright-lang/actions/workflows/cargo-test.yml/badge.svg?branch=master) | | Cargo Clippy Status | ![Cargo Clippy status](https://github.com/vcfxb/wright-lang/actions/workflows/cargo-clippy.yml/badge.svg?branch=master) | | Code Coverage (Coveralls) | [![Coverage Status](https://coveralls.io/repos/github/vcfxb/wright-lang/badge.svg?branch=master&kill_cache=1)](https://coveralls.io/github/vcfxb/wright-lang?branch=master) | -| Code Coverage (Codecov.io) | [![codecov](https://codecov.io/github/vcfxb/wright-lang/graph/badge.svg?token=HO07JEYMIH)](https://codecov.io/github/vcfxb/wright-lang) +| Code Coverage (Codecov.io) | [![codecov](https://codecov.io/github/vcfxb/wright-lang/graph/badge.svg?token=HO07JEYMIH)](https://codecov.io/github/vcfxb/wright-lang) | | Docs.rs | [![Documentation](https://docs.rs/wright/badge.svg)](https://docs.rs/wright) | | Crates.io | [![Crates.io](https://img.shields.io/crates/v/wright.svg)](https://crates.io/crates/wright) | | GitHub release | [![GitHub release](https://img.shields.io/github/release/vcfxb/wright-lang.svg)](https://github.com/vcfxb/wright-lang/releases) | diff --git a/wright/src/parser/old/lexer.rs b/wright/src/parser/old/lexer.rs deleted file mode 100644 index 051313dc..00000000 --- a/wright/src/parser/old/lexer.rs +++ /dev/null @@ -1,500 +0,0 @@ -//! The wright lexer. This module is responsible for lexical analysis and initial processing of source code. -//! -//! This is implemented here using an iterator that looks up the next character from the input using a `const`-defined -//! lexer structure definition. This can be found in [definition]. - -pub mod tokens; -mod definition; -// mod pretty_print; - -use std::{ - iter::{FusedIterator, Peekable}, - str::CharIndices, -}; - -use self::tokens::{CommentTy, Token, TokenTy}; - -/// Lexical analyzer for wright code. This struct host functions that produce tokens from wright source. -#[derive(Debug, Clone)] -pub struct Lexer<'a> { - /// Iterator over the indexed input characters tied to the lifetime of the source code. - iterator: Peekable>, - /// The source code passed to the lexer. This is used to check for keywords. - source: &'a str, -} - -impl<'a> Lexer<'a> { - /// Create a new lexer that iterates on a given source string. - pub fn new(source: &'a str) -> Self { - Lexer { - iterator: source.char_indices().peekable(), - source, - } - } -} - -impl<'a> Iterator for Lexer<'a> { - type Item = Token; - - fn next(&mut self) -> Option { - // Get the next character out of the iterator. - let (start_index, next) = self.iterator.next()?; - - // Handle single character tokens first. - let single_char_tokens = [ - - ]; - - for (c, variant) in single_char_tokens { - if next == c { - return Some(Token { variant, length: 1 }); - } - } - - // Next handle tokens that can possibly be followed by an equal sign. 
- let possible_eq_upgrades = [ - ('!', TokenTy::Bang, TokenTy::BangEq), - ('%', TokenTy::Mod, TokenTy::ModEq), - ('^', TokenTy::Xor, TokenTy::XorEq), - ('*', TokenTy::Star, TokenTy::StarEq), - ('+', TokenTy::Plus, TokenTy::PlusEq), - ]; - - for (c, no_eq, with_eq) in possible_eq_upgrades { - if next == c { - return match self.iterator.next_if(|&(_, x)| x == '=') { - Some(_) => Some(Token { - variant: with_eq, - length: 2, - }), - None => Some(Token { - variant: no_eq, - length: 1, - }), - }; - } - } - - // Next handle tokens that can be doubled or have an equals sign. - let possible_eq_or_double = [ - ('&', TokenTy::And, TokenTy::AndEq, TokenTy::AndAnd), - ('|', TokenTy::Or, TokenTy::OrEq, TokenTy::OrOr), - ('<', TokenTy::Lt, TokenTy::LtEq, TokenTy::ShiftLeft), - ('>', TokenTy::Gt, TokenTy::GtEq, TokenTy::ShiftRight), - (':', TokenTy::Colon, TokenTy::ColonEq, TokenTy::ColonColon), - ('/', TokenTy::Div, TokenTy::DivEq, TokenTy::DivDiv), - ]; - - for (c, alone, with_eq, doubled) in possible_eq_or_double { - if next == c { - return match self.iterator.next_if(|&(_, x)| x == '=' || x == c) { - // Followed by `=` - Some((_, '=')) => Some(Token { - variant: with_eq, - length: 2, - }), - - // Followed by itself. - Some(_) => Some(Token { - variant: doubled, - length: 2, - }), - - // Single char token - None => Some(Token { - variant: alone, - length: 1, - }), - }; - } - } - - // Next deal with arrows - let arrows = [ - ('-', TokenTy::Minus, TokenTy::MinusEq, TokenTy::SingleArrow), - ('=', TokenTy::Eq, TokenTy::EqEq, TokenTy::DoubleArrow), - ('~', TokenTy::Tilde, TokenTy::TildeEq, TokenTy::TildeArrow), - ]; - - for (c, alone, with_eq, as_arrow) in arrows { - if next == c { - return match self.iterator.next_if(|&(_, x)| x == '=' || x == '>') { - Some((_, '=')) => Some(Token { - variant: with_eq, - length: 2, - }), - Some((_, '>')) => Some(Token { - variant: as_arrow, - length: 2, - }), - None => Some(Token { - variant: alone, - length: 1, - }), - _ => unreachable!(), - }; - } - } - - // Dot and range operators. - if next == '.' { - return match self.iterator.next_if(|&(_, x)| x == '.') { - None => Some(Token { - variant: TokenTy::Dot, - length: 1, - }), - Some(_) => match self.iterator.next_if(|&(_, x)| x == '=') { - None => Some(Token { - variant: TokenTy::Range, - length: 2, - }), - Some(_) => Some(Token { - variant: TokenTy::RangeInclusive, - length: 3, - }), - }, - }; - } - - // Whitespace. - if next.is_whitespace() { - // Accumulate the number of bytes of whitespace consumed. - let mut acc = next.len_utf8(); - // Use while-let instead of take-while to avoid consuming the whole iterator. - while let Some((_, consumed)) = self.iterator.next_if(|&(_, x)| x.is_whitespace()) { - acc += consumed.len_utf8(); - } - - return Some(Token { - variant: TokenTy::Whitespace, - length: acc, - }); - } - - // Identifiers - if unicode_ident::is_xid_start(next) || next == '_' { - // Accumulate the number of bytes consumed in the identifier. - let mut acc = next.len_utf8(); - // Consume the rest of the identifier. - while let Some((_, consumed)) = self - .iterator - .next_if(|&(_, x)| unicode_ident::is_xid_continue(x)) - { - acc += consumed.len_utf8(); - } - - // Get the matching source code to check for reserved words. - let range = start_index..start_index + acc; - let matching_source = &self.source[range]; - - // Match on reserved words. 
- let variant: TokenTy = match matching_source { - // Declaration keywords - "class" => TokenTy::Class, - "struct" => TokenTy::Struct, - "record" => TokenTy::Record, - "trait" => TokenTy::Trait, - "func" => TokenTy::Func, - "enum" => TokenTy::Enum, - "union" => TokenTy::Union, - "module" => TokenTy::Module, - "import" => TokenTy::Import, - "implement" => TokenTy::Implement, - "represent" => TokenTy::Represent, - - // Visibility keywords - "public" => TokenTy::Public, - "package" => TokenTy::Package, - "private" => TokenTy::Private, - - // Boolean literals - "true" => TokenTy::True, - "false" => TokenTy::False, - - // Other keywords. - "constraint" => TokenTy::Constraint, - "constrain" => TokenTy::Constrain, - "relation" => TokenTy::Relation, - "unsafe" => TokenTy::Unsafe, - "unchecked" => TokenTy::Unchecked, - "lifetime" => TokenTy::Lifetime, - "outlives" => TokenTy::Outlives, - "Self" => TokenTy::SelfUpper, - "self" => TokenTy::SelfLower, - "type" => TokenTy::Type, - "const" => TokenTy::Const, - "var" => TokenTy::Var, - "if" => TokenTy::If, - "else" => TokenTy::Else, - "match" => TokenTy::Match, - "is" => TokenTy::Is, - "as" => TokenTy::As, - "on" => TokenTy::On, - "in" => TokenTy::In, - "not" => TokenTy::Not, - "dyn" => TokenTy::Dyn, - "try" => TokenTy::Try, - - _ => TokenTy::Identifier, - }; - - return Some(Token { - variant, - length: acc, - }); - } - - // Numerical literals. - if next.is_ascii_digit() { - // Accumulate the number of bytes consumed in the numeric literal. - // All ascii is 1 byte wide so avoid the extra call to `.len_utf8()`. - let mut acc = 1; - // Track the radix - let mut radix = 10; - - // Change the radix if necessary - if next == '0' { - if let Some((_, prefix)) = self - .iterator - .next_if(|(_, x)| ['x', 'o', 'b', 'X', 'B'].contains(x)) - { - acc += 1; - - radix = match prefix { - 'x' | 'X' => 16, - 'b' | 'B' => 2, - 'o' => 8, - _ => unreachable!(), - }; - } - } - - // Consume the rest of the integer literal. - while self - .iterator - .next_if(|&(_, x)| x.is_digit(radix) || x == '_') - .is_some() - { - // All accepted characters should be ascii, so we can just simplify `.len_utf8()` to 1. - acc += 1; - } - - return Some(Token { - variant: TokenTy::IntegerLit, - length: acc, - }); - } - - // String and Character literals. - if ['\'', '"', '`'].contains(&next) { - // Accumulator to track number of bytes consumed. - let mut acc: usize = 1; - let mut is_terminated = false; - - // Consume characters until the end of the literal - while let Some((_, consumed)) = self.iterator.next() { - acc += consumed.len_utf8(); - - match consumed { - // Ending character is the same as starting character. - // Escapes should all be handled, so don't worry about this being escaped. - _ if consumed == next => { - is_terminated = true; - break; - } - - // Escaped pattern. - // Only worry about escaped terminators here, since all other escaped - // patterns can be dealt with later. - '\\' => { - // Consume the escaped character regardless of what it is. - // It will always be part of the quoted literal. - if let Some((_, escaped)) = self.iterator.next() { - acc += escaped.len_utf8(); - } - } - - // Do nothing for non-escaped chars since the quoted literal continues - // and we have already recorded the consumed bytes. 
- _ => {} - } - } - - // We have finished consuming the literal -- make sure we produce the - // right variant - return match next { - '\'' => Some(Token { - variant: TokenTy::CharLit { is_terminated }, - length: acc, - }), - _ => Some(Token { - variant: TokenTy::StringLit { - is_format: next == '`', - is_terminated, - }, - length: acc, - }), - }; - } - - // Comments. - if next == '#' { - // Use accumulator to track number of bytes consumed. - let mut acc = 1; - - // There are a few variants as follows. - // `#...` - single line comment - // `#*...*#` - multiline comment - // `##...` - single line inner doc comment - // `##!...` - single line outer doc comment - // `#**...*#` - multiline inner doc comment - // `#*!...*#` - multiline outer doc comment - // If a multiline comment is not terminated by the end of the file then just mark it as such in the - // produced token. A seperate token error handling layer will raise that outside of this function. - - // Handle multiline comments - if self.iterator.next_if(|&(_, x)| x == '*').is_some() { - acc += 1; - - // Check if it's a doc comment. - let comment_type = match self.iterator.next_if(|&(_, x)| x == '*' || x == '!') { - Some((_, '*')) => { - acc += 1; - CommentTy::InnerDoc - } - - Some((_, '!')) => { - acc += 1; - CommentTy::OuterDoc - } - - None => CommentTy::Normal, - - _ => unreachable!(), - }; - - // Read the rest of the multi-line comment - while let Some((_, consumed)) = self.iterator.next() { - acc += consumed.len_utf8(); - if consumed == '*' && matches!(self.iterator.peek(), Some((_, '#'))) { - acc += 1; - return Some(Token { - variant: TokenTy::MultilineComment { - comment_type, - is_terminated: true, - }, - length: acc, - }); - } - } - - // If we hit the end, the comment is not terminated. - return Some(Token { - variant: TokenTy::MultilineComment { - comment_type, - is_terminated: false, - }, - length: acc, - }); - } - - // Handle single line comment. - let mut comment_type = CommentTy::Normal; - - // Check for inner doc comment - if self.iterator.next_if(|&(_, x)| x == '#').is_some() { - acc += 1; - comment_type = CommentTy::InnerDoc; - - // Check for outer doc comment - if self.iterator.next_if(|&(_, x)| x == '!').is_some() { - acc += 1; - comment_type = CommentTy::OuterDoc; - } - } - - // Read to end of line/file for rest of comment. Include line ending in consumed bytes. - for (_, consumed) in self.iterator.by_ref() { - acc += consumed.len_utf8(); - if consumed == '\n' { - break; - } - } - - return Some(Token { - variant: TokenTy::SingleLineComment { comment_type }, - length: acc, - }); - } - - // If we haven't matched by this point, return an unknown token. - Some(Token { - variant: TokenTy::Unknown, - length: next.len_utf8(), - }) - } - - fn size_hint(&self) -> (usize, Option) { - // Get the size hint of the internal iterator. - let (inner_lower, upper) = self.iterator.size_hint(); - // If there are any characters left, then there is at least one token remaining. - ((inner_lower > 0) as usize, upper) - } -} - -impl<'a> FusedIterator for Lexer<'a> {} - -/// A token with an index in a piece of source code. -#[derive(Copy, Clone, Debug)] -pub struct IndexedToken { - /// The byte index into the source code that this token starts on. - pub index: usize, - /// The token itself. - pub token: Token, -} - -/// An iterator over the tokens in the source code with byte indices attached. 
-#[derive(Debug, Clone)] -pub struct IndexedLexer<'src> { - /// The current index in source code -- the number of bytes currently consumed by the iterator. - pub index: usize, - /// The underlying lexer iterator. - lexer: Lexer<'src>, -} - -impl<'src> IndexedLexer<'src> { - /// Construct a new indexed lexer. - pub fn new(source: &'src str) -> Self { - Self { - index: 0, - lexer: Lexer::new(source), - } - } -} - -impl<'a> Iterator for IndexedLexer<'a> { - type Item = IndexedToken; - - fn next(&mut self) -> Option { - // Pull a token from the iterator. - let token = self.lexer.next()?; - - // If available, add the current index to it to return. - let indexed_token = IndexedToken { - index: self.index, - token, - }; - - // Update the current index with the length of the token. - self.index += token.length; - - // Return indexed token - Some(indexed_token) - } - - fn size_hint(&self) -> (usize, Option) { - self.lexer.size_hint() - } -} - -impl<'a> FusedIterator for IndexedLexer<'a> {} diff --git a/wright/src/parser/old/lexer/definition.rs b/wright/src/parser/old/lexer/definition.rs deleted file mode 100644 index 8dbeb5fa..00000000 --- a/wright/src/parser/old/lexer/definition.rs +++ /dev/null @@ -1,72 +0,0 @@ -//! The lexer definition in a rust constant that tells us how to handle characters encountered and lists all the -//! possible tokens produced. - -use super::tokens::{TokenTy}; - -/// A single character token matches a single character from the input, and produces a token of the length of the -/// character exactly. -#[derive(Clone, Copy, Debug)] -pub struct SingleCharToken { - /// The character to match. - pub matching_char: char, - /// The token type produced. - pub produces: TokenTy, -} - -impl SingleCharToken { - /// Turn a single character token into a lexer branch. - const fn into_lexer_branch(self) -> LexerBranch { - LexerBranch::SingleCharToken(self) - } -} - -/// A set of posible continuations from a single char token that will form multi char tokens -/// (i.e. going from `&` to `&&` and `&=`). -#[derive(Clone, Copy, Debug)] -pub struct PossibleContinuations { - /// The base single char and the token it produces when not followed by one of the other possible characters. - pub base: SingleCharToken, - /// The characters that can follow this and the tokens they would produce. - pub continuations: &'static [(char, TokenTy)] -} - -impl PossibleContinuations { - /// Convert to a [LexerBranch]. - const fn into_lexer_branch(self) -> LexerBranch { - LexerBranch::PossibleContinuations(self) - } -} - -/// A branch in the lexer, representing options to be tried. -#[derive(Debug)] -pub enum LexerBranch { - /// A single character token (such as '[') with no option for continuation. - SingleCharToken(SingleCharToken), - PossibleContinuations(PossibleContinuations) - -} - -// Below is a variety of `const-fn`s to make generating this structure easier. - -/// Makes a [SingleCharToken]. -const fn single(matching_char: char, produces: TokenTy) -> SingleCharToken { - SingleCharToken { matching_char, produces } -} - -/// Makes a [PossibleContinuations]. -const fn pc(matching_char: char, produces: TokenTy, continuations: &'static [(char, TokenTy)]) -> PossibleContinuations { - PossibleContinuations { base: single(matching_char, produces), continuations } -} - - -/// The lexer's definition, in abstract branching. 
-pub const DEFINITION: &[LexerBranch] = &[ - single('(', TokenTy::LeftParen).into_lexer_branch(), - single(')', TokenTy::RightParen).into_lexer_branch(), - - pc('+', TokenTy::Plus, &[ - ('=', TokenTy::PlusEq), - ]).into_lexer_branch(), - - -]; diff --git a/wright/src/parser/old/lexer/pretty_print.rs b/wright/src/parser/old/lexer/pretty_print.rs deleted file mode 100644 index 84629e73..00000000 --- a/wright/src/parser/old/lexer/pretty_print.rs +++ /dev/null @@ -1,176 +0,0 @@ -//! Lexer pretty printer. - -use crate::parser::lexer::{IndexedLexer, IndexedToken}; - -use super::Lexer; -use codespan_reporting::files::{Files, SimpleFile}; -use std::cmp; -use std::io::Write; -use std::{fmt::Display, ops::Range}; -use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor}; - -#[derive(Default)] -struct PrettyPrinter { - print_lines: [String; 2], -} - -impl PrettyPrinter { - fn flush(&mut self) -> anyhow::Result<()> { - // Use termcolor to print in different colors. - let mut out = StandardStream::stdout(ColorChoice::Always); - // Print source code default. - writeln!(&mut out, "{}", self.print_lines[0])?; - // Print token info in cyan - out.set_color(ColorSpec::new().set_fg(Some(Color::Cyan)))?; - writeln!(&mut out, "{}", self.print_lines[1])?; - // Reset after printing is over. - out.set_color(ColorSpec::new().set_reset(true))?; - // Reset the print_lines. - self.print_lines = [Default::default(), Default::default()]; - Ok(()) - } -} - -impl<'a> Lexer<'a> { - /// Print in pretty format the source code and the tokens it matched to under it. - pub fn debug_pretty_print>( - source: &SimpleFile, - ) -> anyhow::Result<()> { - // Create a pretty-printer to use for colored text - let mut pp: PrettyPrinter = Default::default(); - // Print a header line to indicate columns. - println!("line ({:#10})", "byte"); - // Get the source code as a str ref. - let source_str: &str = source.source().as_ref(); - // Get the token iterator for the source code. - let mut token_iter = IndexedLexer::new(source_str) - // Go from byte start indices to byte ranges in the source string - .map(|IndexedToken { index, token }| (index..index + token.length, token)) - // Make it peekable so that we can consume the iterator conditionally - .peekable(); - - // Make an iterator over the line byte-index ranges. - let mut line_range_iter = source_str - .lines() - // Use enumerate to get line indices. - .enumerate() - // Get line byte-index ranges for each line. Use `.unwrap()` beacause - // all the indices out of enumerate should be valid. - .map(|(line_index, _)| (line_index, source.line_range((), line_index).unwrap())) - // Use `.peekable()` to make it conditionally consubable. - .peekable(); - - // Make a utility function to get the matching source for a byte-index range. - let matching_source = |range: Range| -> String { - source_str[range] - // Use `.replace()` to make sure tabs are printed in the - // same width in a predictable way. - .replace('\t', " ") - // Also use replace to avoid double-printing newline characters if they exist. - // Do replace them with a space though, to avoid underflow on subtraction in formatting. - .replace(['\r', '\n'], " ") - }; - - // Iterate on the lines of the source file. - while let Some((line_index, line_range)) = line_range_iter.next() { - // Set the print headers if empty. 
- if pp.print_lines[0].is_empty() { - pp.print_lines[0] = format!("{:04} ({:#010x}): ", line_index, line_range.start); - pp.print_lines[1] = format!("{:04} ({:#010x}): ", line_index, line_range.start); - } - - // Consume all tokens that end (and therefore start also) on this line. - while let Some((token_range, token)) = - token_iter.next_if(|(token_range, _)| token_range.end <= line_range.end) - { - // Get the matching source code for the token. - let matched = matching_source(token_range); - - // Make a string representation of the token to print in the debug. - let token_string: String = token.to_string(); - - // Get the width of the display as the max of the two string character (not byte) lengths. Add two to the - // token length to represent the square brackets added later. - let width: usize = - cmp::max(token_string.chars().count() + 2, matched.chars().count()); - - // Add source to first line and token info to second line as appopriate. Add two to the source with for the - // square brackets. - pp.print_lines[0].push_str(format!("{matched: add_line_range.start); - - // If it does continue on the next line, start the line. - if let Some((add_line_index, add_line_range)) = continues_on_next_line { - // Get the matching source. - let matching_source_range: Range = add_line_range.start..token_range.end; - let matched: String = matching_source(matching_source_range); - - // Calculate the number of spaces to put before the closing bracket. - let space = matched.chars().count() - 1; - - // Add the match and the closing bracket. - pp.print_lines[0] = format!( - "{:04} ({:#010x}): {matched}", - add_line_index, add_line_range.start - ); - pp.print_lines[1] = format!( - "{:04} ({:#010x}): {:space$}]", - add_line_index, add_line_range.start, "" - ); - } - } else { - // The next token is on the next line, just flush the print_lines and move on. - pp.flush()?; - } - } - - Ok(()) - } -} diff --git a/wright/src/parser/old/lexer/tokens.rs b/wright/src/parser/old/lexer/tokens.rs deleted file mode 100644 index ada667b2..00000000 --- a/wright/src/parser/old/lexer/tokens.rs +++ /dev/null @@ -1,189 +0,0 @@ -use derive_more::Display; - -/// Token of Wright source code. -#[derive(Clone, Copy, Debug, Display)] -#[display(fmt = "{} ({}b)", variant, length)] -pub struct Token { - /// What type of token is it? - pub variant: TokenTy, - /// How many bytes of source code long is it? Note this doesn't necessarily mean how many characters long it is. - pub length: usize, -} - -/// All of the reserved words are just upper-case versions of the -/// matching source code unless otherwise stated. -#[derive(Clone, Copy, PartialEq, Eq, Debug, Display)] -pub enum TokenTy { - // Operators and parentheses - LeftParen, // ( - RightParen, // ) - Bang, // ! - BangEq, // != - Tilde, // ~ - TildeArrow, // ~> - TildeEq, // ~= - At, // @ - Pound, // # - Dollar, // $ - Mod, // % - ModEq, // %= - Xor, // ^ - XorEq, // ^= - And, // & - AndEq, // &= - AndAnd, // && - Or, // | - OrEq, // |= - OrOr, // || - Star, // * - StarEq, // *= - Plus, // + - PlusEq, // += - Minus, // - - MinusEq, // -= - SingleArrow, // -> - Gt, // > - GtEq, // >= - ShiftRight, // >> - Lt, // < - LtEq, // <= - ShiftLeft, // << - Eq, // = - EqEq, // == - DoubleArrow, // => - Div, // / - DivEq, // /= - DivDiv, // // - Semi, // ; - Colon, // : - ColonColon, // :: - ColonEq, // := - Question, // ? - Dot, // . - Range, // .. 
- RangeInclusive, // ..= - Comma, // , - LeftSquare, // [ - RightSquare, // ] - LeftBracket, // { - RightBracket, // } - - // Reserved words - Class, - Struct, - Record, - Enum, - Union, - Trait, - Type, - Func, - Module, - Implement, - Represent, - /// Publicly visible. - Public, - /// Visible in the package only. - Package, - /// Visible only in file/module. - Private, - Constraint, - Constrain, - /// Used to constrain relations between variables. - Relation, - Unsafe, - /// May use similar to unsafe in Rust -- call a function or cast without checking any of the constraints. - Unchecked, - Import, - Const, - Var, - If, - Else, - Match, - Is, - As, - On, - In, - Not, - /// Marks functions as dynamic, and not to be executed at compile time. - Dyn, - /// For try { } blocks. - Try, - True, - False, - Lifetime, - Outlives, - - /// `Self` in source code. - #[display(fmt = "Self")] - SelfUpper, - - /// `self` in source code. - #[display(fmt = "self")] - SelfLower, - - /// Whitespace of any kind and length. - #[display(fmt = "W")] - Whitespace, - - /// Single line comment started with `#`. Optionally `## ` or `##! ` for documentation. - #[display(fmt = "Single line {} comment", comment_type)] - SingleLineComment { - comment_type: CommentTy, - }, - - /// Multiline comment between `#*` and `*#`. Starts with `#**` or `#*!` for documentation. - #[display( - fmt = "Multiline {} comment (terminated = {})", - comment_type, - is_terminated - )] - MultilineComment { - comment_type: CommentTy, - /// Is this comment terminated? If not raise an error before parsing the tokens. - is_terminated: bool, - }, - - /// Integer literal. This is a literal integer in source code. May include underscores after the leading digit - /// as visual seperators. May also include a prefix such as `0x`, `0o`, or `0b` for hex, octal, or binary. - IntegerLit, - - /// A string literal in source code. - #[display( - fmt = "StringLit (fmt = {}, terminated = {})", - is_format, - is_terminated - )] - StringLit { - /// For format strings (backticks instead of double quotes) - is_format: bool, - /// Is this string terminated? - is_terminated: bool, - }, - - /// A character literal in source code. - #[display(fmt = "CharLit (terminated = {})", is_terminated)] - CharLit { - /// Is the char lit terminated? - is_terminated: bool, - }, - - /// A identifier in source code (such as a variable name). At this stage keywords (such as 'struct') are - /// also considered identifiers. - #[display(fmt = "ID")] - Identifier, - - /// Unknown character for the lexer. - #[display(fmt = "?")] - Unknown, -} - -/// Different types of comments. -#[derive(Clone, Copy, Debug, PartialEq, Eq, Display)] -pub enum CommentTy { - /// Normal comment that does not get used in documentation. - Normal, - /// Documentation for a declaration in the file. - InnerDoc, - /// Documentation for the file itself. 
- OuterDoc, -} From 6e186224dc573d1e4050b0541bdd562f360c49e6 Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 16 Mar 2024 01:46:01 -0400 Subject: [PATCH 59/60] Put back grcov config see if it fixes things --- .github/actions-rs/grcov.yml | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .github/actions-rs/grcov.yml diff --git a/.github/actions-rs/grcov.yml b/.github/actions-rs/grcov.yml new file mode 100644 index 00000000..97314290 --- /dev/null +++ b/.github/actions-rs/grcov.yml @@ -0,0 +1,6 @@ +branch: true +output-type: lcov +output-file: ./lcov.info +ignore-not-existing: true +ignore: + - "/*" From ab0adf4a2a56d9677fd0d8a9caad88de63d166fd Mon Sep 17 00:00:00 2001 From: Venus Xeon-Blonde Date: Sat, 16 Mar 2024 01:54:50 -0400 Subject: [PATCH 60/60] Update grcov.yml --- .github/actions-rs/grcov.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/actions-rs/grcov.yml b/.github/actions-rs/grcov.yml index 97314290..5a936960 100644 --- a/.github/actions-rs/grcov.yml +++ b/.github/actions-rs/grcov.yml @@ -2,5 +2,7 @@ branch: true output-type: lcov output-file: ./lcov.info ignore-not-existing: true +llvm: true ignore: - "/*" + - "../*"
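
The patches above (44 through 49) leave Lexer::next_token as a chain of try_consume_* helpers tried in order, with a one-character Unknown token as the fallback. The following is a minimal, self-contained sketch of that dispatch shape, not the crate's actual code: it uses plain &str slices in place of Fragment, and the helper bodies are deliberately simplified stand-ins for the real trivial, identifier, and integer_literal modules.

// Simplified sketch of the next_token dispatch order after patches 44-49.
// Types and helper bodies are stand-ins, not the wright crate's real API.

#[derive(Debug)]
enum TokenTy { Plus, Identifier, IntegerLiteral, Unknown }

#[derive(Debug)]
struct Token<'src> { variant: TokenTy, fragment: &'src str }

struct Lexer<'src> { remaining: &'src str }

impl<'src> Lexer<'src> {
    fn split_token(&mut self, bytes: usize, variant: TokenTy) -> Token<'src> {
        // Split the matched bytes off the front of the remaining input.
        let remaining: &'src str = self.remaining;
        let (fragment, rest) = remaining.split_at(bytes);
        self.remaining = rest;
        Token { variant, fragment }
    }

    fn next_token(&mut self) -> Option<Token<'src>> {
        // Skip leading whitespace, as ignore_whitespace does in the patches.
        self.remaining = self.remaining.trim_start();
        if self.remaining.is_empty() {
            return None;
        }
        // Try each sub-lexer in order; the first one that matches wins.
        if let Some(token) = try_consume_plus(self) {
            return Some(token);
        }
        if let Some(token) = try_consume_word(self) {
            return Some(token);
        }
        if let Some(token) = try_consume_integer(self) {
            return Some(token);
        }
        // Otherwise emit a one-character Unknown token, as the real lexer does.
        let len = self.remaining.chars().next().map(char::len_utf8).unwrap_or(1);
        Some(self.split_token(len, TokenTy::Unknown))
    }
}

fn try_consume_plus<'src>(lexer: &mut Lexer<'src>) -> Option<Token<'src>> {
    lexer.remaining.starts_with('+').then(|| lexer.split_token(1, TokenTy::Plus))
}

fn try_consume_word<'src>(lexer: &mut Lexer<'src>) -> Option<Token<'src>> {
    let len: usize = lexer.remaining.chars()
        .take_while(|c| c.is_alphabetic() || *c == '_')
        .map(char::len_utf8)
        .sum();
    (len > 0).then(|| lexer.split_token(len, TokenTy::Identifier))
}

fn try_consume_integer<'src>(lexer: &mut Lexer<'src>) -> Option<Token<'src>> {
    let len: usize = lexer.remaining.chars()
        .take_while(|c| c.is_ascii_digit() || *c == '_')
        .map(char::len_utf8)
        .sum();
    (len > 0).then(|| lexer.split_token(len, TokenTy::IntegerLiteral))
}

fn main() {
    let mut lexer = Lexer { remaining: "count + 42" };
    while let Some(token) = lexer.next_token() {
        println!("{:?} {:?}", token.variant, token.fragment);
    }
}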
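
Patch 44's try_consume_integer_literal accepts a leading ASCII digit, an optional 0x/0X/0b/0B/0o radix prefix after a leading zero, and then digits of that radix plus underscore separators. Below is a standalone restatement of that length-and-radix calculation; the function name match_integer_literal and its return type are chosen for illustration and are not part of the crate.

// Standalone restatement of the radix handling from try_consume_integer_literal
// (patch 44): how many bytes form the literal, and which radix applies.

fn match_integer_literal(input: &str) -> Option<(usize, u32)> {
    let mut chars = input.chars().peekable();
    // The literal must start with an ASCII decimal digit.
    let first = chars.next().filter(char::is_ascii_digit)?;
    let mut bytes = first.len_utf8();
    let mut radix = 10;

    // A leading zero may introduce a radix prefix: 0x/0X hex, 0b/0B binary, 0o octal.
    if first == '0' {
        if let Some(prefix) = chars.next_if(|c| ['x', 'X', 'b', 'B', 'o'].contains(c)) {
            bytes += 1;
            radix = match prefix {
                'x' | 'X' => 16,
                'b' | 'B' => 2,
                _ => 8,
            };
        }
    }

    // Digits of the chosen radix and visual `_` separators extend the literal.
    bytes += chars
        .take_while(|c| c.is_digit(radix) || *c == '_')
        .map(char::len_utf8)
        .sum::<usize>();

    Some((bytes, radix))
}

fn main() {
    assert_eq!(match_integer_literal("123_456_789."), Some((11, 10)));
    assert_eq!(match_integer_literal("0xFF+1"), Some((4, 16)));
    assert_eq!(match_integer_literal("0b1010 rest"), Some((6, 2)));
    assert_eq!(match_integer_literal("abc"), None);
}

As in the patch's own test, lexing 123_456_789. stops before the dot; the token itself only records which bytes of source it covers.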
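
Patches 45 and 46 settle the keyword/identifier rule: the first character must satisfy is_xid_start (or be an underscore), continuation characters must satisfy is_xid_continue, and only the fully matched word is compared against the keyword table. A compact sketch using the same unicode-ident crate the lexer already depends on follows; the keyword table here is a small excerpt, and the match_word and TokenKind names are illustrative rather than the crate's own.

// Sketch of the keyword/identifier rule from patches 45-46.
// Requires the unicode-ident crate, as the lexer itself does.

use unicode_ident::{is_xid_continue, is_xid_start};

#[derive(Debug, PartialEq)]
enum TokenKind { KwConst, KwRecord, Identifier, Underscore }

/// Return the byte length and kind of a leading identifier-or-keyword, if any.
fn match_word(input: &str) -> Option<(usize, TokenKind)> {
    let mut chars = input.chars();
    // The first character must be an XID start character or an underscore.
    let first = chars.next().filter(|c| is_xid_start(*c) || *c == '_')?;
    // Every following character must be an XID continue character.
    let len = first.len_utf8()
        + chars
            .take_while(|c| is_xid_continue(*c))
            .map(char::len_utf8)
            .sum::<usize>();

    // Only after the whole word is matched do we decide keyword vs. identifier.
    let kind = match &input[..len] {
        "const" => TokenKind::KwConst,
        "record" => TokenKind::KwRecord,
        "_" => TokenKind::Underscore,
        _ => TokenKind::Identifier,
    };
    Some((len, kind))
}

fn main() {
    assert_eq!(match_word("const TEST"), Some((5, TokenKind::KwConst)));
    assert_eq!(match_word("TEST;"), Some((4, TokenKind::Identifier)));
    assert_eq!(match_word("_ = x"), Some((1, TokenKind::Underscore)));
    assert_eq!(match_word("123"), None);
}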
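
Patch 47 adds a terminated flag to the quoted-literal token variants, and patch 48 fills in the scan: read until the opening quote character recurs, skip whatever follows a backslash so an escaped quote cannot terminate the literal, and record rather than reject an unterminated literal. A self-contained sketch of that scan, with illustrative names in place of the crate's Token and TokenTy types, is below.

// Sketch of the quoted-literal scan from patch 48: consume to the matching
// quote, skip over backslash escapes, and record whether a terminator was found.

#[derive(Debug, PartialEq)]
enum Quoted {
    Char { terminated: bool },
    Str { terminated: bool },
    FormatStr { terminated: bool },
}

fn match_quoted(input: &str) -> Option<(usize, Quoted)> {
    let mut chars = input.chars();
    // A quoted literal starts with ', " or ` (backtick marks a format string).
    let open = chars.next().filter(|c| ['\'', '"', '`'].contains(c))?;
    let mut bytes = open.len_utf8();
    let mut terminated = false;

    while let Some(c) = chars.next() {
        bytes += c.len_utf8();
        if c == open {
            // Found the matching quote.
            terminated = true;
            break;
        }
        if c == '\\' {
            // Skip whatever follows a backslash so an escaped quote cannot end
            // the literal; the meaning of escapes is resolved in a later phase.
            bytes += chars.next().map(char::len_utf8).unwrap_or(0);
        }
    }

    let variant = match open {
        '\'' => Quoted::Char { terminated },
        '"' => Quoted::Str { terminated },
        _ => Quoted::FormatStr { terminated },
    };
    Some((bytes, variant))
}

fn main() {
    assert_eq!(
        match_quoted(r#""Test string literal" rest"#),
        Some((21, Quoted::Str { terminated: true }))
    );
    assert_eq!(
        match_quoted(r#""ends with escape \" so it never closes"#),
        Some((39, Quoted::Str { terminated: false }))
    );
}

Keeping unterminated literals as ordinary tokens, instead of failing inside the lexer, matches the note added to UnterminatedBlockComment in patch 47: malformed input stays in the token stream and produces errors later.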
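
Patch 47 also makes Lexer::fork and Lexer::offset_from public, which enables speculative parsing: copy the lexer, let the copy consume input, and measure how far it advanced before deciding whether to commit. The sketch below imitates that pattern with a plain &str field and pointer arithmetic; the real methods delegate to the Fragment type and carry the same caveat that both lexers must point into the same source.

// Sketch of the fork / offset_from pattern made public in patch 47. The lexer
// is Copy, so forking is free; offset_from reports how many bytes the fork has
// consumed relative to the original. Stand-in types, not the crate's API.

#[derive(Clone, Copy)]
struct Lexer<'src> {
    remaining: &'src str,
}

impl<'src> Lexer<'src> {
    fn fork(&self) -> Self {
        // Copy semantics make forking a byte-for-byte copy of the cursor.
        *self
    }

    /// How many bytes ahead of `origin` this lexer is.
    /// Assumes both lexers were created over the same source string.
    fn offset_from(&self, origin: &Self) -> usize {
        (self.remaining.as_ptr() as usize) - (origin.remaining.as_ptr() as usize)
    }

    fn advance(&mut self, bytes: usize) {
        self.remaining = &self.remaining[bytes..];
    }
}

fn main() {
    let origin = Lexer { remaining: "let x = 1;" };

    // Speculatively consume the keyword on a fork.
    let mut fork = origin.fork();
    fork.advance(3);

    // The caller can now decide whether to keep the fork or fall back to origin.
    assert_eq!(fork.offset_from(&origin), 3);
}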
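
For contrast, the lexer removed in patches 58 through 60 produced tokens that stored only a byte length, and a separate IndexedLexer wrapper re-attached a running byte index so callers could recover the matching source text. A trimmed-down restatement of that bookkeeping (types reduced to the relevant fields) shows why the fragment-carrying tokens introduced in this series make the wrapper unnecessary.

// Compact restatement of the deleted lexer's index bookkeeping (patch 58):
// tokens carried only a length, and IndexedLexer tracked the running index.
// Types here are trimmed for illustration, not copied from the old module.

#[derive(Clone, Copy, Debug)]
struct Token {
    length: usize,
}

#[derive(Clone, Copy, Debug)]
struct IndexedToken {
    index: usize,
    token: Token,
}

struct IndexedLexer<I> {
    index: usize,
    tokens: I,
}

impl<I: Iterator<Item = Token>> Iterator for IndexedLexer<I> {
    type Item = IndexedToken;

    fn next(&mut self) -> Option<IndexedToken> {
        let token = self.tokens.next()?;
        let indexed = IndexedToken { index: self.index, token };
        // The running index is the only link between a token and its source text.
        self.index += token.length;
        Some(indexed)
    }
}

fn main() {
    let raw = [Token { length: 3 }, Token { length: 1 }, Token { length: 5 }];
    let indexed: Vec<_> = IndexedLexer { index: 0, tokens: raw.into_iter() }.collect();
    assert_eq!(indexed[2].index, 4);
}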