Skip to content

Commit

Permalink
Add identifier parsing test and dedicated whitespace token type.
Browse files — browse the repository at this point in the history
  • Loading branch information
vcfxb committed Jul 13, 2024
1 parent cec24c0 commit 9775b67
Show file tree
Hide file tree
Showing 5 changed files with 43 additions and 15 deletions.
30 changes: 16 additions & 14 deletions wright/src/lexer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -104,17 +104,6 @@ impl Lexer {
self.remaining.offset_from(&origin.remaining)
}

/// Advance past any whitespace at the start of the [Lexer::remaining] [Fragment], discarding it.
pub fn ignore_whitespace(&mut self) {
    // Build the fragment that begins just after any leading whitespace.
    let trimmed = self.remaining.clone().trim_start();

    // Only swap in the trimmed fragment when trimming actually consumed something;
    // otherwise leave `remaining` untouched.
    if trimmed.range != self.remaining.range {
        self.remaining = trimmed;
    }
}

/// Check if a pattern matches at the start of the [Lexer::remaining] [Fragment].
pub fn matches(&self, pattern: &str) -> bool {
self.remaining.as_str().starts_with(pattern)
Expand Down Expand Up @@ -183,14 +172,27 @@ impl Lexer {

/// Get the next token from the lexer.
pub fn next_token(&mut self) -> Option<Token> {
// Ignore any whitespace at the start of the lexer.
self.ignore_whitespace();

// If the remaining input is empty, there is no token.
if self.remaining.is_empty() {
return None;
}

// If there is whitespace, it becomes its own token.
// Use a little unsafe here since this check is done every time and needs to be fast.
{
let remaining_str = self.remaining.as_str();
let trimmed = remaining_str.trim_start().as_ptr();

// Calculate the delta by pointer offset.
// SAFETY: In this case, all the requirements of pointer::offset_from are satisfied.
let delta = unsafe { trimmed.offset_from(remaining_str.as_ptr()) };

if delta > 0 {
// SAFETY: trim_start should always return a valid string, and delta is just checked to be > 0.
return unsafe { Some(self.split_token_unchecked(delta as usize, TokenTy::Whitespace)) };
}
}

// Attempt to parse a single line comment and then attempt a multi-line comment.
for comment_match_fn in [try_match_single_line_comment, try_match_block_comment] {
// Attempt to parse a comment using the given match function. Return it if it's documentation or unterminated.
Expand Down
1 change: 1 addition & 0 deletions wright/src/lexer/identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,7 @@ mod tests {
let mut lexer = Lexer::new_test("const TEST");

assert_eq!(lexer.next_token().unwrap().variant, TokenTy::KwConst);
assert_eq!(lexer.next_token().unwrap().variant, TokenTy::Whitespace);
assert_eq!(lexer.next_token().unwrap().variant, TokenTy::Identifier);
}
}
2 changes: 1 addition & 1 deletion wright/src/lexer/quoted.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ mod tests {

#[test]
fn string_literal() {
let mut lexer = Lexer::new_test(r#" "Test string literal" "#);
let mut lexer = Lexer::new_test(r#""Test string literal""#);
let token = lexer.next_token().unwrap();
assert_eq!(token.variant, TokenTy::StringLiteral { terminated: true });
assert_eq!(token.fragment.as_str(), "\"Test string literal\"");
Expand Down
3 changes: 3 additions & 0 deletions wright/src/lexer/token.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,9 @@ pub enum TokenTy {
FormatStringLiteral { terminated: bool },
CharLiteral { terminated: bool },

/// Whitespace counts as a token.
Whitespace,

/// Unknown character in lexer fragment.
Unknown
}
22 changes: 22 additions & 0 deletions wright/src/parser/identifier.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,25 @@ impl Parse for Identifier {
Ok(Identifier { fragment: ident_token.fragment })
}
}

#[cfg(test)]
mod tests {
    use crate::{ast::identifier::Identifier, lexer::{token::TokenTy, Lexer}, parser::{Parse, ParseError}};

    /// A lone identifier should parse successfully and consume the whole input.
    #[test]
    fn test_parse_ident() {
        let mut lexer = Lexer::new_test("source");
        let ident = Identifier::parse(&mut lexer).unwrap();
        assert_eq!(ident.fragment.as_str(), "source");
        // The parser must leave nothing behind in the lexer.
        assert_eq!(lexer.remaining.len(), 0);
    }

    /// Inputs that do not begin with an identifier token (digits, symbols,
    /// whitespace, a bare underscore, reserved words) must produce an
    /// [ParseError::Expected] for [TokenTy::Identifier].
    #[test]
    fn test_parse_ident_fail() {
        for fail in ["12", "+", " ", " test", "_", "record"] {
            // `fail` is already a `&str`; the previous `&fail` was a needless
            // `&&str` borrow that only compiled via deref coercion.
            let mut lexer = Lexer::new_test(fail);
            let error = Identifier::parse(&mut lexer).unwrap_err();
            assert!(matches!(&error, ParseError::Expected { expected: TokenTy::Identifier, .. }));
        }
    }
}

0 comments on commit 9775b67

Please sign in to comment.