Commit 284ca54

Makes tokens aware of their line & column numbers

cuducos committed Sep 21, 2023
1 parent 8c5381c
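In short: every Token variant now carries the line and column where it was read, and the new error_prefix() turns that into a path:line:column string for diagnostics. A minimal sketch of the new shape (the values mirror the test added below; the variable names are illustrative, not part of this commit):

    let path = String::from(".env.sample");
    let token = Token::Text(3, 1, "NAME".to_string());
    assert_eq!(token.error_prefix(&path), ".env.sample:3:1");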
Showing 2 changed files with 119 additions and 14 deletions.
5 changes: 4 additions & 1 deletion src/main.rs
@@ -22,7 +22,10 @@ fn main() -> Result<()> {
         }
 
         let mut tokenizer = Tokenizer::new(PathBuf::from(&path))?;
-        println!("{:?}", tokenizer.tokenize()?);
+        let tokens = tokenizer.tokenize()?;
+        for token in &tokens {
+            println!("{}: {:?}", token.error_prefix(&path), token);
+        }
         return Ok(());
     }
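With this loop each token is printed behind the prefix produced by error_prefix(), so for the NAME token asserted in the test below the output would look roughly like this (illustrative; the exact path depends on the argument passed in, and the token part comes from the derived Debug formatting):

    .env.sample:3:1: Text(3, 1, "NAME")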
128 changes: 115 additions & 13 deletions src/tokenizer.rs
@@ -4,12 +4,25 @@ use anyhow::Result;
 
 use crate::reader::{CharReader, CharType};
 
-#[derive(Debug)]
+#[derive(Debug, PartialEq)]
 pub enum Token {
-    Text(String),
-    CommentMark,
-    HelpMark,
-    EqualSign,
+    Text(usize, usize, String),
+    CommentMark(usize, usize),
+    HelpMark(usize, usize),
+    EqualSign(usize, usize),
 }
+
+impl Token {
+    pub fn error_prefix(&self, path: &String) -> String {
+        let (line, column) = match self {
+            Token::Text(x, y, _) => (x, y),
+            Token::CommentMark(x, y) => (x, y),
+            Token::HelpMark(x, y) => (x, y),
+            Token::EqualSign(x, y) => (x, y),
+        };
+
+        format!("{path}:{line}:{column}")
+    }
+}
 
 pub struct Tokenizer {
@@ -23,6 +36,30 @@ impl Tokenizer {
         })
     }
 
+    fn text(&self, buffer: String, eol: bool, prepends_help: bool) -> Token {
+        let adjust = match (eol, prepends_help) {
+            (true, false) => -1,
+            (false, true) => 2,
+            _ => 0,
+        } + (buffer.len() as i8);
+
+        Token::Text(
+            self.reader.line,
+            self.reader.column - (adjust as usize),
+            buffer.trim().to_string(),
+        )
+    }
+
+    fn equal_sign(&self) -> Token {
+        Token::EqualSign(self.reader.line, self.reader.column)
+    }
+    fn comment_mark(&self) -> Token {
+        Token::CommentMark(self.reader.line, self.reader.column)
+    }
+    fn help_mark(&self) -> Token {
+        Token::HelpMark(self.reader.line, self.reader.column - 2)
+    }
+
     fn next_tokens(&mut self) -> Result<Vec<Token>> {
         let mut buffer = "".to_string();
         loop {
@@ -33,23 +70,27 @@
                     if buffer.is_empty() {
                         continue;
                     }
-                    return Ok(vec![Token::Text(buffer.trim().to_string())]);
+                    return Ok(vec![self.text(buffer, true, false)]);
                 }
                 CharType::Char(c) => {
                     let mut token: Option<Token> = None;
+                    let mut prepends_help = false;
                     if c == '=' {
-                        token = Some(Token::EqualSign);
-                    } else if c == '#' && self.reader.column == 1 {
-                        token = Some(Token::CommentMark);
-                    } else if c == ' ' && buffer.ends_with(" #") {
-                        buffer = buffer.strip_suffix(" #").unwrap_or("").to_string();
-                        token = Some(Token::HelpMark);
+                        token = Some(self.equal_sign());
+                    } else if c == '#' {
+                        if self.reader.column == 1 {
+                            token = Some(self.comment_mark());
+                        } else if buffer.ends_with(" ") {
+                            buffer = buffer.strip_suffix(" ").unwrap_or("").to_string();
+                            prepends_help = true;
+                            token = Some(self.help_mark());
+                        }
                     }
                     if let Some(t) = token {
                         if buffer.is_empty() {
                             return Ok(vec![t]);
                         }
-                        return Ok(vec![Token::Text(buffer.trim().to_string()), t]);
+                        return Ok(vec![self.text(buffer, false, prepends_help), t]);
                     }
                     buffer.push(c)
                 }
@@ -70,3 +111,64 @@
         Ok(tokens)
     }
 }
+
+// TODO: move to tests/ as integration test?
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_tokenizer() {
+        let sample = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join(".env.sample");
+        let mut tokenizer = Tokenizer::new(sample).unwrap();
+        let tokens = tokenizer.tokenize().unwrap();
+        assert_eq!(tokens.len(), 19);
+
+        // line 1
+        assert_eq!(tokens[0], Token::CommentMark(1, 1));
+        assert_eq!(tokens[1], Token::Text(1, 2, "Createnv".to_string()));
+        assert_eq!(tokens[2], Token::CommentMark(2, 1));
+
+        // line 2
+        assert_eq!(
+            tokens[3],
+            Token::Text(
+                2,
+                2,
+                "This is a simple example of how Createnv works".to_string()
+            )
+        );
+
+        // line 3
+        assert_eq!(tokens[4], Token::Text(3, 1, "NAME".to_string()));
+        assert_eq!(tokens[5], Token::EqualSign(3, 5));
+        assert_eq!(tokens[6], Token::HelpMark(3, 6));
+        assert_eq!(
+            tokens[7],
+            Token::Text(3, 9, "What's your name?".to_string())
+        );
+
+        // line 4
+        assert_eq!(tokens[8], Token::Text(4, 1, "GREETING".to_string()));
+        assert_eq!(tokens[9], Token::EqualSign(4, 9));
+        assert_eq!(tokens[10], Token::Text(4, 10, "Hello, {NAME}!".to_string()));
+
+        // line 5
+        assert_eq!(
+            tokens[11],
+            Token::Text(5, 1, "DO_YOU_LIKE_OPEN_SOURCE".to_string())
+        );
+        assert_eq!(tokens[12], Token::EqualSign(5, 24));
+        assert_eq!(tokens[13], Token::Text(5, 25, "True".to_string()));
+        assert_eq!(tokens[14], Token::HelpMark(5, 29));
+        assert_eq!(
+            tokens[15],
+            Token::Text(5, 32, "Do you like open-source?".to_string())
+        );
+
+        // line 6
+        assert_eq!(tokens[16], Token::Text(6, 1, "PASSWORD".to_string()));
+        assert_eq!(tokens[17], Token::EqualSign(6, 9));
+        assert_eq!(tokens[18], Token::Text(6, 10, "<random:16>".to_string()));
+    }
+}
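Two notes on the expectations above, reconstructed here rather than stated in the commit. First, the Text columns follow from the adjust arithmetic in Tokenizer::text: for NAME on line 3 the '=' is read at column 5 with eol and prepends_help both false, so adjust = 0 + buffer.len() = 4 and the reported column is 5 - 4 = 1, matching Token::Text(3, 1, "NAME"). Second, the asserted positions imply a .env.sample roughly like the following (the file itself is not part of this diff, and the spacing before each # is inferred from the expected columns):

    # Createnv
    # This is a simple example of how Createnv works
    NAME=  # What's your name?
    GREETING=Hello, {NAME}!
    DO_YOU_LIKE_OPEN_SOURCE=True  # Do you like open-source?
    PASSWORD=<random:16>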
