From 8c5381c1d8db3b5bcaa8a64f2c2c8a28fda1c299 Mon Sep 17 00:00:00 2001 From: Eduardo Cuducos <4732915+cuducos@users.noreply.github.com> Date: Wed, 20 Sep 2023 19:18:48 -0400 Subject: [PATCH] Adds basic tokenizer --- src/main.rs | 20 +++++++---- src/model.rs | 27 ++++++++------- src/parser.rs | 90 +++--------------------------------------------- src/reader.rs | 84 ++++++++++++++++++++++++++++++++++++++++++++ src/tokenizer.rs | 72 ++++++++++++++++++++++++++++++++++++++ 5 files changed, 189 insertions(+), 104 deletions(-) create mode 100644 src/reader.rs create mode 100644 src/tokenizer.rs diff --git a/src/main.rs b/src/main.rs index 66e0244..5ee91bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -4,12 +4,15 @@ use std::path::PathBuf; use anyhow::Result; use crate::model::{ - AutoGeneratedVariable, Block, Comment, SimpleVariable, VariableWithRandomValue, Variables, + AutoGeneratedVariable, Block, Comment, SimpleVariable, VariableType, VariableWithRandomValue, }; use crate::parser::Parser; +use crate::tokenizer::Tokenizer; mod model; mod parser; +mod reader; +mod tokenizer; fn main() -> Result<()> { if let Some(path) = args().nth(1) { @@ -17,6 +20,9 @@ fn main() -> Result<()> { for block in parser.parse()? { println!("{block}"); } + + let mut tokenizer = Tokenizer::new(PathBuf::from(&path))?; + println!("{:?}", tokenizer.tokenize()?); return Ok(()); } @@ -34,12 +40,12 @@ fn main() -> Result<()> { let variable6 = AutoGeneratedVariable::new("AUTO_GENERATED", "{ANSWER}-{DEFAULT_VALUE_ONE}"); let variables = vec![ - Variables::Input(variable1), - Variables::Input(variable2), - Variables::Input(variable3), - Variables::Input(variable4), - Variables::Random(variable5), - Variables::AutoGenerated(variable6), + VariableType::Input(variable1), + VariableType::Input(variable2), + VariableType::Input(variable3), + VariableType::Input(variable4), + VariableType::Random(variable5), + VariableType::AutoGenerated(variable6), ]; let block = Block::new(title, description, variables); println!("{block}"); diff --git a/src/model.rs b/src/model.rs index 5a58d44..0491a4d 100644 --- a/src/model.rs +++ b/src/model.rs @@ -142,7 +142,7 @@ impl Variable for VariableWithRandomValue { } } -pub enum Variables { +pub enum VariableType { Input(SimpleVariable), AutoGenerated(AutoGeneratedVariable), Random(VariableWithRandomValue), @@ -151,17 +151,17 @@ pub enum Variables { pub struct Block { pub title: Comment, pub description: Option, - pub variables: Vec, + pub variables: Vec, context: HashMap, } impl Block { - pub fn new(title: Comment, description: Option, variables: Vec) -> Self { + pub fn new(title: Comment, description: Option, variables: Vec) -> Self { let context: HashMap = HashMap::new(); let has_auto_generated_variables = variables .iter() - .any(|v| matches!(v, Variables::AutoGenerated(_))); + .any(|v| matches!(v, VariableType::AutoGenerated(_))); let mut block = Self { title, @@ -173,14 +173,14 @@ impl Block { if has_auto_generated_variables { for variable in &block.variables { match variable { - Variables::Input(var) => block.context.insert(var.key(), var.value()), - Variables::AutoGenerated(_) => None, - Variables::Random(var) => block.context.insert(var.key(), var.value()), + VariableType::Input(var) => block.context.insert(var.key(), var.value()), + VariableType::AutoGenerated(_) => None, + VariableType::Random(var) => block.context.insert(var.key(), var.value()), }; } for variable in &mut block.variables { - if let Variables::AutoGenerated(var) = variable { + if let VariableType::AutoGenerated(var) = variable { var.load_context(&block.context); } } @@ -200,9 +200,9 @@ impl fmt::Display for Block { for variable in &self.variables { match variable { - Variables::Input(var) => lines.push(var.to_string()), - Variables::AutoGenerated(var) => lines.push(var.to_string()), - Variables::Random(var) => lines.push(var.to_string()), + VariableType::Input(var) => lines.push(var.to_string()), + VariableType::AutoGenerated(var) => lines.push(var.to_string()), + VariableType::Random(var) => lines.push(var.to_string()), } } @@ -278,7 +278,10 @@ mod tests { let mut variable1 = SimpleVariable::new("ANSWER", None, None); variable1.user_input("42"); let variable2 = SimpleVariable::new("AS_TEXT", Some("fourty two"), None); - let variables = vec![Variables::Input(variable1), Variables::Input(variable2)]; + let variables = vec![ + VariableType::Input(variable1), + VariableType::Input(variable2), + ]; let block = Block::new(title, description, variables); let got = block.to_string(); assert_eq!(got, "# 42\n# Fourty-two\nANSWER=42\nAS_TEXT=fourty two") diff --git a/src/parser.rs b/src/parser.rs index 66555a8..b51e5ec 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -1,98 +1,18 @@ -use std::{ - fs::File, - io::{BufRead, BufReader}, - path::PathBuf, -}; - use anyhow::{anyhow, Result}; -use crate::model::{Block, Comment}; - -#[derive(PartialEq, Eq)] -pub enum CharType { - Char(char), - Eol, - Eof, -} +use crate::{ + model::{Block, Comment}, + reader::{CharReader, CharType}, +}; const CAPITAL_ASCII_LETTERS: &str = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"; -struct CharReader { - line: usize, - column: usize, - path: String, - current_line: Option, - reader: BufReader, - done: bool, -} - -impl CharReader { - fn new(path: PathBuf) -> Result { - Ok(Self { - line: 0, - column: 0, - path: path.display().to_string(), - current_line: None, - done: false, - reader: BufReader::new(File::open(path)?), - }) - } - - fn error(&self, character: &CharType, details: Option) -> anyhow::Error { - let prefix = format!("{}:{}:{}", self.path, self.line, self.column); - let extra = details.map_or("".to_string(), |msg| format!(": {msg}")); - let token = match &character { - CharType::Char(char) => format!("character `{char}`"), - CharType::Eol => "EOL (end of line)".to_string(), - CharType::Eof => "EOF (end of file)".to_string(), - }; - - anyhow!(format!("{prefix}: Unexpected {token}{extra}")) - } - - fn next(&mut self) -> Result { - if self.done { - return Ok(CharType::Eof); - } - match &self.current_line { - None => { - let mut buffer = "".to_string(); - let size = self.reader.read_line(&mut buffer)?; - if size == 0 { - self.done = true; - return Ok(CharType::Eof); - } - self.current_line = Some(buffer.clone()); - self.line += 1; - self.column = 0; - self.next() - } - Some(line) => match line.chars().nth(self.column) { - Some(char) => match char { - '\n' => { - self.current_line = None; - Ok(CharType::Eol) - } - _ => { - self.column += 1; - Ok(CharType::Char(char)) - } - }, - None => { - self.current_line = None; - Ok(CharType::Eol) - } - }, - } - } -} - pub struct Parser { reader: CharReader, } impl Parser { - pub fn new(path: PathBuf) -> Result { + pub fn new(path: std::path::PathBuf) -> Result { Ok(Self { reader: CharReader::new(path)?, }) diff --git a/src/reader.rs b/src/reader.rs new file mode 100644 index 0000000..23fb99a --- /dev/null +++ b/src/reader.rs @@ -0,0 +1,84 @@ +use std::{ + fs::File, + io::{BufRead, BufReader}, + path::PathBuf, +}; + +use anyhow::{anyhow, Result}; + +#[derive(PartialEq, Eq)] +pub enum CharType { + Char(char), + Eol, + Eof, +} + +pub struct CharReader { + pub line: usize, + pub column: usize, + path: String, + current_line: Option, + reader: BufReader, + done: bool, +} + +impl CharReader { + pub fn new(path: PathBuf) -> Result { + Ok(Self { + line: 0, + column: 0, + path: path.display().to_string(), + current_line: None, + done: false, + reader: BufReader::new(File::open(path)?), + }) + } + + pub fn error(&self, character: &CharType, details: Option) -> anyhow::Error { + let prefix = format!("{}:{}:{}", self.path, self.line, self.column); + let extra = details.map_or("".to_string(), |msg| format!(": {msg}")); + let token = match &character { + CharType::Char(char) => format!("character `{char}`"), + CharType::Eol => "EOL (end of line)".to_string(), + CharType::Eof => "EOF (end of file)".to_string(), + }; + + anyhow!(format!("{prefix}: Unexpected {token}{extra}")) + } + + pub fn next(&mut self) -> Result { + if self.done { + return Ok(CharType::Eof); + } + match &self.current_line { + None => { + let mut buffer = "".to_string(); + let size = self.reader.read_line(&mut buffer)?; + if size == 0 { + self.done = true; + return Ok(CharType::Eof); + } + self.current_line = Some(buffer.clone()); + self.line += 1; + self.column = 0; + self.next() + } + Some(line) => match line.chars().nth(self.column) { + Some(char) => match char { + '\n' => { + self.current_line = None; + Ok(CharType::Eol) + } + _ => { + self.column += 1; + Ok(CharType::Char(char)) + } + }, + None => { + self.current_line = None; + Ok(CharType::Eol) + } + }, + } + } +} diff --git a/src/tokenizer.rs b/src/tokenizer.rs new file mode 100644 index 0000000..12e189c --- /dev/null +++ b/src/tokenizer.rs @@ -0,0 +1,72 @@ +use std::path::PathBuf; + +use anyhow::Result; + +use crate::reader::{CharReader, CharType}; + +#[derive(Debug)] +pub enum Token { + Text(String), + CommentMark, + HelpMark, + EqualSign, +} + +pub struct Tokenizer { + reader: CharReader, +} + +impl Tokenizer { + pub fn new(path: PathBuf) -> Result { + Ok(Self { + reader: CharReader::new(path)?, + }) + } + + fn next_tokens(&mut self) -> Result> { + let mut buffer = "".to_string(); + loop { + let char = self.reader.next()?; + match char { + CharType::Eof => return Ok(vec![]), + CharType::Eol => { + if buffer.is_empty() { + continue; + } + return Ok(vec![Token::Text(buffer.trim().to_string())]); + } + CharType::Char(c) => { + let mut token: Option = None; + if c == '=' { + token = Some(Token::EqualSign); + } else if c == '#' && self.reader.column == 1 { + token = Some(Token::CommentMark); + } else if c == ' ' && buffer.ends_with(" #") { + buffer = buffer.strip_suffix(" #").unwrap_or("").to_string(); + token = Some(Token::HelpMark); + } + if let Some(t) = token { + if buffer.is_empty() { + return Ok(vec![t]); + } + return Ok(vec![Token::Text(buffer.trim().to_string()), t]); + } + buffer.push(c) + } + } + } + } + + // TODO: make iterator? + pub fn tokenize(&mut self) -> Result> { + let mut tokens: Vec = vec![]; + loop { + let new_tokens = self.next_tokens()?; + if new_tokens.is_empty() { + break; + } + tokens.extend(new_tokens); + } + Ok(tokens) + } +}