Commit: Implement parsing to tokens
rozukke committed May 10, 2024
1 parent 914a3f3 commit 0434976
Showing 3 changed files with 97 additions and 34 deletions.
45 changes: 11 additions & 34 deletions src/main.rs
@@ -1,15 +1,15 @@
#![allow(unused)] // Remove later

use std::{
    fs::File,
    io::{BufRead, BufReader},
};

use clap::{Parser, Subcommand};
use colored::Colorize;
use parse::FileAndPath;

mod command;
mod ops;
mod parse;
mod state;
mod symbol;
mod token;

/// Lace is a complete compiler and interpreter toolchain for the LC3 assembly language.
#[derive(Parser)]
@@ -76,35 +76,12 @@ fn main() {
    match command {
        Subcommands::Run { os, name } => todo!(),
        Subcommands::Compile { name, dest } => {
            // Parse file into a buffer and symbol table
            let file = File::open(&name).unwrap_or_else(|err| {
                eprintln!(
                    "Failed to open file with path {}: {}",
                    name.bold(),
                    err.to_string().red()
                );
                std::process::exit(1)
            });

            // Process lines and check for wrong file type
            let lines = BufReader::new(file)
                .lines()
                .enumerate()
                .map(|(i, line)| {
                    line.unwrap_or_else(|err| {
                        eprintln!("Failed to read line {}: {}", i, err.to_string().red());
                        eprintln!(" --> {}:{}", name, i);
                        eprintln!(
                            "Check that you are providing a valid {} file.",
                            ".asm".bold()
                        );
                        std::process::exit(1)
                    })
                })
                .collect::<Vec<String>>();

            // Start parsing lines into symbol table and IR
            todo!()
            let toks = FileAndPath::open(&name).tokenize_asm();
            for line in toks {
                println!("\nFirst token: {}", line[0].val);
                if line.len() < 2 { continue; }
                println!("Second token: {} line {} column {}", line[1].val, line[1].line, line[1].col);
            }
        }
        Subcommands::Clean { name } => todo!(),
        Subcommands::Watch { name } => todo!(),
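A sketch of what this debug loop would print, assuming a hypothetical input file whose first line is ADD R0, R0, #1 (the sample file is an assumption, not part of the commit):

First token: ADD
Second token: R0 line 1 column 5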
77 changes: 77 additions & 0 deletions src/parse.rs
@@ -0,0 +1,77 @@
use crate::token::Token;
use colored::Colorize;
use std::{
    fs::File,
    io::{BufRead, BufReader, Read},
    path::PathBuf,
};

pub struct FileAndPath {
    file: File,
    path: PathBuf,
}

impl FileAndPath {
    pub fn open(path: &str) -> FileAndPath {
        let path = PathBuf::from(&path);
        let file = File::open(&path).unwrap_or_else(|err| {
            eprintln!(
                "Failed to open file with path {}: {}",
                path.display(),
                err.to_string().red()
            );
            std::process::exit(1)
        });

        FileAndPath { file, path }
    }

    pub fn tokenize_asm(&self) -> Vec<Vec<Token>> {
        // Read the file into lines; an invalid-UTF-8 error here usually means
        // the wrong file type was passed in
        let contents = BufReader::new(&self.file)
            .lines()
            .enumerate()
            .map(|(i, line)| {
                line.unwrap_or_else(|err| {
                    eprintln!("Failed to read line {}: {}", i + 1, err.to_string().red());
                    eprintln!(" --> {}:{}", self.path.display(), i + 1);
                    eprintln!(
                        "Check that you are providing a valid {} file.",
                        ".asm".bold()
                    );
                    std::process::exit(1)
                })
            })
            .collect::<Vec<String>>();

        // Turn lines into a vector that contains a list of tokens for each line
        contents
            .iter()
            .enumerate()
            .map(|(i, line)| {
                // Get line without comment & whitespace
                println!("{}", line);
                let sc_idx = if let Some(idx) = line.find(';') {
                    idx
                } else {
                    line.len()
                };
                let clean_str = &line[..sc_idx];

                // Split on commas and spaces -> vec
                let arr_list: Vec<Token> = clean_str
                    .split(|c| c == ' ' || c == ',')
                    .filter(|word| !word.is_empty())
                    .map(|word| Token {
                        val: word.into(),
                        line: (i + 1) as u16,
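                        // Safety: `word` is a subslice of `line`, so both pointers
                        // point into the same allocation; offset_from yields the
                        // token's 0-based byte offset, and +1 makes the column 1-based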
                        col: unsafe { word.as_ptr().offset_from(line.as_ptr()) + 1 } as u16,
                    })
                    .collect::<Vec<Token>>();
                arr_list
            })
            // Filter after iteration to preserve correct line numbers in tokens
            .filter(|line| !line.is_empty())
            .collect()
    }
}
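A minimal usage sketch of the new tokenizer, assuming a hypothetical file input.asm on disk; open() exits the process on failure, so no extra error handling is shown:

fn main() {
    // Tokenize a hypothetical LC3 source file and dump every token
    let toks = FileAndPath::open("input.asm").tokenize_asm();
    for line in toks {
        for tok in line {
            // line/col are 1-based, as assigned in tokenize_asm
            println!("{} @ {}:{}", tok.val, tok.line, tok.col);
        }
    }
}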
9 changes: 9 additions & 0 deletions src/token.rs
@@ -0,0 +1,9 @@
/// Represents a single "word" inside the parsed representation of source code.
pub struct Token {
    // Value contained inside the token
    pub val: String,
    // Line number inside the file
    pub line: u16,
    // Column number inside line
    pub col: u16,
}
