Reorganize parser and lexer to be separate features/modules.
vcfxb committed Jul 13, 2024
1 parent 0e086de commit cec24c0
Showing 26 changed files with 120 additions and 18 deletions.
4 changes: 3 additions & 1 deletion .github/workflows/cargo-check.yml
@@ -18,9 +18,11 @@ jobs:
         features:
           - none
           - std
-          - source_tracking
+          - source-tracking
           - reporting
           - file_memmap
+          - ast-model
+          - lexer
           - parser
           - wright_library_defaults
           - wright_binary
4 changes: 3 additions & 1 deletion .github/workflows/cargo-test.yml
@@ -18,9 +18,11 @@ jobs:
         features:
           - none
           - std
-          - source_tracking
+          - source-tracking
           - reporting
           - file_memmap
+          - ast-model
+          - lexer
           - parser
           - wright_library_defaults
           - wright_binary
21 changes: 17 additions & 4 deletions wright/Cargo.toml
@@ -55,12 +55,25 @@ wright_binary = ["wright_library_defaults", "dep:clap"]
 # Features and dependencies useful when the wright binary is not being built or used.
 wright_library_defaults = ["file_memmap", "parser"]
 
-# Wright's parser depends on the ability to report parsing errors.
+# Wright's parser depends on the ability to report parsing errors and construct AST models.
 parser = [
     "reporting",
+    "ast-model",
+    "lexer",
     "dep:unicode-ident"
 ]
 
+# Wright's abstract syntax tree model is built on types from the "source_tracking" module.
+ast-model = [
+    "source-tracking",
+    # "derive_more/from"
+]
+
+# Wright's lexical analyzer is built using types from the "source_tracking" module.
+lexer = [
+    "source-tracking"
+]
+
 # Loading memory mapped files from the disk requires memmap2, fs4, and the reporting feature to correctly and efficiently
 # read from disk. We also use `anyhow` to make error handling easier.
 file_memmap = [
@@ -73,13 +86,13 @@ file_memmap = [
 # Reporting errors requires source tracking, codespan-reporting (for rendering diagnostics), and
 # termcolor (for pretty output).
 reporting = [
-    "source_tracking",
+    "source-tracking",
     "dep:termcolor",
     "dep:codespan-reporting"
 ]
 
 # Source tracking requires just a few dependencies and the standard library.
-source_tracking = [
+source-tracking = [
     "std",
     "dep:dashmap",
     "derive_more/display",
@@ -112,7 +125,7 @@ optional = true
 
 # derive_more is used for allowing us to derive additional traits like From and Display.
 # Currently used by features:
-# - "source_tracking"
+# - "source-tracking"
 [dependencies.derive_more]
 version = "0.99.18"
 default-features = false
2 changes: 1 addition & 1 deletion wright/benches/lexer.rs
@@ -4,7 +4,7 @@ use std::sync::Arc;
 
 use criterion::{black_box, criterion_group, criterion_main, Bencher, Criterion};
 use wright::{
-    parser::lexer::Lexer,
+    lexer::Lexer,
     source_tracking::{filename::FileName, source::Source},
 };
 
6 changes: 6 additions & 0 deletions wright/src/ast.rs
@@ -0,0 +1,6 @@
//! [Abstract syntax tree] modeling.
//!
//! [Abstract syntax tree]: https://en.wikipedia.org/wiki/Abstract_syntax_tree

pub mod identifier;
pub mod path;
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
14 changes: 14 additions & 0 deletions wright/src/ast/identifier.rs
@@ -0,0 +1,14 @@
//! [Identifier]s are used throughout wright as variable names, type names, function names, etc.
//! Their modeling is pretty simple, and is defined here.
//!
//! [Identifier]: https://en.wikipedia.org/wiki/Identifier

use crate::source_tracking::fragment::Fragment;

/// Identifiers are used as names for variables, functions, modules, etc.
/// These are defined using [Fragment]s of source code, which will contain the identifier itself.
#[derive(Debug, Clone)]
pub struct Identifier {
    /// The fragment of source code containing the identifier.
    pub fragment: Fragment,
}
20 changes: 20 additions & 0 deletions wright/src/ast/path.rs
@@ -0,0 +1,20 @@
//! [Path]s are used in import statements, and can take the place of an [Identifier] in many places.

use crate::source_tracking::fragment::Fragment;
use super::identifier::Identifier;

/// A double-colon separated path/reference to a module/function. This can be used in an `import` declaration and
/// some other places. [Path]s with a length of 1 are just [Identifier]s -- [Identifier]s can be considered paths in some
/// instances.
#[derive(Debug, Clone)]
pub struct Path {
    /// The [Fragment] of source code containing the full source of this path (including the double-colon separators).
    pub full_path: Fragment,

    /// The first (left-most) identifier in this [Path]. This can also be considered the "root" of the path --
    /// the module that the following item/identifier can be found in.
    pub head: Identifier,

    /// The rest of the [Path], following the first separator.
    pub tail: Option<Box<Path>>
}
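Taken together, head and tail make Path a singly linked list of identifiers. As an illustration only (this helper is not part of the commit; it assumes nothing beyond the public fields above and the ast-model feature being enabled), walking every identifier in a path could look like:

use wright::ast::{identifier::Identifier, path::Path};

/// Collect a reference to every identifier in a path, from left to right.
/// For a path written as `foo::bar::baz`, this yields [foo, bar, baz].
fn identifiers(path: &Path) -> Vec<&Identifier> {
    let mut out = Vec::new();
    let mut current = Some(path);
    while let Some(segment) = current {
        out.push(&segment.head);
        // `tail` is an `Option<Box<Path>>`; `as_deref` borrows the boxed remainder.
        current = segment.tail.as_deref();
    }
    out
}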
File renamed without changes.
File renamed without changes.
1 change: 0 additions & 1 deletion wright/src/parser/lexer.rs → wright/src/lexer.rs
@@ -6,7 +6,6 @@
 use self::comments::{try_match_block_comment, try_match_single_line_comment};
 use self::integer_literal::try_consume_integer_literal;
 use self::quoted::try_consume_quoted_literal;
-
 use crate::source_tracking::fragment::Fragment;
 use crate::source_tracking::SourceRef;
 use std::iter::FusedIterator;
File renamed without changes.
File renamed without changes.
File renamed without changes.
@@ -62,7 +62,7 @@ pub fn try_consume_quoted_literal(lexer: &mut Lexer) -> Option<Token> {
 
 #[cfg(test)]
 mod tests {
-    use crate::parser::lexer::{token::TokenTy, Lexer};
+    use super::super::{token::TokenTy, Lexer};
 
     #[test]
     fn string_literal() {
File renamed without changes.
File renamed without changes.
10 changes: 8 additions & 2 deletions wright/src/lib.rs
@@ -36,11 +36,17 @@ pub mod build_info {
     include!(concat!(env!("OUT_DIR"), "/built.rs"));
 }
 
+#[cfg(feature = "source-tracking")]
+pub mod source_tracking;
 
 #[cfg(feature = "reporting")]
 pub mod reporting;
 
-#[cfg(feature = "source_tracking")]
-pub mod source_tracking;
+#[cfg(feature = "lexer")]
+pub mod lexer;
+
+#[cfg(feature = "ast-model")]
+pub mod ast;
+
 #[cfg(feature = "parser")]
 pub mod parser;
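The gating above mirrors the feature graph declared in Cargo.toml: enabling lexer transitively enables source-tracking, so both of those modules become visible, while ast and parser stay compiled out until their features are requested. A minimal downstream sketch, assuming wright is pulled in with default-features = false and features = ["lexer"] (that Cargo setup is an assumption, not shown in this commit):

// Assumed Cargo setup: wright = { default-features = false, features = ["lexer"] }.
// These imports compile because the `lexer` feature transitively enables `source-tracking`:
use wright::lexer::Lexer;
use wright::source_tracking::source::Source;

// These would not compile under that setup, since their features are left disabled:
// use wright::ast::path::Path;  // needs the `ast-model` feature
// use wright::parser::Parse;    // needs the `parser` feature

fn main() {
    // Nothing to execute; this only demonstrates which modules the feature flags expose.
    println!("{} / {}", std::any::type_name::<Lexer>(), std::any::type_name::<Source>());
}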
44 changes: 38 additions & 6 deletions wright/src/parser.rs
@@ -1,8 +1,40 @@
-//! The wright lexer, parser, and AST representation.
+//! This parser module is responsible for turning the stream of [Token]s from the [Lexer] into a tree of [AST] nodes.
+//!
+//! [AST]: crate::ast
-// pub mod error;
-// pub mod state;
-// pub mod util;
+use super::lexer::{token::{Token, TokenTy}, Lexer};
 
-// pub mod ast;
-pub mod lexer;
+mod identifier;
 
+/// Errors that can arise when parsing a source to an abstract syntax tree node.
+#[derive(Debug)]
+pub enum ParseError {
+    /// Expected one type of token, found another.
+    Expected {
+        /// The expected variant.
+        expected: TokenTy,
+        /// The token found from the lexer.
+        found: Option<Token>,
+    }
+}
+
+/// Trait implemented by all AST nodes that can be parsed.
+pub trait Parse: Sized {
+    /// Attempt to parse a tree node of this type from a given [Lexer].
+    fn parse(lexer: &mut Lexer) -> Result<Self, ParseError>;
+}
+
+impl Lexer {
+    /// Pull the next token from a lexer, and return an error if it's not of the given variant.
+    pub fn expect(&mut self, token_ty: TokenTy) -> Result<Token, ParseError> {
+        let next_token = self
+            .next_token()
+            .ok_or(ParseError::Expected { expected: token_ty, found: None })?;
+
+        if next_token.variant != token_ty {
+            return Err(ParseError::Expected { expected: token_ty, found: Some(next_token) });
+        }
+
+        Ok(next_token)
+    }
+}
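As a quick usage sketch (not part of this commit — constructing a Lexer from a Source is outside this diff, and the lexer's token module is assumed to be public, as the in-crate paths suggest), requiring a particular token variant looks like:

use wright::lexer::{token::TokenTy, Lexer};
use wright::parser::ParseError;

/// Require that the next token is an identifier, discarding it on success.
fn require_identifier(lexer: &mut Lexer) -> Result<(), ParseError> {
    // `expect` returns `ParseError::Expected { expected, found: None }` when the lexer is
    // exhausted, and `found: Some(token)` when the next token has a different variant.
    let _token = lexer.expect(TokenTy::Identifier)?;
    Ok(())
}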
1 change: 0 additions & 1 deletion wright/src/parser/ast/identifier.rs

This file was deleted.

9 changes: 9 additions & 0 deletions wright/src/parser/identifier.rs
@@ -0,0 +1,9 @@
use crate::{ast::identifier::Identifier, lexer::{token::TokenTy, Lexer}};
use super::{Parse, ParseError};

impl Parse for Identifier {
    fn parse(lexer: &mut Lexer) -> Result<Self, ParseError> {
        let ident_token = lexer.expect(TokenTy::Identifier)?;
        Ok(Identifier { fragment: ident_token.fragment })
    }
}
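With that impl in place, downstream code reaches an AST node through the Parse trait. A hedged sketch (how the Lexer itself is built is not shown in this diff, so it is taken as a parameter):

use wright::ast::identifier::Identifier;
use wright::lexer::Lexer;
use wright::parser::{Parse, ParseError};

/// Parse a single identifier at the lexer's current position.
fn identifier_from(lexer: &mut Lexer) -> Result<Identifier, ParseError> {
    // Delegates to the `Parse` impl above, which wraps `lexer.expect(TokenTy::Identifier)`
    // and keeps the matched token's `Fragment` as the identifier's source text.
    Identifier::parse(lexer)
}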
