From cec24c0cb4000dee988346dcfb43b18e9701d0f2 Mon Sep 17 00:00:00 2001
From: Venus Xeon-Blonde <venusflameblonde@gmail.com>
Date: Sat, 13 Jul 2024 14:30:07 -0400
Subject: [PATCH] Reorganize parser and lexer to be separate features/modules.

---
 .github/workflows/cargo-check.yml             |  4 +-
 .github/workflows/cargo-test.yml              |  4 +-
 wright/Cargo.toml                             | 21 +++++++--
 wright/benches/lexer.rs                       |  2 +-
 wright/src/ast.rs                             |  6 +++
 wright/src/{parser/ast.rs => ast/astOld.rs}   |  0
 wright/src/{parser => }/ast/expression.rs     |  0
 .../{parser => }/ast/expression/primary.rs    |  0
 .../ast/expression/primary/integer_literal.rs |  0
 .../ast/expression/primary/parens.rs          |  0
 .../src/{parser => }/ast/expression/unary.rs  |  0
 wright/src/ast/identifier.rs                  | 14 ++++++
 wright/src/ast/path.rs                        | 20 +++++++++
 wright/src/{parser => }/ast/test_utils.rs     |  0
 wright/src/{parser => }/ast/ty.rs             |  0
 wright/src/{parser => }/lexer.rs              |  1 -
 wright/src/{parser => }/lexer/comments.rs     |  0
 wright/src/{parser => }/lexer/identifier.rs   |  0
 .../src/{parser => }/lexer/integer_literal.rs |  0
 wright/src/{parser => }/lexer/quoted.rs       |  2 +-
 wright/src/{parser => }/lexer/token.rs        |  0
 wright/src/{parser => }/lexer/trivial.rs      |  0
 wright/src/lib.rs                             | 10 ++++-
 wright/src/parser.rs                          | 44 ++++++++++++++++---
 wright/src/parser/ast/identifier.rs           |  1 -
 wright/src/parser/identifier.rs               |  9 ++++
 26 files changed, 120 insertions(+), 18 deletions(-)
 create mode 100644 wright/src/ast.rs
 rename wright/src/{parser/ast.rs => ast/astOld.rs} (100%)
 rename wright/src/{parser => }/ast/expression.rs (100%)
 rename wright/src/{parser => }/ast/expression/primary.rs (100%)
 rename wright/src/{parser => }/ast/expression/primary/integer_literal.rs (100%)
 rename wright/src/{parser => }/ast/expression/primary/parens.rs (100%)
 rename wright/src/{parser => }/ast/expression/unary.rs (100%)
 create mode 100644 wright/src/ast/identifier.rs
 create mode 100644 wright/src/ast/path.rs
 rename wright/src/{parser => }/ast/test_utils.rs (100%)
 rename wright/src/{parser => }/ast/ty.rs (100%)
 rename wright/src/{parser => }/lexer.rs (99%)
 rename wright/src/{parser => }/lexer/comments.rs (100%)
 rename wright/src/{parser => }/lexer/identifier.rs (100%)
 rename wright/src/{parser => }/lexer/integer_literal.rs (100%)
 rename wright/src/{parser => }/lexer/quoted.rs (98%)
 rename wright/src/{parser => }/lexer/token.rs (100%)
 rename wright/src/{parser => }/lexer/trivial.rs (100%)
 delete mode 100644 wright/src/parser/ast/identifier.rs
 create mode 100644 wright/src/parser/identifier.rs

diff --git a/.github/workflows/cargo-check.yml b/.github/workflows/cargo-check.yml
index b786cada..98886669 100644
--- a/.github/workflows/cargo-check.yml
+++ b/.github/workflows/cargo-check.yml
@@ -18,9 +18,11 @@ jobs:
         features:
           - none
           - std
-          - source_tracking
+          - source-tracking
           - reporting
           - file_memmap
+          - ast-model
+          - lexer
           - parser
           - wright_library_defaults
           - wright_binary
diff --git a/.github/workflows/cargo-test.yml b/.github/workflows/cargo-test.yml
index 54d4ead2..2d117899 100644
--- a/.github/workflows/cargo-test.yml
+++ b/.github/workflows/cargo-test.yml
@@ -18,9 +18,11 @@ jobs:
         features:
           - none
           - std
-          - source_tracking
+          - source-tracking
           - reporting
           - file_memmap
+          - ast-model
+          - lexer
           - parser
           - wright_library_defaults
           - wright_binary
diff --git a/wright/Cargo.toml b/wright/Cargo.toml
index 6ef648fd..804d0cef 100644
--- a/wright/Cargo.toml
+++ b/wright/Cargo.toml
@@ -55,12 +55,25 @@ wright_binary = ["wright_library_defaults", "dep:clap"]
 # Features and dependencies useful when the wright binary is not being built or used. 
 wright_library_defaults = ["file_memmap", "parser"]
 
-# Wright's parser depends on the ability to report parsing errors.
+# Wright's parser depends on the lexer and on the ability to report parsing errors and construct AST models.
 parser = [
     "reporting",
+    "ast-model",
+    "lexer",
     "dep:unicode-ident"
 ]
 
+# Wright's abstract syntax tree model is built on types from the "source_tracking" module.
+ast-model = [
+    "source-tracking",
+    # "derive_more/from"
+]
+
+# Wright's lexical analyzer is built using types from the "source_tracking" module.
+lexer = [
+    "source-tracking"
+]
+
 # Loading memory mapped files from the disk requires memmap2, fs4, and the reporting feature to correctly and efficiently 
 # read from disk. We also use `anyhow` to make error handling easier.
 file_memmap = [
@@ -73,13 +86,13 @@ file_memmap = [
 # Reporting errors requires source tracking, codespan-reporting (for rendering diagnostics), and 
 # termcolor (for pretty output).
 reporting = [
-    "source_tracking", 
+    "source-tracking", 
     "dep:termcolor",
     "dep:codespan-reporting"
 ]
 
 # Source tracking requires just a few dependencies and standard library. 
-source_tracking = [
+source-tracking = [
     "std",
     "dep:dashmap",
     "derive_more/display",
@@ -112,7 +125,7 @@ optional = true
 
 # derive_more is used for allowing us to derive additional traits like From and Display.
 # Currently used by features: 
-# - "source_tracking"
+# - "source-tracking"
 [dependencies.derive_more]
 version = "0.99.18"
 default-features = false
diff --git a/wright/benches/lexer.rs b/wright/benches/lexer.rs
index 45f2f484..c8dd76f6 100644
--- a/wright/benches/lexer.rs
+++ b/wright/benches/lexer.rs
@@ -4,7 +4,7 @@ use std::sync::Arc;
 
 use criterion::{black_box, criterion_group, criterion_main, Bencher, Criterion};
 use wright::{
-    parser::lexer::Lexer,
+    lexer::Lexer,
     source_tracking::{filename::FileName, source::Source},
 };
 
diff --git a/wright/src/ast.rs b/wright/src/ast.rs
new file mode 100644
index 00000000..a8926ad5
--- /dev/null
+++ b/wright/src/ast.rs
@@ -0,0 +1,6 @@
+//! [Abstract syntax tree] modeling.
+//! 
+//! [Abstract syntax tree]: https://en.wikipedia.org/wiki/Abstract_syntax_tree
+
+pub mod identifier;
+pub mod path;
diff --git a/wright/src/parser/ast.rs b/wright/src/ast/astOld.rs
similarity index 100%
rename from wright/src/parser/ast.rs
rename to wright/src/ast/astOld.rs
diff --git a/wright/src/parser/ast/expression.rs b/wright/src/ast/expression.rs
similarity index 100%
rename from wright/src/parser/ast/expression.rs
rename to wright/src/ast/expression.rs
diff --git a/wright/src/parser/ast/expression/primary.rs b/wright/src/ast/expression/primary.rs
similarity index 100%
rename from wright/src/parser/ast/expression/primary.rs
rename to wright/src/ast/expression/primary.rs
diff --git a/wright/src/parser/ast/expression/primary/integer_literal.rs b/wright/src/ast/expression/primary/integer_literal.rs
similarity index 100%
rename from wright/src/parser/ast/expression/primary/integer_literal.rs
rename to wright/src/ast/expression/primary/integer_literal.rs
diff --git a/wright/src/parser/ast/expression/primary/parens.rs b/wright/src/ast/expression/primary/parens.rs
similarity index 100%
rename from wright/src/parser/ast/expression/primary/parens.rs
rename to wright/src/ast/expression/primary/parens.rs
diff --git a/wright/src/parser/ast/expression/unary.rs b/wright/src/ast/expression/unary.rs
similarity index 100%
rename from wright/src/parser/ast/expression/unary.rs
rename to wright/src/ast/expression/unary.rs
diff --git a/wright/src/ast/identifier.rs b/wright/src/ast/identifier.rs
new file mode 100644
index 00000000..45e01f56
--- /dev/null
+++ b/wright/src/ast/identifier.rs
@@ -0,0 +1,14 @@
+//! [Identifier]s are used throughout wright as variable names, type names, function names, etc.
+//! Their modeling is pretty simple, and is defined here.
+//! 
+//! [Identifier]: https://en.wikipedia.org/wiki/Identifier
+
+use crate::source_tracking::fragment::Fragment;
+
+/// Identifiers are used as names for variables, functions, modules, etc. 
+/// These are defined using [Fragment]s of source code, which will contain the identifier itself.
+#[derive(Debug, Clone)]
+pub struct Identifier {
+    /// The fragment of source code containing the identifier.
+    pub fragment: Fragment,
+}
diff --git a/wright/src/ast/path.rs b/wright/src/ast/path.rs
new file mode 100644
index 00000000..709b5a82
--- /dev/null
+++ b/wright/src/ast/path.rs
@@ -0,0 +1,20 @@
+//! [Path]s are used in import statements, and can take the place of an [Identifier] in many places.
+
+use crate::source_tracking::fragment::Fragment;
+use super::identifier::Identifier;
+
+/// A double-colon separated path/reference to a module/function. This can be used in an `import` declaration and 
+/// some other places. [Path]s with length of 1 are just [Identifier]s -- [Identifier]s can be considered paths in some
+/// instances.
+#[derive(Debug, Clone)]
+pub struct Path {
+    /// The [Fragment] of source code containing the full source of this path (including the double-colon separators). 
+    pub full_path: Fragment,
+
+    /// The first (left-most) identifier in this [Path]. This can also be considered the "root" of the path -- 
+    /// the module that the following item/identifier can be found in.
+    pub head: Identifier,
+    
+    /// The rest of the [Path], following the first separator.
+    pub tail: Option<Box<Path>>
+}
diff --git a/wright/src/parser/ast/test_utils.rs b/wright/src/ast/test_utils.rs
similarity index 100%
rename from wright/src/parser/ast/test_utils.rs
rename to wright/src/ast/test_utils.rs
diff --git a/wright/src/parser/ast/ty.rs b/wright/src/ast/ty.rs
similarity index 100%
rename from wright/src/parser/ast/ty.rs
rename to wright/src/ast/ty.rs
diff --git a/wright/src/parser/lexer.rs b/wright/src/lexer.rs
similarity index 99%
rename from wright/src/parser/lexer.rs
rename to wright/src/lexer.rs
index e8f12db1..388d9e6d 100644
--- a/wright/src/parser/lexer.rs
+++ b/wright/src/lexer.rs
@@ -6,7 +6,6 @@
 use self::comments::{try_match_block_comment, try_match_single_line_comment};
 use self::integer_literal::try_consume_integer_literal;
 use self::quoted::try_consume_quoted_literal;
-
 use crate::source_tracking::fragment::Fragment;
 use crate::source_tracking::SourceRef;
 use std::iter::FusedIterator;
diff --git a/wright/src/parser/lexer/comments.rs b/wright/src/lexer/comments.rs
similarity index 100%
rename from wright/src/parser/lexer/comments.rs
rename to wright/src/lexer/comments.rs
diff --git a/wright/src/parser/lexer/identifier.rs b/wright/src/lexer/identifier.rs
similarity index 100%
rename from wright/src/parser/lexer/identifier.rs
rename to wright/src/lexer/identifier.rs
diff --git a/wright/src/parser/lexer/integer_literal.rs b/wright/src/lexer/integer_literal.rs
similarity index 100%
rename from wright/src/parser/lexer/integer_literal.rs
rename to wright/src/lexer/integer_literal.rs
diff --git a/wright/src/parser/lexer/quoted.rs b/wright/src/lexer/quoted.rs
similarity index 98%
rename from wright/src/parser/lexer/quoted.rs
rename to wright/src/lexer/quoted.rs
index cc2a788e..5cf25fbe 100644
--- a/wright/src/parser/lexer/quoted.rs
+++ b/wright/src/lexer/quoted.rs
@@ -62,7 +62,7 @@ pub fn try_consume_quoted_literal(lexer: &mut Lexer) -> Option<Token> {
 
 #[cfg(test)]
 mod tests {
-    use crate::parser::lexer::{token::TokenTy, Lexer};
+    use super::super::{token::TokenTy, Lexer};
 
     #[test]
     fn string_literal() {
diff --git a/wright/src/parser/lexer/token.rs b/wright/src/lexer/token.rs
similarity index 100%
rename from wright/src/parser/lexer/token.rs
rename to wright/src/lexer/token.rs
diff --git a/wright/src/parser/lexer/trivial.rs b/wright/src/lexer/trivial.rs
similarity index 100%
rename from wright/src/parser/lexer/trivial.rs
rename to wright/src/lexer/trivial.rs
diff --git a/wright/src/lib.rs b/wright/src/lib.rs
index c0b13ed9..cd9f3591 100644
--- a/wright/src/lib.rs
+++ b/wright/src/lib.rs
@@ -36,11 +36,17 @@ pub mod build_info {
     include!(concat!(env!("OUT_DIR"), "/built.rs"));
 }
 
+#[cfg(feature = "source-tracking")]
+pub mod source_tracking;
+
 #[cfg(feature = "reporting")]
 pub mod reporting;
 
-#[cfg(feature = "source_tracking")]
-pub mod source_tracking;
+#[cfg(feature = "lexer")]
+pub mod lexer;
+
+#[cfg(feature = "ast-model")]
+pub mod ast;
 
 #[cfg(feature = "parser")]
 pub mod parser;
diff --git a/wright/src/parser.rs b/wright/src/parser.rs
index 4cfa5954..c101b0a8 100644
--- a/wright/src/parser.rs
+++ b/wright/src/parser.rs
@@ -1,8 +1,40 @@
-//! The wright lexer, parser, and AST representation.
+//! This parser module is responsible for turning the stream of [Token]s from the [Lexer] into a tree of [AST] nodes.
+//! 
+//! [AST]: crate::ast
 
-// pub mod error;
-// pub mod state;
-// pub mod util;
+use super::lexer::{token::{Token, TokenTy}, Lexer};
 
-// pub mod ast;
-pub mod lexer;
+mod identifier;
+
+/// Errors that can arise when parsing a source to an abstract syntax tree node.
+#[derive(Debug)]
+pub enum ParseError {
+    /// Expected one type of token, found another
+    Expected {
+        /// The expected variant.
+        expected: TokenTy,
+        /// The token found from the lexer.
+        found: Option<Token>,
+    }
+}
+
+/// Trait implemented by all AST nodes that can be parsed.
+pub trait Parse: Sized {
+    /// Attempt to parse a tree node of this type from a given [Lexer].
+    fn parse(lexer: &mut Lexer) -> Result<Self, ParseError>;
+}
+
+impl Lexer {
+    /// Pull the next token from a lexer, and return an error if it's not of the given variant.
+    pub fn expect(&mut self, token_ty: TokenTy) -> Result<Token, ParseError> {
+        let next_token = self
+            .next_token()
+            .ok_or(ParseError::Expected { expected: token_ty, found: None })?;
+
+        if next_token.variant != token_ty {
+            return Err(ParseError::Expected { expected: token_ty, found: Some(next_token) });
+        }
+
+        Ok(next_token)
+    }
+}
diff --git a/wright/src/parser/ast/identifier.rs b/wright/src/parser/ast/identifier.rs
deleted file mode 100644
index 0d0386eb..00000000
--- a/wright/src/parser/ast/identifier.rs
+++ /dev/null
@@ -1 +0,0 @@
-//! AST node implementation for parsing identifiers, which can be used to name types, variables, functions, etc.
diff --git a/wright/src/parser/identifier.rs b/wright/src/parser/identifier.rs
new file mode 100644
index 00000000..025b9488
--- /dev/null
+++ b/wright/src/parser/identifier.rs
@@ -0,0 +1,9 @@
+use crate::{ast::identifier::Identifier, lexer::{token::TokenTy, Lexer}};
+use super::{Parse, ParseError};
+
+impl Parse for Identifier {
+    fn parse(lexer: &mut Lexer) -> Result<Self, ParseError> {
+        let ident_token = lexer.expect(TokenTy::Identifier)?;
+        Ok(Identifier { fragment: ident_token.fragment })
+    }
+}