From 8c602d832e625a0965701618626166e2ffbd94bb Mon Sep 17 00:00:00 2001 From: Tomas Tauber <2410580+tomtau@users.noreply.github.com> Date: Thu, 24 Nov 2022 21:50:31 +0800 Subject: [PATCH] feature: added a `pest_debugger` crate (fixes #98) (#736) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feature: added a `pest_debugger` crate (fixes #98) based on the old PR by @dragostis: https://github.com/pest-parser/pest/pull/277 Changes that were made: - debugger core context was refactored and extracted to a lib (so that it could be used in other frontends, e.g. editor plugins) - CLI was extended using rustyline helpers to provide file completions, history etc. - applied suggestions from @hansihe from the old PR (https://github.com/pest-parser/pest/pull/277#issuecomment-421838669): 1. added `ba` (add breakpoints at all rules) which is useful for stepping through the entire grammar, plus breakpoint deletions and loading input directly from readline; 2. added command line arguments. - changed the listener function to return a boolean, so that the debugger can signal back to a parsing thread to finish before reaching its input's EOF. 
Co-authored-by: Tomas Tauber Co-authored-by: Dragoș Tiselice --- Cargo.toml | 1 + codecov.yml | 1 + debugger/Cargo.toml | 26 +++ debugger/_README.md | 1 + debugger/src/lib.rs | 474 ++++++++++++++++++++++++++++++++++++++++++ debugger/src/main.rs | 332 +++++++++++++++++++++++++++++ derive/Cargo.toml | 6 +- generator/Cargo.toml | 6 +- generator/src/lib.rs | 34 +-- grammars/Cargo.toml | 6 +- meta/Cargo.toml | 4 +- meta/src/lib.rs | 22 ++ meta/src/parser.rs | 34 +++ meta/src/validator.rs | 5 +- pest/Cargo.toml | 2 +- release.sh | 2 +- vm/Cargo.toml | 6 +- vm/src/lib.rs | 32 ++- 18 files changed, 943 insertions(+), 51 deletions(-) create mode 100644 debugger/Cargo.toml create mode 120000 debugger/_README.md create mode 100644 debugger/src/lib.rs create mode 100644 debugger/src/main.rs diff --git a/Cargo.toml b/Cargo.toml index 8a0f6885..8b3e8947 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,7 @@ [workspace] members = [ "bootstrap", + "debugger", "derive", "generator", "grammars", diff --git a/codecov.yml b/codecov.yml index c1ddf45b..7fefaea0 100644 --- a/codecov.yml +++ b/codecov.yml @@ -1,6 +1,7 @@ comment: off ignore: + - debugger/src/main.rs - derive/src/lib.rs - pest/examples/parens.rs - pest/src/unicode/* diff --git a/debugger/Cargo.toml b/debugger/Cargo.toml new file mode 100644 index 00000000..4842d540 --- /dev/null +++ b/debugger/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "pest_debugger" +description = "pest grammar debugger" +version = "2.5.0" +edition = "2021" +authors = ["Dragoș Tiselice ", "Tomas Tauber "] +homepage = "https://pest.rs/" +repository = "https://github.com/pest-parser/pest" +documentation = "https://docs.rs/pest" +keywords = ["pest", "grammar", "debugger"] +categories = ["parsing"] +license = "MIT/Apache-2.0" +readme = "_README.md" +rust-version = "1.56" + +[dependencies] +pest = { path = "../pest", version = "2.5.0" } +pest_meta = { path = "../meta", version = "2.5.0" } +pest_vm = { path = "../vm", version = "2.5.0" } +rustyline = 
"10" +thiserror = "1" + +[badges] +codecov = { repository = "pest-parser/pest" } +maintenance = { status = "actively-developed" } +github = { repository = "pest-parser/pest" } \ No newline at end of file diff --git a/debugger/_README.md b/debugger/_README.md new file mode 120000 index 00000000..32d46ee8 --- /dev/null +++ b/debugger/_README.md @@ -0,0 +1 @@ +../README.md \ No newline at end of file diff --git a/debugger/src/lib.rs b/debugger/src/lib.rs new file mode 100644 index 00000000..e9e4c7d0 --- /dev/null +++ b/debugger/src/lib.rs @@ -0,0 +1,474 @@ +// pest. The Elegant Parser +// Copyright (c) 2018-2022 Dragoș Tiselice, Tomas Tauber +// +// Licensed under the Apache License, Version 2.0 +// or the MIT +// license , at your +// option. All files in the project carrying such notice may not be copied, +// modified, or distributed except according to those terms. +//! # pest debugger +//! +//! This crate contains definitions for the debugger. +//! A sample CLI-based debugger is available in `main.rs`. +//! Other debugger frontends can be implemented using this +//! crate's `DebuggerContext`: +//! +//! ``` +//! use pest_debugger::DebuggerContext; +//! use std::sync::mpsc::sync_channel; +//! let mut context = DebuggerContext::default(); +//! +//! context +//! .load_grammar_direct( +//! "testgrammar", +//! r#"alpha = { 'a'..'z' | 'A'..'Z' } +//! digit = { '0'..'9' } +//! +//! ident = { !digit ~ (alpha | digit)+ } +//! +//! ident_list = _{ ident ~ (" " ~ ident)* }"#, +//! ).expect("Error: failed to load grammar"); +//! context.load_input_direct("test test2".to_owned()); +//! +//! let (sender, receiver) = sync_channel(1); +//! +//! context.add_breakpoint("ident".to_owned()); +//! for b in context.list_breakpoints().iter() { +//! println!("Breakpoint: {}", b); +//! } +//! context +//! .run("ident_list", sender) +//! .expect("Error: failed to run rule"); +//! +//! let event = receiver.recv().expect("Error: failed to receive event"); +//! 
println!("Received a debugger event: {:?}", event); +//! +//! context.cont().expect("Error: failed to continue"); +//! +//! let event = receiver.recv().expect("Error: failed to receive event"); +//! println!("Received a debugger event: {:?}", event); +//! ``` +//! ## Current Limitations +//! - relies on OS threads instead of stack-full generators +//! - only shows position from the `ParserState` when it reaches a breakpoint +//! - no way to run another rule from a breakpoint, only from the start +#![doc( + html_logo_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg", + html_favicon_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg" +)] +#![warn(missing_docs, rust_2018_idioms, unused_qualifications)] +use std::{ + collections::HashSet, + fs::File, + io::{self, Read}, + path::PathBuf, + sync::{ + atomic::{AtomicBool, Ordering}, + mpsc::SyncSender as Sender, + Arc, Mutex, + }, + thread::{self, JoinHandle}, +}; + +use pest::{error::Error, Position}; +use pest_meta::{ + optimizer::OptimizedRule, + parse_and_optimize, + parser::{rename_meta_rule, Rule}, +}; +use pest_vm::Vm; + +/// Possible errors that can occur in the debugger context. +#[derive(Debug, thiserror::Error)] +pub enum DebuggerError { + /// Errors from opening files etc. + #[error("I/O error: {0}")] + Io(#[from] io::Error), + /// When a filename can't be extracted from a grammar path. + #[error("Missing filename")] + MissingFilename, + /// Running a debugger requires a grammar to be provided. + #[error("Open grammar first")] + GrammarNotOpened, + /// Running a debugger requires a parsing input to be provided. + #[error("Open input first")] + InputNotOpened, + /// Continuing a debugger session requires starting a session by running a rule. + #[error("Run rule first")] + RunRuleFirst, + /// Parsing finished (i.e. cannot continue the session). + #[error("End-of-input reached")] + EofReached, + /// Can't create a `Position` in a given input. 
+ #[error("Invalid position: {0}")] + InvalidPosition(usize), + /// The provided grammar is invalid. + /// The first element contains a formatted error message. + /// The second element (`Vec`) contains the errors. + #[error("Grammar error: {0}")] + IncorrectGrammar(String, Vec>), + /// When restarting a session, the previous session + /// seem to have panicked. + #[error("Previous parsing execution panic: {0}")] + PreviousRunPanic(String), +} + +/// Events that are sent from the debugger. +#[derive(Debug, PartialEq, Eq)] +pub enum DebuggerEvent { + /// A breakpoint encountered. + /// The first element is the rule name. + /// The second element is the position. + Breakpoint(String, usize), + /// The end of the input has been reached. + Eof, + /// A parsing error encountered. + Error(String), +} + +/// Debugger for pest grammars. +pub struct DebuggerContext { + handle: Option>, + is_done: Arc, + grammar: Option>, + input: Option, + breakpoints: Arc>>, +} + +const POISONED_LOCK_PANIC: &str = "poisoned lock"; +const CHANNEL_CLOSED_PANIC: &str = "channel closed"; + +impl DebuggerContext { + fn file_to_string(path: &PathBuf) -> Result { + let mut file = File::open(path)?; + + let mut string = String::new(); + file.read_to_string(&mut string)?; + + Ok(string) + } + + /// Loads a grammar from a file. + pub fn load_grammar(&mut self, path: &PathBuf) -> Result<(), DebuggerError> { + let grammar = DebuggerContext::file_to_string(path)?; + + let file_name = path + .file_name() + .map(|string| string.to_string_lossy().into_owned()) + .ok_or(DebuggerError::MissingFilename)?; + + self.grammar = Some(DebuggerContext::parse_grammar(&file_name, &grammar)?); + + Ok(()) + } + + /// Loads a grammar from a string. + pub fn load_grammar_direct( + &mut self, + grammar_name: &str, + grammar: &str, + ) -> Result<(), DebuggerError> { + self.grammar = Some(DebuggerContext::parse_grammar(grammar_name, grammar)?); + + Ok(()) + } + + /// Loads a parsing input from a file. 
+ pub fn load_input(&mut self, path: &PathBuf) -> Result<(), DebuggerError> { + let input = DebuggerContext::file_to_string(path)?; + + self.input = Some(input); + + Ok(()) + } + + /// Loads a parsing input from a string. + pub fn load_input_direct(&mut self, input: String) { + self.input = Some(input); + } + + /// Adds all grammar rules as breakpoints. + /// This is useful for stepping through the entire parsing process. + /// It returns an error if the grammar hasn't been loaded yet. + pub fn add_all_rules_breakpoints(&mut self) -> Result<(), DebuggerError> { + let ast = self + .grammar + .as_ref() + .ok_or(DebuggerError::GrammarNotOpened)?; + let mut breakpoints = self.breakpoints.lock().expect(POISONED_LOCK_PANIC); + for rule in ast { + breakpoints.insert(rule.name.clone()); + } + + Ok(()) + } + + /// Adds a rule to breakpoints. + pub fn add_breakpoint(&mut self, rule: String) { + let mut breakpoints = self.breakpoints.lock().expect(POISONED_LOCK_PANIC); + + breakpoints.insert(rule); + } + + /// Removes a rule from breakpoints. + pub fn delete_breakpoint(&mut self, rule: &str) { + let mut breakpoints = self.breakpoints.lock().expect(POISONED_LOCK_PANIC); + + breakpoints.remove(rule); + } + + /// Removes all breakpoints. + pub fn delete_all_breakpoints(&mut self) { + let mut breakpoints = self.breakpoints.lock().expect(POISONED_LOCK_PANIC); + + breakpoints.clear(); + } + + /// Returns a list of all breakpoints. 
+ pub fn list_breakpoints(&self) -> Vec { + let breakpoints = self.breakpoints.lock().expect(POISONED_LOCK_PANIC); + let mut breakpoints: Vec<_> = breakpoints.iter().map(ToOwned::to_owned).collect(); + breakpoints.sort(); + breakpoints + } + + fn handle( + &self, + ast: Vec, + rule: String, + input: String, + sender: Sender, + ) -> JoinHandle<()> { + let breakpoints = Arc::clone(&self.breakpoints); + let is_done = Arc::clone(&self.is_done); + let is_done_signal = Arc::clone(&self.is_done); + + let rsender = sender.clone(); + thread::spawn(move || { + let vm = Vm::new_with_listener( + ast, + Box::new(move |rule, pos| { + if is_done_signal.load(Ordering::SeqCst) { + return true; + } + let lock = breakpoints.lock().expect(POISONED_LOCK_PANIC); + + if lock.contains(&rule) { + rsender + .send(DebuggerEvent::Breakpoint(rule, pos.pos())) + .expect(CHANNEL_CLOSED_PANIC); + + thread::park(); + } + false + }), + ); + + match vm.parse(&rule, &input) { + Ok(_) => sender.send(DebuggerEvent::Eof).expect(CHANNEL_CLOSED_PANIC), + Err(error) => sender + .send(DebuggerEvent::Error(error.to_string())) + .expect(CHANNEL_CLOSED_PANIC), + }; + + is_done.store(true, Ordering::SeqCst); + }) + } + + fn parse_grammar(file_name: &str, grammar: &str) -> Result, DebuggerError> { + match parse_and_optimize(grammar) { + Ok((_, ast)) => Ok(ast), + Err(errors) => { + let msg = format!( + "error parsing {:?}\n\n{}", + file_name, + errors + .iter() + .cloned() + .map(|error| format!("{}", error.renamed_rules(rename_meta_rule))) + .collect::>() + .join("\n") + ); + Err(DebuggerError::IncorrectGrammar(msg, errors)) + } + } + } + + /// Starts a debugger session: runs a rule on an input and stops at breakpoints. + /// When the debugger is stopped, an event is sent to the channel using `sender`. + /// The debugger can be resumed by calling `cont`. + /// This naturally returns errors if the grammar or input haven't been loaded yet etc. 
+ pub fn run(&mut self, rule: &str, sender: Sender) -> Result<(), DebuggerError> { + if let Some(handle) = self.handle.take() { + if !(self.is_done.load(Ordering::Relaxed)) { + self.is_done.store(true, Ordering::SeqCst); + handle.thread().unpark(); + } + handle + .join() + .map_err(|e| DebuggerError::PreviousRunPanic(format!("{:?}", e)))?; + } + + self.is_done.store(false, Ordering::SeqCst); + let ast = self + .grammar + .as_ref() + .ok_or(DebuggerError::GrammarNotOpened)?; + match self.input { + Some(ref input) => { + let rule = rule.to_owned(); + let input = input.clone(); + + self.handle = Some(self.handle(ast.clone(), rule, input, sender)); + Ok(()) + } + None => Err(DebuggerError::InputNotOpened), + } + } + + /// Continue the debugger session from the breakpoint. + /// It returns an error if the session finished or wasn't started yet. + pub fn cont(&self) -> Result<(), DebuggerError> { + if self.is_done.load(Ordering::SeqCst) { + return Err(DebuggerError::EofReached); + } + + match self.handle { + Some(ref handle) => { + handle.thread().unpark(); + Ok(()) + } + None => Err(DebuggerError::RunRuleFirst), + } + } + + /// Returns a `Position` from the loaded input. 
+ pub fn get_position(&self, pos: usize) -> Result, DebuggerError> { + match self.input { + Some(ref input) => Position::new(input, pos).ok_or(DebuggerError::InvalidPosition(pos)), + None => Err(DebuggerError::InputNotOpened), + } + } +} + +impl Default for DebuggerContext { + fn default() -> Self { + Self { + handle: None, + is_done: Arc::new(AtomicBool::new(false)), + grammar: None, + input: None, + breakpoints: Arc::new(Mutex::new(HashSet::new())), + } + } +} + +#[cfg(test)] +mod test { + use super::*; + use std::sync::mpsc::sync_channel; + + fn get_test_context() -> DebuggerContext { + let mut context = DebuggerContext::default(); + + context + .load_grammar_direct( + "testgrammar", + r#"alpha = { 'a'..'z' | 'A'..'Z' } + digit = { '0'..'9' } + + ident = { !digit ~ (alpha | digit)+ } + + ident_list = _{ ident ~ (" " ~ ident)* }"#, + ) + .expect("Error: failed to load grammar"); + context.load_input_direct("test test2".to_owned()); + context + } + + #[test] + fn test_full_flow() { + let mut context = get_test_context(); + + let (sender, receiver) = sync_channel(1); + + assert_eq!(context.list_breakpoints().len(), 0); + context.add_breakpoint("ident".to_owned()); + assert_eq!(context.list_breakpoints().len(), 1); + context + .run("ident_list", sender) + .expect("Error: failed to run rule"); + + let event = receiver.recv().expect("Error: failed to receive event"); + assert_eq!(event, DebuggerEvent::Breakpoint("ident".to_owned(), 0)); + + context.cont().expect("Error: failed to continue"); + + let event = receiver.recv().expect("Error: failed to receive event"); + assert_eq!(event, DebuggerEvent::Breakpoint("ident".to_owned(), 5)); + context.cont().expect("Error: failed to continue"); + let event = receiver.recv().expect("Error: failed to receive event"); + + assert_eq!(event, DebuggerEvent::Eof); + context + .add_all_rules_breakpoints() + .expect("grammar is loaded"); + assert_eq!(context.list_breakpoints().len(), 4); + context.delete_breakpoint("ident"); + 
assert_eq!(context.list_breakpoints().len(), 3); + context.delete_all_breakpoints(); + assert_eq!(context.list_breakpoints().len(), 0); + } + + #[test] + fn test_restart() { + let mut context = get_test_context(); + + let (sender, receiver) = sync_channel(1); + + assert_eq!(context.list_breakpoints().len(), 0); + context.add_breakpoint("ident".to_owned()); + assert_eq!(context.list_breakpoints().len(), 1); + context + .run("ident_list", sender) + .expect("Error: failed to run rule"); + + let event = receiver.recv().expect("Error: failed to receive event"); + assert_eq!(event, DebuggerEvent::Breakpoint("ident".to_owned(), 0)); + let (sender2, receiver2) = sync_channel(1); + + context + .run("ident_list", sender2) + .expect("Error: failed to run rule"); + let event = receiver2.recv().expect("Error: failed to receive event"); + assert_eq!(event, DebuggerEvent::Breakpoint("ident".to_owned(), 0)); + } + + #[test] + pub fn test_errors() { + let mut context = DebuggerContext::default(); + + assert!(context.load_input(&PathBuf::from(".")).is_err()); + let pest_readme = PathBuf::from(concat!(env!("CARGO_MANIFEST_DIR"), "/../README.md")); + let pest_grammar = PathBuf::from(concat!( + env!("CARGO_MANIFEST_DIR"), + "/../meta/src/grammar.pest" + )); + + assert!(context.load_grammar(&pest_readme).is_err()); + assert!(context.add_all_rules_breakpoints().is_err()); + assert!(context.cont().is_err()); + assert!(context.run("rule", sync_channel(1).0).is_err()); + assert!(context.load_grammar(&pest_grammar).is_ok()); + assert!(context.run("rule", sync_channel(1).0).is_err()); + assert!(context.get_position(0).is_err()); + context.load_input_direct("".to_owned()); + assert!(context.get_position(0).is_ok()); + assert!(context.get_position(1).is_err()); + assert!(context.load_input(&pest_grammar).is_ok()); + let (sender, _receiver) = sync_channel(1); + assert!(context.run("ANY", sender).is_ok()); + while context.cont().is_ok() {} + assert!(context.cont().is_err()); + } +} diff --git 
a/debugger/src/main.rs b/debugger/src/main.rs new file mode 100644 index 00000000..59945542 --- /dev/null +++ b/debugger/src/main.rs @@ -0,0 +1,332 @@ +// pest. The Elegant Parser +// Copyright (c) 2018-2022 Dragoș Tiselice, Tomas Tauber +// +// Licensed under the Apache License, Version 2.0 +// or the MIT +// license , at your +// option. All files in the project carrying such notice may not be copied, +// modified, or distributed except according to those terms. +//! # pest debugger +//! +//! This crate contains the CLI debugger. + +#![doc( + html_logo_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg", + html_favicon_url = "https://raw.githubusercontent.com/pest-parser/pest/master/pest-logo.svg" +)] +#![warn(missing_docs, rust_2018_idioms, unused_qualifications)] +use std::path::PathBuf; +use std::sync::mpsc::{self, Receiver}; +use std::time::Duration; + +use pest::error::{Error, ErrorVariant}; + +use pest_debugger::{DebuggerContext, DebuggerError, DebuggerEvent}; +use rustyline::completion::{Completer, FilenameCompleter, Pair}; +use rustyline::error::ReadlineError; +use rustyline::highlight::Highlighter; +use rustyline::hint::{Hinter, HistoryHinter}; +use rustyline::validate::Validator; +use rustyline::{Editor, Helper}; + +const VERSION: &str = env!("CARGO_PKG_VERSION"); + +#[derive(Default)] +struct Cli { + context: DebuggerContext, + receiver: Option>, +} + +impl Cli { + fn grammar(&mut self, path: PathBuf) -> Result<(), DebuggerError> { + self.context.load_grammar(&path) + } + + fn input(&mut self, path: PathBuf) -> Result<(), DebuggerError> { + self.context.load_input(&path) + } + + fn breakpoint(&mut self, rule: &str) { + self.context.add_breakpoint(rule.to_owned()); + } + + fn run(&mut self, rule: &str) -> Result<(), DebuggerError> { + let (sender, receiver) = mpsc::sync_channel(1); + let rec = &receiver; + self.context.run(rule, sender)?; + match rec.recv_timeout(Duration::from_secs(5)) { + Ok(DebuggerEvent::Breakpoint(rule, 
pos)) => { + let error: Error<()> = Error::new_from_pos( + ErrorVariant::CustomError { + message: format!("parsing {}", rule), + }, + self.context.get_position(pos)?, + ); + println!("{}", error); + } + Ok(DebuggerEvent::Eof) => println!("end-of-input reached"), + Ok(DebuggerEvent::Error(error)) => println!("{}", error), + Err(_) => eprintln!("parsing timed out"), + } + self.receiver = Some(receiver); + Ok(()) + } + + fn cont(&mut self) -> Result<(), DebuggerError> { + self.context.cont()?; + + match self.receiver { + Some(ref rec) => match rec.recv_timeout(Duration::from_secs(5)) { + Ok(DebuggerEvent::Breakpoint(rule, pos)) => { + let error: Error<()> = Error::new_from_pos( + ErrorVariant::CustomError { + message: format!("parsing {}", rule), + }, + self.context.get_position(pos)?, + ); + println!("{}", error); + } + Ok(DebuggerEvent::Eof) => println!("end-of-input reached"), + Ok(DebuggerEvent::Error(error)) => println!("{}", error), + Err(_) => eprintln!("parsing timed out"), + }, + None => println!("Error: run rule first"), + }; + Ok(()) + } + + fn list(&mut self) { + let breakpoints = self.context.list_breakpoints(); + + println!("Breakpoints: {}", breakpoints.join(", ")); + } + + fn help() { + println!( + "\n\ + Use the following commands:\n\ + g - load .pest grammar\n\ + i - load input from a file\n\ + id - load input directly from a single-line input\n\ + ba - add breakpoints at all rules\n\ + b - add a breakpoint at a rule\n\ + d - delete a breakpoint at a rule\n\ + da - delete all breakpoints\n\ + r - run a rule\n\ + c - continue\n\ + l - list breakpoints\n\ + h - help\n\ + " + ); + } + + fn unrecognized(command: &str) { + println!("Unrecognized command: {}; use h for help", command); + } + + fn execute_command(&mut self, command: &str) -> Result<(), DebuggerError> { + match command { + "" => (), + "h" => Cli::help(), + "l" => self.list(), + "c" => self.cont()?, + "ba" => self.context.add_all_rules_breakpoints()?, /* `ba` adds a breakpoint at every rule, `da` clears them all (matches the help text) */ + "da" => 
self.context.delete_all_breakpoints(), + x if x.starts_with("g ") => self.grammar(PathBuf::from(&x[2..]))?, + x if x.starts_with("i ") => self.input(PathBuf::from(&x[2..]))?, + x if x.starts_with("id ") => { + let input = &x[3..]; + self.context.load_input_direct(input.to_owned()); + } + x if x.starts_with("b ") => self.breakpoint(&x[2..]), + x if x.starts_with("d ") => { + self.context.delete_breakpoint(&x[2..]); + } + x if x.starts_with("r ") => self.run(&x[2..])?, + x => Cli::unrecognized(x), + }; + Ok(()) + } +} + +struct CliHelper { + completer: FilenameCompleter, + hinter: HistoryHinter, +} + +impl Validator for CliHelper {} +impl Highlighter for CliHelper {} +impl Helper for CliHelper {} + +impl Hinter for CliHelper { + type Hint = String; + + fn hint(&self, line: &str, pos: usize, ctx: &rustyline::Context<'_>) -> Option { + self.hinter.hint(line, pos, ctx) + } +} + +impl Completer for CliHelper { + type Candidate = Pair; + + fn complete( + &self, + line: &str, + pos: usize, + _ctx: &rustyline::Context<'_>, + ) -> rustyline::Result<(usize, Vec)> { + self.completer.complete_path(line, pos) + } +} + +struct CliArgs { + grammar_file: Option, + input_file: Option, + rule: Option, + breakpoint: Option, + session_file: Option, +} + +impl Default for CliArgs { + fn default() -> Self { + let mut result = Self { + grammar_file: None, + input_file: None, + rule: None, + breakpoint: None, + session_file: None, + }; + let args = std::env::args(); + let mut iter = args.skip(1); + while let Some(arg) = iter.next() { + match arg.as_str() { + "-g" | "--grammar" => { + if let Some(grammar_file) = iter.next() { + result.grammar_file = Some(PathBuf::from(grammar_file)); + } else { + eprintln!("Error: missing grammar file"); + std::process::exit(1); + } + } + "-i" | "--input" => { + if let Some(input_file) = iter.next() { + result.input_file = Some(PathBuf::from(input_file)); + } else { + eprintln!("Error: missing input file"); + std::process::exit(1); + } + } + "-r" | "--rule" 
=> { + if let Some(rule) = iter.next() { + result.rule = Some(rule); + } else { + eprintln!("Error: missing rule"); + std::process::exit(1); + } + } + "-b" | "--breakpoint" => { + if let Some(breakpoint) = iter.next() { + result.breakpoint = Some(breakpoint); + } else { + eprintln!("Error: missing breakpoint"); + std::process::exit(1); + } + } + "-s" | "--session" => { + if let Some(session_file) = iter.next() { + result.session_file = Some(PathBuf::from(session_file)); + } else { + eprintln!("Error: missing session file"); + std::process::exit(1); + } + } + "-h" | "--help" => { + println!( + "\n\ + Usage: pest_debugger [options]\n\ + \n\ + Options:\n\ + -g, --grammar - load .pest grammar\n\ + -i, --input - load input file\n\ + -r, --rule - run rule\n\ + -b, --breakpoint - breakpoint at rule\n\ + -s, --session - load session history file\n\ + -h, --help - print this help menu\n\ + " + ); + std::process::exit(0); + } + unknown => { eprintln!("Error: unrecognized argument: {}", unknown); std::process::exit(1) } /* flags come from the user, so an unknown one is reported like the other argument errors rather than panicking */ + } + } + result + } +} + +impl CliArgs { + fn init(self, context: &mut Cli) { + if let Some(grammar_file) = self.grammar_file { + if let Err(e) = context.grammar(grammar_file) { + eprintln!("Error: {}", e); + } + } + if let Some(input_file) = self.input_file { + if let Err(e) = context.input(input_file) { + eprintln!("Error: {}", e); + } + } + if let Some(breakpoint) = &self.breakpoint { + context.breakpoint(breakpoint); + } + if let Some(rule) = self.rule { + if let Err(e) = context.run(&rule) { + eprintln!("Error: {}", e); + } + } + } +} + +fn main() -> rustyline::Result<()> { + let mut rl = Editor::<CliHelper>::new()?; + let mut context = Cli::default(); + let cli_args = CliArgs::default(); + + let h = CliHelper { + completer: FilenameCompleter::new(), + hinter: HistoryHinter {}, + }; + rl.set_helper(Some(h)); + println!("pest_debugger v{}\n", VERSION); + let historyfile = if let Some(session_file) = &cli_args.session_file { + if let Err(e) = rl.load_history(session_file) { + eprintln!("Error loading history file: {}", e); + } + 
Some(session_file.clone()) + } else { + None + }; + cli_args.init(&mut context); + loop { + match rl.readline("> ") { + Ok(line) => { + rl.add_history_entry(line.clone()); + if let Err(err) = context.execute_command(line.trim()) { + println!("Error: {}", err); + } + } + Err(ReadlineError::Interrupted) | Err(ReadlineError::Eof) => { + break; + } + Err(err) => { + println!("Error: {:?}", err); + break; + } + } + } + if let Some(historyfile) = historyfile { + if let Err(e) = rl.save_history(&historyfile) { + eprintln!("Error saving history file: {}", e); + } + } + Ok(()) +} diff --git a/derive/Cargo.toml b/derive/Cargo.toml index 18c448b0..addb3c86 100644 --- a/derive/Cargo.toml +++ b/derive/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_derive" description = "pest's derive macro" -version = "2.4.1" +version = "2.5.0" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -23,5 +23,5 @@ std = ["pest/std", "pest_generator/std"] [dependencies] # for tests, included transitively anyway -pest = { path = "../pest", version = "2.4.1", default-features = false } -pest_generator = { path = "../generator", version = "2.4.1", default-features = false } +pest = { path = "../pest", version = "2.5.0", default-features = false } +pest_generator = { path = "../generator", version = "2.5.0", default-features = false } diff --git a/generator/Cargo.toml b/generator/Cargo.toml index ddde3ed0..621fb685 100644 --- a/generator/Cargo.toml +++ b/generator/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_generator" description = "pest code generator" -version = "2.4.1" +version = "2.5.0" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -18,8 +18,8 @@ default = ["std"] std = ["pest/std"] [dependencies] -pest = { path = "../pest", version = "2.4.1", default-features = false } -pest_meta = { path = "../meta", version = "2.4.1" } +pest = { path = "../pest", version = "2.5.0", default-features = false } +pest_meta = { path = "../meta", 
version = "2.5.0" } proc-macro2 = "1.0" quote = "1.0" syn = "1.0" diff --git a/generator/src/lib.rs b/generator/src/lib.rs index 770a7d33..2a1203e4 100644 --- a/generator/src/lib.rs +++ b/generator/src/lib.rs @@ -33,7 +33,7 @@ use syn::{Attribute, DeriveInput, Generics, Ident, Lit, Meta}; mod macros; mod generator; -use pest_meta::parser::{self, Rule}; +use pest_meta::parser::{self, rename_meta_rule, Rule}; use pest_meta::{optimizer, unwrap_or_report, validator}; /// Processes the derive/proc macro input and generates the corresponding parser based @@ -77,37 +77,7 @@ pub fn derive_parser(input: TokenStream, include_grammar: bool) -> TokenStream { let pairs = match parser::parse(Rule::grammar_rules, &data) { Ok(pairs) => pairs, - Err(error) => panic!( - "error parsing \n{}", - error.renamed_rules(|rule| match *rule { - Rule::grammar_rule => "rule".to_owned(), - Rule::_push => "PUSH".to_owned(), - Rule::assignment_operator => "`=`".to_owned(), - Rule::silent_modifier => "`_`".to_owned(), - Rule::atomic_modifier => "`@`".to_owned(), - Rule::compound_atomic_modifier => "`$`".to_owned(), - Rule::non_atomic_modifier => "`!`".to_owned(), - Rule::opening_brace => "`{`".to_owned(), - Rule::closing_brace => "`}`".to_owned(), - Rule::opening_brack => "`[`".to_owned(), - Rule::closing_brack => "`]`".to_owned(), - Rule::opening_paren => "`(`".to_owned(), - Rule::positive_predicate_operator => "`&`".to_owned(), - Rule::negative_predicate_operator => "`!`".to_owned(), - Rule::sequence_operator => "`&`".to_owned(), - Rule::choice_operator => "`|`".to_owned(), - Rule::optional_operator => "`?`".to_owned(), - Rule::repeat_operator => "`*`".to_owned(), - Rule::repeat_once_operator => "`+`".to_owned(), - Rule::comma => "`,`".to_owned(), - Rule::closing_paren => "`)`".to_owned(), - Rule::quote => "`\"`".to_owned(), - Rule::insensitive_string => "`^`".to_owned(), - Rule::range_operator => "`..`".to_owned(), - Rule::single_quote => "`'`".to_owned(), - other_rule => format!("{:?}", 
other_rule), - }) - ), + Err(error) => panic!("error parsing \n{}", error.renamed_rules(rename_meta_rule)), }; let defaults = unwrap_or_report(validator::validate_pairs(pairs.clone())); diff --git a/grammars/Cargo.toml b/grammars/Cargo.toml index 474e021f..807b34bc 100644 --- a/grammars/Cargo.toml +++ b/grammars/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_grammars" description = "pest popular grammar implementations" -version = "2.4.1" +version = "2.5.0" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -14,8 +14,8 @@ readme = "_README.md" rust-version = "1.56" [dependencies] -pest = { path = "../pest", version = "2.4.1" } -pest_derive = { path = "../derive", version = "2.4.1" } +pest = { path = "../pest", version = "2.5.0" } +pest_derive = { path = "../derive", version = "2.5.0" } [dev-dependencies] criterion = "0.3" diff --git a/meta/Cargo.toml b/meta/Cargo.toml index 7ebab384..b400f53c 100644 --- a/meta/Cargo.toml +++ b/meta/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_meta" description = "pest meta language parser and validator" -version = "2.4.1" +version = "2.5.0" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -16,7 +16,7 @@ include = ["Cargo.toml", "src/**/*", "src/grammar.rs", "_README.md", "LICENSE-*" rust-version = "1.56" [dependencies] -pest = { path = "../pest", version = "2.4.1" } +pest = { path = "../pest", version = "2.5.0" } once_cell = "1.8.0" [build-dependencies] diff --git a/meta/src/lib.rs b/meta/src/lib.rs index 7212ee80..e1cb9724 100644 --- a/meta/src/lib.rs +++ b/meta/src/lib.rs @@ -22,6 +22,8 @@ extern crate pest; use std::fmt::Display; +use pest::error::Error; + pub mod ast; pub mod optimizer; pub mod parser; @@ -46,6 +48,26 @@ where }) } +/// A tuple returned by the validation and processing of the parsed grammar. +/// The first element is the vector of used builtin rule names, +/// the second element is the vector of optimized rules. 
+type UsedBuiltinAndOptimized<'i> = (Vec<&'i str>, Vec); + +/// Parses, validates, processes and optimizes the provided grammar. +pub fn parse_and_optimize( + grammar: &str, +) -> Result, Vec>> { + let pairs = match parser::parse(parser::Rule::grammar_rules, grammar) { + Ok(pairs) => Ok(pairs), + Err(error) => Err(vec![error]), + }?; + + let defaults = validator::validate_pairs(pairs.clone())?; + let ast = parser::consume_rules(pairs)?; + + Ok((defaults, optimizer::optimize(ast))) +} + #[doc(hidden)] pub static UNICODE_PROPERTY_NAMES: &[&str] = &[ /* BINARY */ diff --git a/meta/src/parser.rs b/meta/src/parser.rs index a0eb951c..fc0224b3 100644 --- a/meta/src/parser.rs +++ b/meta/src/parser.rs @@ -213,6 +213,40 @@ pub fn consume_rules(pairs: Pairs<'_, Rule>) -> Result, Vec String { + match *rule { + Rule::grammar_rule => "rule".to_owned(), + Rule::_push => "PUSH".to_owned(), + Rule::assignment_operator => "`=`".to_owned(), + Rule::silent_modifier => "`_`".to_owned(), + Rule::atomic_modifier => "`@`".to_owned(), + Rule::compound_atomic_modifier => "`$`".to_owned(), + Rule::non_atomic_modifier => "`!`".to_owned(), + Rule::opening_brace => "`{`".to_owned(), + Rule::closing_brace => "`}`".to_owned(), + Rule::opening_brack => "`[`".to_owned(), + Rule::closing_brack => "`]`".to_owned(), + Rule::opening_paren => "`(`".to_owned(), + Rule::positive_predicate_operator => "`&`".to_owned(), + Rule::negative_predicate_operator => "`!`".to_owned(), + Rule::sequence_operator => "`&`".to_owned(), + Rule::choice_operator => "`|`".to_owned(), + Rule::optional_operator => "`?`".to_owned(), + Rule::repeat_operator => "`*`".to_owned(), + Rule::repeat_once_operator => "`+`".to_owned(), + Rule::comma => "`,`".to_owned(), + Rule::closing_paren => "`)`".to_owned(), + Rule::quote => "`\"`".to_owned(), + Rule::insensitive_string => "`^`".to_owned(), + Rule::range_operator => "`..`".to_owned(), + Rule::single_quote => "`'`".to_owned(), + other_rule => format!("{:?}", other_rule), + } +} + fn 
consume_rules_with_spans( pairs: Pairs<'_, Rule>, ) -> Result>, Vec>> { diff --git a/meta/src/validator.rs b/meta/src/validator.rs index 4bb46ec5..04eeeb2f 100644 --- a/meta/src/validator.rs +++ b/meta/src/validator.rs @@ -70,11 +70,14 @@ static BUILTINS: Lazy> = Lazy::new(|| { .collect::>() }); -/// The parsed grammar for common mistakes: +/// It checks the parsed grammar for common mistakes: /// - using Rust keywords /// - using Pest keywords /// - duplicate rules /// - undefined rules +/// +/// It returns a `Result` with a `Vec` of `Error`s if any of the above is found. +/// If no errors are found, it returns the vector of names of used builtin rules. pub fn validate_pairs(pairs: Pairs<'_, Rule>) -> Result, Vec>> { let definitions: Vec<_> = pairs .clone() diff --git a/pest/Cargo.toml b/pest/Cargo.toml index 0210441b..957db39f 100644 --- a/pest/Cargo.toml +++ b/pest/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest" description = "The Elegant Parser" -version = "2.4.1" +version = "2.5.0" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" diff --git a/release.sh b/release.sh index 741517e8..d5088b22 100755 --- a/release.sh +++ b/release.sh @@ -2,7 +2,7 @@ set -e -CRATES="pest pest_meta pest_vm pest_generator pest_derive pest_grammars" +CRATES="pest pest_meta pest_vm pest_generator pest_derive pest_grammars pest_debugger" get_local_version() { cargo metadata --format-version 1 | jq -r '.packages[]|select(.name == "'"${1}"'" and ((.manifest_path|contains("registry"))|not))|.version' diff --git a/vm/Cargo.toml b/vm/Cargo.toml index 3912a32c..da2b9d26 100644 --- a/vm/Cargo.toml +++ b/vm/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "pest_vm" description = "pest grammar virtual machine" -version = "2.4.1" +version = "2.5.0" edition = "2021" authors = ["Dragoș Tiselice "] homepage = "https://pest.rs/" @@ -14,5 +14,5 @@ readme = "_README.md" rust-version = "1.56" [dependencies] -pest = { path = "../pest", version = "2.4.1" } -pest_meta = { path = 
"../meta", version = "2.4.1" } +pest = { path = "../pest", version = "2.5.0" } +pest_meta = { path = "../meta", version = "2.5.0" } diff --git a/vm/src/lib.rs b/vm/src/lib.rs index b6bfc27c..7c9018a5 100644 --- a/vm/src/lib.rs +++ b/vm/src/lib.rs @@ -18,25 +18,48 @@ use pest::error::Error; use pest::iterators::Pairs; -use pest::unicode; +use pest::{unicode, Position}; use pest::{Atomicity, MatchDir, ParseResult, ParserState}; use pest_meta::ast::RuleType; use pest_meta::optimizer::{OptimizedExpr, OptimizedRule}; use std::collections::HashMap; +use std::panic::{RefUnwindSafe, UnwindSafe}; mod macros; +/// A callback function that is called when a rule is matched. +/// The first argument is the name of the rule and the second is the span of the rule. +/// The function should return `true` if parsing should be terminated +/// (if the new parsing session was started) or `false` otherwise. +type ListenerFn = + Box) -> bool + Sync + Send + RefUnwindSafe + UnwindSafe>; + /// A virtual machine-like construct that runs an AST on-the-fly pub struct Vm { rules: HashMap, + listener: Option, } impl Vm { /// Creates a new `Vm` from optimized rules pub fn new(rules: Vec) -> Vm { let rules = rules.into_iter().map(|r| (r.name.clone(), r)).collect(); - Vm { rules } + Vm { + rules, + listener: None, + } + } + + /// Creates a new `Vm` from optimized rules + /// and a listener function that is called when a rule is matched. 
+ /// (used by the `pest_debugger` crate) + pub fn new_with_listener(rules: Vec, listener: ListenerFn) -> Vm { + let rules = rules.into_iter().map(|r| (r.name.clone(), r)).collect(); + Vm { + rules, + listener: Some(listener), + } } /// Runs a parser rule on an input @@ -55,6 +78,11 @@ impl Vm { rule: &'a str, state: Box>, ) -> ParseResult>> { + if let Some(ref listener) = self.listener { + if listener(rule.to_owned(), state.position()) { + return Err(ParserState::new(state.position().line_of())); + } + } match rule { "ANY" => return state.skip(1), "EOI" => return state.rule("EOI", |state| state.end_of_input()),