From 734ba66ad73229d54b10aa1197a9c0ad62014943 Mon Sep 17 00:00:00 2001 From: TheVeryDarkness <3266343194@qq.com> Date: Wed, 2 Aug 2023 08:23:34 +0800 Subject: [PATCH] A new stack implementation that should be faster and less space-consuming (#905) * Benchmark and test Stack * A new implementation of Stack. * Implement Default for Stack. To make clippy happy. * Remove redundant clone. --- pest/Cargo.toml | 9 ++- pest/benches/stack.rs | 88 +++++++++++++++++++++ pest/src/lib.rs | 1 + pest/src/stack.rs | 173 +++++++++++++++++++++++++++++++++--------- 4 files changed, 233 insertions(+), 38 deletions(-) create mode 100644 pest/benches/stack.rs diff --git a/pest/Cargo.toml b/pest/Cargo.toml index 34bec34d..aa3e131e 100644 --- a/pest/Cargo.toml +++ b/pest/Cargo.toml @@ -25,6 +25,13 @@ const_prec_climber = [] [dependencies] ucd-trie = { version = "0.1.5", default-features = false } serde = { version = "1.0.145", optional = true } -serde_json = { version = "1.0.85", optional = true} +serde_json = { version = "1.0.85", optional = true } thiserror = { version = "1.0.37", optional = true } memchr = { version = "2", optional = true } + +[dev-dependencies] +criterion = { version = "0.5.1", features = ["html_reports"] } + +[[bench]] +name = "stack" +harness = false diff --git a/pest/benches/stack.rs b/pest/benches/stack.rs new file mode 100644 index 00000000..59efae5e --- /dev/null +++ b/pest/benches/stack.rs @@ -0,0 +1,88 @@ +// pest. The Elegant Parser +// Copyright (c) 2018 DragoČ™ Tiselice +// +// Licensed under the Apache License, Version 2.0 +// or the MIT +// license , at your +// option. All files in the project carrying such notice may not be copied, +// modified, or distributed except according to those terms. + +use criterion::{criterion_group, criterion_main, Criterion}; +use pest::Stack; + +fn snapshot_push_restore(elements: impl Iterator + Clone) { + let mut stack = Stack::::new(); + for elem in elements { + stack.snapshot(); + stack.push(elem.clone()); + stack.restore(); + stack.push(elem); + } +} + +fn snapshot_push_clear_snapshot(elements: impl Iterator + Clone) { + let mut stack = Stack::::new(); + for elem in elements { + stack.snapshot(); + stack.push(elem); + stack.clear_snapshot(); + } +} + +fn snapshot_pop_restore(elements: impl Iterator) { + let mut stack = Stack::::new(); + for elem in elements { + stack.push(elem); + } + while !stack.is_empty() { + stack.snapshot(); + stack.pop(); + stack.restore(); + stack.pop(); + } +} + +fn snapshot_pop_clear(elements: impl Iterator) { + let mut stack = Stack::::new(); + for elem in elements { + stack.push(elem); + } + while !stack.is_empty() { + stack.snapshot(); + stack.pop(); + stack.clear_snapshot(); + } +} + +fn benchmark(b: &mut Criterion) { + use core::iter::repeat; + // use criterion::black_box; + let times = 10000usize; + let small = 0..times; + let medium = ("", 0usize, 1usize); + let medium = repeat(medium).take(times); + let large = [""; 64]; + let large = repeat(large).take(times); + macro_rules! test_series { + ($kind:ident) => { + b.bench_function(stringify!(push - restore - $kind), |b| { + b.iter(|| snapshot_push_restore($kind.clone())) + }) + .bench_function(stringify!(push - clear - $kind), |b| { + b.iter(|| snapshot_push_clear_snapshot($kind.clone())) + }) + .bench_function(stringify!(pop - restore - $kind), |b| { + b.iter(|| snapshot_pop_restore($kind.clone())) + }) + .bench_function(stringify!(pop - clear - $kind), |b| { + b.iter(|| snapshot_pop_clear($kind.clone())) + }) + }; + } + test_series!(small); + test_series!(medium); + test_series!(large); +} + +criterion_group!(benchmarks, benchmark); +criterion_main!(benchmarks); diff --git a/pest/src/lib.rs b/pest/src/lib.rs index 5dce7ee3..2ec2688c 100644 --- a/pest/src/lib.rs +++ b/pest/src/lib.rs @@ -340,6 +340,7 @@ pub use crate::parser_state::{ }; pub use crate::position::Position; pub use crate::span::{merge_spans, Lines, LinesSpan, Span}; +pub use crate::stack::Stack; pub use crate::token::Token; use core::fmt::Debug; use core::hash::Hash; diff --git a/pest/src/stack.rs b/pest/src/stack.rs index 05ac11f6..7badc5df 100644 --- a/pest/src/stack.rs +++ b/pest/src/stack.rs @@ -11,22 +11,48 @@ use alloc::vec; use alloc::vec::Vec; use core::ops::{Index, Range}; -/// Implementation of a `Stack` which maintains an log of `StackOp`s in order to rewind the stack -/// to a previous state. +/// Implementation of a `Stack` which maintains popped elements and length of previous states +/// in order to rewind the stack to a previous state. #[derive(Debug)] pub struct Stack { - ops: Vec>, + /// All elements in the stack. cache: Vec, - snapshots: Vec, + /// All elements that are in previous snapshots but may not be in the next state. + /// They will be pushed back to `cache` if the snapshot is restored, + /// otherwise be dropped if the snapshot is cleared. + /// + /// Those elements from a sequence of snapshots are stacked in one [`Vec`], and + /// `popped.len() == lengths.iter().map(|(len, remained)| len - remained).sum()` + popped: Vec, + /// Every element corresponds to a snapshot, and each element has two fields: + /// - Length of `cache` when corresponding snapshot is taken (AKA `len`). + /// - Count of elements that come from corresponding snapshot + /// and are still in next snapshot or current state (AKA `remained`). + /// + /// And `len` is never less than `remained`. + /// + /// On restoring, the `cache` can be divided into two parts: + /// - `0..remained` are untouched since the snapshot is taken. + /// + /// There's nothing to do with those elements. Just let them stay where they are. + /// + /// - `remained..cache.len()` are pushed after the snapshot is taken. + lengths: Vec<(usize, usize)>, +} + +impl Default for Stack { + fn default() -> Self { + Self::new() + } } impl Stack { /// Creates a new `Stack`. pub fn new() -> Self { Stack { - ops: vec![], cache: vec![], - snapshots: vec![], + popped: vec![], + lengths: vec![], } } @@ -43,15 +69,21 @@ impl Stack { /// Pushes a `T` onto the `Stack`. pub fn push(&mut self, elem: T) { - self.ops.push(StackOp::Push(elem.clone())); self.cache.push(elem); } /// Pops the top-most `T` from the `Stack`. pub fn pop(&mut self) -> Option { + let len = self.cache.len(); let popped = self.cache.pop(); - if let Some(ref val) = popped { - self.ops.push(StackOp::Pop(val.clone())); + if let Some(popped) = &popped { + if let Some((_, remained_count)) = self.lengths.last_mut() { + // `len >= *unpopped_count` + if len == *remained_count { + *remained_count -= 1; + self.popped.push(popped.clone()); + } + } } popped } @@ -63,40 +95,40 @@ impl Stack { /// Takes a snapshot of the current `Stack`. pub fn snapshot(&mut self) { - self.snapshots.push(self.ops.len()); + self.lengths.push((self.cache.len(), self.cache.len())) } /// The parsing after the last snapshot was successful so clearing it. pub fn clear_snapshot(&mut self) { - self.snapshots.pop(); + if let Some((len, unpopped)) = self.lengths.pop() { + // Popped elements from previous state are no longer needed. + self.popped.truncate(self.popped.len() - (len - unpopped)); + } } /// Rewinds the `Stack` to the most recent `snapshot()`. If no `snapshot()` has been taken, this /// function return the stack to its initial state. pub fn restore(&mut self) { - match self.snapshots.pop() { - Some(ops_index) => { - self.rewind_to(ops_index); - self.ops.truncate(ops_index); + match self.lengths.pop() { + Some((len_stack, remained)) => { + if remained < self.cache.len() { + // Remove those elements that are pushed after the snapshot. + self.cache.truncate(remained); + } + if len_stack > remained { + let rewind_count = len_stack - remained; + let new_len = self.popped.len() - rewind_count; + let recovered_elements = self.popped.drain(new_len..); + self.cache.extend(recovered_elements.rev()); + debug_assert_eq!(self.popped.len(), new_len); + } } None => { self.cache.clear(); - self.ops.clear(); - } - } - } - - // Rewind the stack to a particular index - fn rewind_to(&mut self, index: usize) { - let ops_to_rewind = &self.ops[index..]; - for op in ops_to_rewind.iter().rev() { - match *op { - StackOp::Push(_) => { - self.cache.pop(); - } - StackOp::Pop(ref elem) => { - self.cache.push(elem.clone()); - } + // As `self.popped` and `self.lengths` should already be empty, + // there is no need to clear it. + debug_assert!(self.popped.is_empty()); + debug_assert!(self.lengths.is_empty()); } } } @@ -110,12 +142,6 @@ impl Index> for Stack { } } -#[derive(Debug)] -enum StackOp { - Push(T), - Pop(T), -} - #[cfg(test)] mod test { use super::Stack; @@ -146,6 +172,79 @@ mod test { assert_eq!(stack[0..stack.len()], [0]); } + #[test] + fn restore_without_snapshot() { + let mut stack = Stack::new(); + + stack.push(0); + stack.restore(); + + assert_eq!(stack[0..stack.len()], [0; 0]); + } + + #[test] + fn snapshot_pop_restore() { + let mut stack = Stack::new(); + + stack.push(0); + stack.snapshot(); + stack.pop(); + stack.restore(); + + assert_eq!(stack[0..stack.len()], [0]); + } + + #[test] + fn snapshot_pop_push_restore() { + let mut stack = Stack::new(); + + stack.push(0); + stack.snapshot(); + stack.pop(); + stack.push(1); + stack.restore(); + + assert_eq!(stack[0..stack.len()], [0]); + } + + #[test] + fn snapshot_push_pop_restore() { + let mut stack = Stack::new(); + + stack.push(0); + stack.snapshot(); + stack.push(1); + stack.push(2); + stack.pop(); + stack.restore(); + + assert_eq!(stack[0..stack.len()], [0]); + } + + #[test] + fn snapshot_push_clear() { + let mut stack = Stack::new(); + + stack.push(0); + stack.snapshot(); + stack.push(1); + stack.clear_snapshot(); + + assert_eq!(stack[0..stack.len()], [0, 1]); + } + + #[test] + fn snapshot_pop_clear() { + let mut stack = Stack::new(); + + stack.push(0); + stack.push(1); + stack.snapshot(); + stack.pop(); + stack.clear_snapshot(); + + assert_eq!(stack[0..stack.len()], [0]); + } #[test] fn stack_ops() {