Skip to content
Permalink

Comparing changes

This is a direct comparison between two commits made in this repository or its related repositories. View the default comparison for this range or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: LykenSol/grammer
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: af0c15e70094e1c798282c81e6e1f8667392ab99
Choose a base ref
..
head repository: LykenSol/grammer
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: d684081cca4b4d3345f0d6138fc48498d5dcde70
Choose a head ref
Showing with 103 additions and 75 deletions.
  1. +1 −1 Cargo.toml
  2. +6 −3 src/parser.rs
  3. +78 −64 src/slow_bruteforce_interpreter.rs
  4. +17 −6 tests/basic.rs
  5. +1 −1 tests/json.rs
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -13,7 +13,7 @@ readme = "README.md"
description = "Grammar framework."

[dependencies]
cyclotron = "0.0.2"
cyclotron = "0.0.3"
elsa = "1.3.2"
indexmap = "1"
indexing = "0.3.2"
9 changes: 6 additions & 3 deletions src/parser.rs
Original file line number Diff line number Diff line change
@@ -26,7 +26,7 @@ pub struct ParseError<A, Pat> {

pub type ParseResult<A, Pat, T> = Result<T, ParseError<A, Pat>>;

impl<'i, P, G, I: Input, Pat> Parser<'_, 'i, G, I, Pat>
impl<'i, P, G, I: Input, Pat: Ord> Parser<'_, 'i, G, I, Pat>
where
// FIXME(eddyb) these shouldn't be needed, as they are bounds on
// `GrammarReflector::NodeKind`, but that's ignored currently.
@@ -57,10 +57,13 @@ where
remaining: range,
});

let error = ParseError {
let mut error = ParseError {
at: I::source_info_point(&state.forest.input, state.last_input_pos),
expected: state.expected_pats,
};
error.expected.sort();
error.expected.dedup();

match result {
None => Err(error),
Some(node) => {
@@ -128,7 +131,7 @@ where
match self.state.forest.input(self.remaining).match_left(&pat) {
Some(n) => {
let (matching, after, _) = self.remaining.split_at(n);
if n > 0 {
if after.first() > self.state.last_input_pos {
self.state.last_input_pos = after.first();
self.state.expected_pats.clear();
}
142 changes: 78 additions & 64 deletions src/slow_bruteforce_interpreter.rs
Original file line number Diff line number Diff line change
@@ -37,84 +37,98 @@ impl<Pat: Eq + Hash + fmt::Debug> GrammarReflector for SlowBruteforceInterpreter
}
}

pub fn parse<'a, Pat: Clone + Eq + Hash + fmt::Debug, I: Input>(
pub fn parse<'a, Pat: Clone + Ord + Hash + fmt::Debug, I: Input>(
cx: &'a Context<Pat>,
grammar: &'a crate::Grammar,
rule: IStr,
named_rule: IStr,
input: I,
) -> ParseResult<I::SourceInfoPoint, Pat, OwnedHandle<'a, Pat, I>>
where
I::Slice: InputMatch<Pat>,
{
let rule = cx.intern(Rule::Call(rule));
Parser::parse_with(SlowBruteforceInterpreter { cx, grammar }, input, |parser| {
let full_input = parser.remaining();
let parser = &RefCell::new(parser);
// TODO(eddyb) compare this approach to memoizing *only* at the named rule level,
// and using a direct traversal within each rule (as it can't cycle).
let results = bruteforce::memoize(
|parse, (rule, range): (IRule, Range<'_>)| -> BTreeSet<usize> {
match cx[rule] {
Rule::Empty => iter::once(0).collect(),
// FIXME(eddyb) find a way to avoid cloning the pattern.
Rule::Eat(ref pat) => parser
.borrow_mut()
.with_result_and_remaining(Range(range.frontiers().0), range)
.input_consume_left(pat.clone())
.map(|parser| parser.result().len())
.into_iter()
.collect(),
Rule::Call(r) => parse((grammar.rules[&r].rule, range)),
Rule::Concat([left, right]) => parse((left, range))
.into_iter()
.flat_map(|left_len| {
let (left_result, after_left, _) = range.split_at(left_len);
parse((right, Range(after_left)))
.into_iter()
.map(move |right_len| {
let (right_result, after_right, _) =
after_left.split_at(right_len);
parser
.borrow_mut()
.with_result_and_remaining(
Range(right_result),
Range(after_right),
)
.forest_add_split(
rule,
Node {
kind: left,
range: Range(left_result),
},
);
left_len + right_len
})
})
.collect(),
Rule::Or(ref cases) => cases
.iter()
.flat_map(|&case| {
parse((case, range)).into_iter().map(move |len| {
let (result, remaining, _) = range.split_at(len);
fn parse_inner<'i, Pat: Clone + Ord + Hash + fmt::Debug, I: Input>(
cx: &Context<Pat>,
grammar: &crate::Grammar,
parser: &RefCell<Parser<'_, 'i, SlowBruteforceInterpreter<'_, Pat>, I, Pat>>,
parse_cached: &mut dyn FnMut((IRule, Range<'i>)) -> BTreeSet<usize>,
rule: IRule,
range: Range<'i>,
) -> BTreeSet<usize>
where
I::Slice: InputMatch<Pat>,
{
match cx[rule] {
Rule::Empty => iter::once(0).collect(),
// FIXME(eddyb) find a way to avoid cloning the pattern.
Rule::Eat(ref pat) => parser
.borrow_mut()
.with_result_and_remaining(Range(range.frontiers().0), range)
.input_consume_left(pat.clone())
.map(|parser| parser.result().len())
.into_iter()
.collect(),
Rule::Call(r) => parse_cached((grammar.rules[&r].rule, range)),
Rule::Concat([left, right]) => {
parse_inner(cx, grammar, parser, parse_cached, left, range)
.into_iter()
.flat_map(|left_len| {
let (left_result, after_left, _) = range.split_at(left_len);
parse_inner(cx, grammar, parser, parse_cached, right, Range(after_left))
.into_iter()
.map(move |right_len| {
let (right_result, after_right, _) = after_left.split_at(right_len);
parser
.borrow_mut()
.with_result_and_remaining(Range(result), Range(remaining))
.forest_add_choice(rule, case);
len
.with_result_and_remaining(
Range(right_result),
Range(after_right),
)
.forest_add_split(
rule,
Node {
kind: left,
range: Range(left_result),
},
);
left_len + right_len
})
})
.collect()
}
Rule::Or(ref cases) => cases
.iter()
.flat_map(|&case| {
parse_inner(cx, grammar, parser, parse_cached, case, range)
.into_iter()
.map(move |len| {
let (result, remaining, _) = range.split_at(len);
parser
.borrow_mut()
.with_result_and_remaining(Range(result), Range(remaining))
.forest_add_choice(rule, case);
len
})
.collect(),
Rule::Opt(rule) => iter::once(0).chain(parse((rule, range))).collect(),
Rule::RepeatMany(..) | Rule::RepeatMore(..) => {
parse((rule.expand_repeats(cx), range))
}
}
},
)((rule, full_input));
})
.collect(),
Rule::Opt(rule) => iter::once(0)
.chain(parse_inner(cx, grammar, parser, parse_cached, rule, range))
.collect(),
Rule::RepeatMany(..) | Rule::RepeatMore(..) => {
parse_cached((rule.expand_repeats(cx), range))
}
}
}

Parser::parse_with(SlowBruteforceInterpreter { cx, grammar }, input, |parser| {
let full_input = parser.remaining();
let parser = &RefCell::new(parser);
let results = bruteforce::memoize(|parse_cached, (rule, range)| {
parse_inner(cx, grammar, parser, parse_cached, rule, range)
})((grammar.rules[&named_rule].rule, full_input));
results
.into_iter()
.map(|len| Node {
kind: rule,
kind: cx.intern(Rule::Call(named_rule)),
range: Range(full_input.split_at(len).0),
})
.rev()
23 changes: 17 additions & 6 deletions tests/basic.rs
Original file line number Diff line number Diff line change
@@ -80,7 +80,7 @@ testcases![
}}),
}})",
// FIXME(eddyb) get replace quotes with backticks and pretify the `expected` list.
S("aax") => r#"1:3: error: expected ["a", "c", "b", "d"]"#;
S("aax") => r#"1:3: error: expected ["a", "b", "c", "d"]"#;

gll10_g0_opaque {
S = { a:A s:S "d" } |
@@ -102,7 +102,9 @@ testcases![
s: 1:3-1:3 => S(_),
}),
b: 1:1-1:2 => B(_),
})";
})",
// FIXME(eddyb) get replace quotes with backticks and pretify the `expected` list.
S("aax") => r#"1:3: error: expected ["a", "b", "c", "d"]"#;

gll13_g1 {
S = X:{ a:"a" s:S b:"b" } |
@@ -118,7 +120,9 @@ testcases![
a: 1:1-1:2,
d: 1:2-1:3,
b: 1:3-1:4,
}})";
}})",
// FIXME(eddyb) get replace quotes with backticks and pretify the `expected` list.
S("aax") => r#"1:3: error: expected ["a", "d"]"#;

gll15_g0 {
A = X:{ a:"a" x:A b:"b" } |
@@ -130,7 +134,9 @@ testcases![
a: 1:1-1:2,
x: 1:2-1:3 => A({Z:1:2-1:3}),
c: 1:3-1:4,
}})";
}})",
// FIXME(eddyb) get replace quotes with backticks and pretify the `expected` list.
A("aax") => r#"1:3: error: expected ["a", "b", "c"]"#;

gll15_g0_nested {
A = X:{ a:"a" { x:A b:"b" } } |
@@ -142,7 +148,9 @@ testcases![
a: 1:1-1:2,
x: 1:2-1:3 => A({Z:1:2-1:3}),
b: 1:3-1:4,
}})";
}})",
// FIXME(eddyb) get replace quotes with backticks and pretify the `expected` list.
A("aax") => r#"1:3: error: expected ["a", "b", "c"]"#;

repeat_many_trailing {
A = elems:"a"* %% "b";
@@ -157,6 +165,7 @@ testcases![
1:1-1:2,
1:3-1:4,
]})",
// FIXME(eddyb) get replace quotes with backticks and pretify the `expected` list.
A("b") => r#"1:1: error: expected ["a"]"#;

nested_or {
@@ -166,5 +175,7 @@ testcases![
1:1-1:3 => A({
x: 1:1-1:2,
a: 1:2-1:3,
})";
})",
// FIXME(eddyb) get replace quotes with backticks and pretify the `expected` list.
A("xy") => r#"1:2: error: expected ["a", "b"]"#;
];
2 changes: 1 addition & 1 deletion tests/json.rs
Original file line number Diff line number Diff line change
@@ -120,7 +120,7 @@ fn json_like_error() {
stray_identifier
};

let expected = r#"?: error: expected ["null", "false", "true", LITERAL, "[", "{", "("]"#;
let expected = r#"?: error: expected ["(", "[", "{", "false", "null", "true", LITERAL]"#;

json_like_testcase(input, expected);
}