From 1039d4517af1bd06a4d5b6b5ebafdcfe599eac18 Mon Sep 17 00:00:00 2001
From: jiyinyiyong
Date: Sun, 7 Nov 2021 03:57:33 +0800
Subject: [PATCH] temporary error catching for escaping single quote and unicode; tag 0.1.14

---
 Cargo.lock          |  6 +++---
 Cargo.toml          |  4 ++--
 benches/parsing.rs  |  2 +-
 examples/demo.rs    |  2 ++
 examples/escape.rs  |  6 ++++++
 src/parser.rs       | 26 ++++++++++++++++++++++++--
 tests/lexer_test.rs | 20 ++++++++++++++++++++
 7 files changed, 58 insertions(+), 8 deletions(-)
 create mode 100644 examples/escape.rs

diff --git a/Cargo.lock b/Cargo.lock
index 1ff95df..051c692 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -60,7 +60,7 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
 [[package]]
 name = "cirru_parser"
-version = "0.1.13"
+version = "0.1.14"
 dependencies = [
  "criterion",
  "serde_json",
@@ -442,9 +442,9 @@ dependencies = [
 
 [[package]]
 name = "serde_json"
-version = "1.0.68"
+version = "1.0.69"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "0f690853975602e1bfe1ccbf50504d67174e3bcf340f23b5ea9992e0587a52d8"
+checksum = "e466864e431129c7e0d3476b92f20458e5879919a0596c6472738d9fa2d342f8"
 dependencies = [
  "itoa",
  "ryu",
diff --git a/Cargo.toml b/Cargo.toml
index 9d092d3..f124324 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "cirru_parser"
-version = "0.1.13"
+version = "0.1.14"
 authors = ["jiyinyiyong "]
 edition = "2018"
 license = "MIT"
@@ -21,7 +21,7 @@ exclude = [
 [dependencies]
 
 [dev-dependencies]
-serde_json = "1.0.68"
+serde_json = "1.0.69"
 criterion = "0.3"
 
 
diff --git a/benches/parsing.rs b/benches/parsing.rs
index e27e242..728545f 100644
--- a/benches/parsing.rs
+++ b/benches/parsing.rs
@@ -1,4 +1,4 @@
-use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use criterion::{criterion_group, criterion_main, Criterion};
 use std::fs;
 
 use cirru_parser::parse;
diff --git a/examples/demo.rs b/examples/demo.rs
index da23dab..2984d3e 100644
--- a/examples/demo.rs
+++ b/examples/demo.rs
@@ -18,6 +18,8 @@ defn fib (n)
   );
 
   let large_demo = "/Users/chen/repo/calcit-lang/editor/compact.cirru";
+  // let large_demo = "/Users/chen/repo/calcit-lang/respo-calcit-workflow/js-out/program-ir.cirru";
+  // let large_demo = "/Users/chen/repo/calcit-lang/calcit_runner.rs/js-out/program-ir.cirru";
   let content = fs::read_to_string(large_demo).unwrap();
 
   match parse(&content) {
diff --git a/examples/escape.rs b/examples/escape.rs
new file mode 100644
index 0000000..21dbf21
--- /dev/null
+++ b/examples/escape.rs
@@ -0,0 +1,6 @@
+use cirru_parser::parse;
+
+pub fn main() {
+  println!("{:?}", parse("a \"b\\u{87DF}\""));
+  println!("{:?}", parse("a \"b\\u{87DF}\" c d e f g f"));
+}
diff --git a/src/parser.rs b/src/parser.rs
index 24281d9..3ff7f96 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -111,7 +111,7 @@ pub fn lex(initial_code: &str) -> Result {
   let mut buffer = String::from("");
   let code = initial_code;
 
-  for c in code.chars() {
+  for (idx, c) in code.char_indices() {
     match state {
       CirruLexState::Space => match c {
         ' ' => {
@@ -196,6 +196,10 @@ pub fn lex(initial_code: &str) -> Result {
           state = CirruLexState::Str;
           buffer.push('"');
         }
+        '\'' => {
+          state = CirruLexState::Str;
+          buffer.push('\'');
+        }
         't' => {
           state = CirruLexState::Str;
           buffer.push('\t');
@@ -204,11 +208,29 @@ pub fn lex(initial_code: &str) -> Result {
           state = CirruLexState::Str;
           buffer.push('\n');
         }
+        'u' => {
+          // not supporting, but don't panic
+          // `idx` is a byte offset from `char_indices`, so slices stay on char boundaries
+          let rest = &code[idx..];
+          let peek = match rest.char_indices().nth(10) {
+            Some((cut, _)) => &rest[..cut],
+            None => rest,
+          };
+          println!("Unicode escaping is not supported yet: {:?} ...", peek);
+          buffer.push('\\');
+          buffer.push('u');
+          state = CirruLexState::Str;
+        }
         '\\' => {
           state = CirruLexState::Str;
           buffer.push('\\');
         }
-        _ => return Err(String::from("unexpected character during string escaping")),
+        _ => {
+          return Err(format!(
+            "unexpected character during string escaping: {:?}",
+            c
+          ))
+        }
       },
       CirruLexState::Indent => match c {
         ' ' => {
diff --git a/tests/lexer_test.rs b/tests/lexer_test.rs
index 9aa35ac..c58aa45 100644
--- a/tests/lexer_test.rs
+++ b/tests/lexer_test.rs
@@ -103,3 +103,23 @@ fn lex_strings() -> Result<(), String> {
 
   Ok(())
 }
+
+#[test]
+fn escape_chars() -> Result<(), String> {
+  assert_eq!(
+    lex(r#""\u{6c49}""#)?,
+    vec![
+      CirruLexItem::Indent(0),
+      CirruLexItem::Str(String::from(r#"\u{6c49}"#))
+    ]
+  );
+
+  assert_eq!(
+    lex(r#""\'""#)?,
+    vec![
+      CirruLexItem::Indent(0),
+      CirruLexItem::Str(String::from(r#"'"#))
+    ]
+  );
+  Ok(())
+}