From 2de55376112950a0969c8b64d2e6dbd042e6a1da Mon Sep 17 00:00:00 2001 From: Igor Dejanovic Date: Thu, 4 Jan 2024 13:07:41 +0100 Subject: [PATCH] fix: regex recognizer anchoring --- .../calculator/calculator1/src/calculator.rs | 4 +-- .../calculator/calculator2/src/calculator.rs | 2 +- .../calculator/calculator3/src/calculator.rs | 2 +- .../calculator/calculator4/src/calculator.rs | 2 +- .../calculator/calculator5/src/calculator.rs | 2 +- rustemo-compiler/src/generator/base.rs | 2 +- rustemo-compiler/src/lang/rustemo.rs | 25 +++++++++++-------- tests/src/output_dir/output_dir.rs | 2 +- 8 files changed, 22 insertions(+), 19 deletions(-) diff --git a/docs/src/tutorials/calculator/calculator1/src/calculator.rs b/docs/src/tutorials/calculator/calculator1/src/calculator.rs index 5acaf55a..239cd6a4 100644 --- a/docs/src/tutorials/calculator/calculator1/src/calculator.rs +++ b/docs/src/tutorials/calculator/calculator1/src/calculator.rs @@ -295,13 +295,13 @@ pub(crate) static RECOGNIZERS: [TokenRecognizer; TERMINAL_COUNT] = [ TokenRecognizer( TokenKind::Operand, Recognizer::RegexMatch( - Lazy::new(|| { Regex::new(concat!("^", "\\d+(\\.\\d+)?")).unwrap() }), + Lazy::new(|| { Regex::new(concat!("^(", "\\d+(\\.\\d+)?", ")")).unwrap() }), ), ), TokenRecognizer( TokenKind::Operator, Recognizer::RegexMatch( - Lazy::new(|| { Regex::new(concat!("^", "\\+|-|\\*|/")).unwrap() }), + Lazy::new(|| { Regex::new(concat!("^(", "\\+|-|\\*|/", ")")).unwrap() }), ), ), ]; diff --git a/docs/src/tutorials/calculator/calculator2/src/calculator.rs b/docs/src/tutorials/calculator/calculator2/src/calculator.rs index b4ccff9a..714b59f7 100644 --- a/docs/src/tutorials/calculator/calculator2/src/calculator.rs +++ b/docs/src/tutorials/calculator/calculator2/src/calculator.rs @@ -421,7 +421,7 @@ pub(crate) static RECOGNIZERS: [TokenRecognizer; TERMINAL_COUNT] = [ TokenRecognizer( TokenKind::Number, Recognizer::RegexMatch( - Lazy::new(|| { Regex::new(concat!("^", "\\d+(\\.\\d+)?")).unwrap() }), + Lazy::new(|| { Regex::new(concat!("^(", "\\d+(\\.\\d+)?", ")")).unwrap() }), ), ), TokenRecognizer(TokenKind::Plus, Recognizer::StrMatch("+")), diff --git a/docs/src/tutorials/calculator/calculator3/src/calculator.rs b/docs/src/tutorials/calculator/calculator3/src/calculator.rs index 564bbb6c..4e9cfaea 100644 --- a/docs/src/tutorials/calculator/calculator3/src/calculator.rs +++ b/docs/src/tutorials/calculator/calculator3/src/calculator.rs @@ -421,7 +421,7 @@ pub(crate) static RECOGNIZERS: [TokenRecognizer; TERMINAL_COUNT] = [ TokenRecognizer( TokenKind::Number, Recognizer::RegexMatch( - Lazy::new(|| { Regex::new(concat!("^", "\\d+(\\.\\d+)?")).unwrap() }), + Lazy::new(|| { Regex::new(concat!("^(", "\\d+(\\.\\d+)?", ")")).unwrap() }), ), ), TokenRecognizer(TokenKind::Plus, Recognizer::StrMatch("+")), diff --git a/docs/src/tutorials/calculator/calculator4/src/calculator.rs b/docs/src/tutorials/calculator/calculator4/src/calculator.rs index 564bbb6c..4e9cfaea 100644 --- a/docs/src/tutorials/calculator/calculator4/src/calculator.rs +++ b/docs/src/tutorials/calculator/calculator4/src/calculator.rs @@ -421,7 +421,7 @@ pub(crate) static RECOGNIZERS: [TokenRecognizer; TERMINAL_COUNT] = [ TokenRecognizer( TokenKind::Number, Recognizer::RegexMatch( - Lazy::new(|| { Regex::new(concat!("^", "\\d+(\\.\\d+)?")).unwrap() }), + Lazy::new(|| { Regex::new(concat!("^(", "\\d+(\\.\\d+)?", ")")).unwrap() }), ), ), TokenRecognizer(TokenKind::Plus, Recognizer::StrMatch("+")), diff --git a/docs/src/tutorials/calculator/calculator5/src/calculator.rs b/docs/src/tutorials/calculator/calculator5/src/calculator.rs index 564bbb6c..4e9cfaea 100644 --- a/docs/src/tutorials/calculator/calculator5/src/calculator.rs +++ b/docs/src/tutorials/calculator/calculator5/src/calculator.rs @@ -421,7 +421,7 @@ pub(crate) static RECOGNIZERS: [TokenRecognizer; TERMINAL_COUNT] = [ TokenRecognizer( TokenKind::Number, Recognizer::RegexMatch( - Lazy::new(|| { Regex::new(concat!("^", "\\d+(\\.\\d+)?")).unwrap() }), + Lazy::new(|| { Regex::new(concat!("^(", "\\d+(\\.\\d+)?", ")")).unwrap() }), ), ), TokenRecognizer(TokenKind::Plus, Recognizer::StrMatch("+")), diff --git a/rustemo-compiler/src/generator/base.rs b/rustemo-compiler/src/generator/base.rs index dbb124e5..067b5ec8 100644 --- a/rustemo-compiler/src/generator/base.rs +++ b/rustemo-compiler/src/generator/base.rs @@ -618,7 +618,7 @@ impl<'g, 's> PartGenerator<'g, 's> for BasePartGenerator { let r = r.as_ref(); parse_quote! { TokenRecognizer(TokenKind::#token_kind, Recognizer::RegexMatch(Lazy::new(|| { - Regex::new(concat!("^", #r)).unwrap() + Regex::new(concat!("^(", #r, ")")).unwrap() }))) } }, diff --git a/rustemo-compiler/src/lang/rustemo.rs b/rustemo-compiler/src/lang/rustemo.rs index fde999bd..ca9b22e8 100644 --- a/rustemo-compiler/src/lang/rustemo.rs +++ b/rustemo-compiler/src/lang/rustemo.rs @@ -16900,27 +16900,29 @@ pub(crate) static RECOGNIZERS: [TokenRecognizer; TERMINAL_COUNT] = [ TokenKind::Name, Recognizer::RegexMatch( Lazy::new(|| { - Regex::new(concat!("^", "[a-zA-Z_][a-zA-Z0-9_\\.]*")).unwrap() + Regex::new(concat!("^(", "[a-zA-Z_][a-zA-Z0-9_\\.]*", ")")).unwrap() }), ), ), TokenRecognizer( TokenKind::RegexTerm, Recognizer::RegexMatch( - Lazy::new(|| { Regex::new(concat!("^", "/(\\\\.|[^/\\\\])*/")).unwrap() }), + Lazy::new(|| { + Regex::new(concat!("^(", "/(\\\\.|[^/\\\\])*/", ")")).unwrap() + }), ), ), TokenRecognizer( TokenKind::IntConst, Recognizer::RegexMatch( - Lazy::new(|| { Regex::new(concat!("^", "\\d+")).unwrap() }), + Lazy::new(|| { Regex::new(concat!("^(", "\\d+", ")")).unwrap() }), ), ), TokenRecognizer( TokenKind::FloatConst, Recognizer::RegexMatch( Lazy::new(|| { - Regex::new(concat!("^", "[+-]?[0-9]+[.][0-9]*([e][+-]?[0-9]+)?")) + Regex::new(concat!("^(", "[+-]?[0-9]+[.][0-9]*([e][+-]?[0-9]+)?", ")")) .unwrap() }), ), @@ -16928,7 +16930,7 @@ pub(crate) static RECOGNIZERS: [TokenRecognizer; TERMINAL_COUNT] = [ TokenRecognizer( TokenKind::BoolConst, Recognizer::RegexMatch( - Lazy::new(|| { Regex::new(concat!("^", "true|false")).unwrap() }), + Lazy::new(|| { Regex::new(concat!("^(", "true|false", ")")).unwrap() }), ), ), TokenRecognizer( @@ -16937,8 +16939,9 @@ pub(crate) static RECOGNIZERS: [TokenRecognizer; TERMINAL_COUNT] = [ Lazy::new(|| { Regex::new( concat!( - "^", - "(?s)(^'[^'\\\\]*(?:\\\\.[^'\\\\]*)*')|(^\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\")" + "^(", + "(?s)(^'[^'\\\\]*(?:\\\\.[^'\\\\]*)*')|(^\"[^\"\\\\]*(?:\\\\.[^\"\\\\]*)*\")", + ")" ), ) .unwrap() @@ -16948,26 +16951,26 @@ pub(crate) static RECOGNIZERS: [TokenRecognizer; TERMINAL_COUNT] = [ TokenRecognizer( TokenKind::Annotation, Recognizer::RegexMatch( - Lazy::new(|| { Regex::new(concat!("^", "@[a-zA-Z0-9_]+")).unwrap() }), + Lazy::new(|| { Regex::new(concat!("^(", "@[a-zA-Z0-9_]+", ")")).unwrap() }), ), ), TokenRecognizer( TokenKind::WS, Recognizer::RegexMatch( - Lazy::new(|| { Regex::new(concat!("^", "\\s+")).unwrap() }), + Lazy::new(|| { Regex::new(concat!("^(", "\\s+", ")")).unwrap() }), ), ), TokenRecognizer( TokenKind::CommentLine, Recognizer::RegexMatch( - Lazy::new(|| { Regex::new(concat!("^", "//.*")).unwrap() }), + Lazy::new(|| { Regex::new(concat!("^(", "//.*", ")")).unwrap() }), ), ), TokenRecognizer( TokenKind::NotComment, Recognizer::RegexMatch( Lazy::new(|| { - Regex::new(concat!("^", "((\\*[^/])|[^\\s*/]|/[^\\*])+")).unwrap() + Regex::new(concat!("^(", "((\\*[^/])|[^\\s*/]|/[^\\*])+", ")")).unwrap() }), ), ), diff --git a/tests/src/output_dir/output_dir.rs b/tests/src/output_dir/output_dir.rs index d42a61e0..0569d1c5 100644 --- a/tests/src/output_dir/output_dir.rs +++ b/tests/src/output_dir/output_dir.rs @@ -319,7 +319,7 @@ pub(crate) static RECOGNIZERS: [TokenRecognizer; TERMINAL_COUNT] = [ TokenRecognizer( TokenKind::Num, Recognizer::RegexMatch( - Lazy::new(|| { Regex::new(concat!("^", "\\d+")).unwrap() }), + Lazy::new(|| { Regex::new(concat!("^(", "\\d+", ")")).unwrap() }), ), ), ];