From f9789b1cdf030fb221c7875e10de8a36dffb9955 Mon Sep 17 00:00:00 2001 From: nhaajt <23617618+nhaajt@users.noreply.github.com> Date: Sun, 18 Feb 2024 01:07:28 +0100 Subject: [PATCH] fix(group-name): Fix valid group name (#300) + Separate group name rule to allow JS group name with underscores + Add tests to check for JS valid group name with underscores --- .../weaponregex/internal/parser/Parser.scala | 4 +- .../internal/parser/ParserJS.scala | 9 ++++ .../internal/parser/ParserJVM.scala | 9 ++++ .../internal/parser/ParserJSTest.scala | 42 +++++++++++++++++++ 4 files changed, 62 insertions(+), 2 deletions(-) diff --git a/core/src/main/scala/weaponregex/internal/parser/Parser.scala b/core/src/main/scala/weaponregex/internal/parser/Parser.scala index 2f357aa..f46beaf 100644 --- a/core/src/main/scala/weaponregex/internal/parser/Parser.scala +++ b/core/src/main/scala/weaponregex/internal/parser/Parser.scala @@ -441,13 +441,13 @@ abstract private[weaponregex] class Parser(val pattern: String) { def group[A: P]: P[Group] = Indexed("(" ~ RE ~ ")") .map { case (loc, expr) => Group(expr, isCapturing = true, loc) } - /** Parse a group name that starts with a letter and followed by zero or more alphanumeric characters + /** Parse a group name * @return * the parsed name string * @example * `"name1"` */ - def groupName[A: P]: P[String] = P(CharIn("a-z", "A-Z") ~ CharIn("a-z", "A-Z", "0-9").rep).! 
+ def groupName[A: P]: P[String] /** Parse a named-capturing group * @return diff --git a/core/src/main/scala/weaponregex/internal/parser/ParserJS.scala b/core/src/main/scala/weaponregex/internal/parser/ParserJS.scala index e34c988..04e4d16 100644 --- a/core/src/main/scala/weaponregex/internal/parser/ParserJS.scala +++ b/core/src/main/scala/weaponregex/internal/parser/ParserJS.scala @@ -73,6 +73,15 @@ private[weaponregex] class ParserJS private[parser] (pattern: String, val flags: else P(preDefinedCharClass | metaCharacter | range | quoteChar | charClassCharLiteral) + /** Parse a group name + * @return + * the parsed name string + * @example + * `"name1"` + */ + override def groupName[A: P]: P[String] = + P(CharIn("a-z", "A-Z", "_") ~ CharIn("a-z", "A-Z", "0-9", "_").rep).! + /** Parse a quoted character (any character). If [[weaponregex.internal.parser.ParserJS unicodeMode]] is true, only * the following characters are allowed: `^ $ \ . * + ? ( ) [ ] { } |` or `/` * @return diff --git a/core/src/main/scala/weaponregex/internal/parser/ParserJVM.scala b/core/src/main/scala/weaponregex/internal/parser/ParserJVM.scala index ef73368..f986cf3 100644 --- a/core/src/main/scala/weaponregex/internal/parser/ParserJVM.scala +++ b/core/src/main/scala/weaponregex/internal/parser/ParserJVM.scala @@ -97,6 +97,15 @@ private[weaponregex] class ParserJVM private[parser] (pattern: String) extends P ) .map { case (loc, (hat, nodes)) => CharacterClass(nodes, loc, isPositive = hat.isEmpty) } + /** Parse a group name + * @return + * the parsed name string + * @example + * `"name1"` + */ + override def groupName[A: P]: P[String] = + P(CharIn("a-z", "A-Z") ~ CharIn("a-z", "A-Z", "0-9").rep).! 
+ /** Intermediate parsing rule for special construct tokens which can parse either `namedGroup`, `nonCapturingGroup`, * `flagToggleGroup`, `flagNCGroup`, `lookaround` or `atomicGroup` * @return diff --git a/core/src/test/scala/weaponregex/internal/parser/ParserJSTest.scala b/core/src/test/scala/weaponregex/internal/parser/ParserJSTest.scala index e8b5e74..3f208f8 100644 --- a/core/src/test/scala/weaponregex/internal/parser/ParserJSTest.scala +++ b/core/src/test/scala/weaponregex/internal/parser/ParserJSTest.scala @@ -293,6 +293,48 @@ class ParserJSTest extends munit.FunSuite with ParserTest { treeBuildTest(parsedTree, pattern) } + test("Parse named capturing group with underscores in name") { + val pattern = "(?<group_Name_1>hello)(?<Group_Name_2>world)" + val parsedTree = Parser(pattern, parserFlavor).getOrFail.to[Concat] + + assert(clue(parsedTree.children.head) match { + case NamedGroup(_: Concat, name, _) => name == "group_Name_1" + case _ => false + }) + assert(clue(parsedTree.children.last) match { + case NamedGroup(_: Concat, name, _) => name == "Group_Name_2" + case _ => false + }) + + treeBuildTest(parsedTree, pattern) + } + + test("Parse nested named capturing group with underscores in name") { + val pattern = "(?<group_Name_1>hello(?<Group_Name_2>world))" + val parsedTree = Parser(pattern, parserFlavor).getOrFail + + assert(clue(parsedTree) match { + case NamedGroup(Concat(nodes, _), "group_Name_1", _) => + assert(clue(nodes.last) match { + case NamedGroup(_: Concat, "Group_Name_2", _) => true + case _ => false + }) + true + case _ => false + }) + + treeBuildTest(parsedTree, pattern) + } + + test("Parse named reference with underscores in name") { + val pattern = """\k<name_1>""" + val parsedTree = Parser(pattern, parserFlavor).getOrFail.to[NameReference] + + assertEquals(parsedTree.name, "name_1") + + treeBuildTest(parsedTree, pattern) + } + test("Unparsable: flag toggle group i-i") { val pattern = "(?idmsuxU-idmsuxU)" parseErrorTest(pattern)