diff --git a/parser.d.ts b/parser.d.ts index 51b526f..4edef64 100644 --- a/parser.d.ts +++ b/parser.d.ts @@ -5,6 +5,7 @@ export type Features = { namedGroups?: boolean; unicodePropertyEscape?: boolean; unicodeSet?: boolean; + modifiers?: boolean; }; export type AstNodeType = @@ -98,15 +99,35 @@ export type CharacterClass = Base<"characterClass"> & { kind: "union" | _If; }; -export type NonCapturingGroup = Base<"group"> & { - behavior: - | "ignore" - | "lookahead" - | "lookbehind" - | "negativeLookahead" - | "negativeLookbehind"; - body: RootNode[]; -}; +export type ModifierFlags = { + enabling: string, + disabling: string +} + +export type NonCapturingGroup = Base<"group"> & + ( + | { + behavior: + | "lookahead" + | "lookbehind" + | "negativeLookahead" + | "negativeLookbehind"; + body: RootNode[]; + } + | ({ + behavior: "ignore"; + body: RootNode[]; + } & _If< + F["modifiers"], + { + modifierFlags?: ModifierFlags; + }, + { + modifierFlags: undefined; + } + >) + ); + export type CapturingGroup = Base<"group"> & { behavior: "normal"; diff --git a/parser.js b/parser.js index fd40a56..1cea991 100644 --- a/parser.js +++ b/parser.js @@ -207,6 +207,16 @@ // ClassHalfOfDouble :: // one of & - ! # % , : ; < = > @ _ ` ~ // +// -------------------------------------------------------------- +// NOTE: The following productions refer to the +// "Regular Expression Pattern Modifiers for ECMAScript" proposal. +// https://github.com/tc39/proposal-regexp-modifiers +// -------------------------------------------------------------- +// +// Atom :: +// ( ? RegularExpressionFlags : Disjunction ) +// ( ? RegularExpressionFlags - RegularExpressionFlags : Disjunction ) +// "use strict"; (function() { @@ -718,7 +728,8 @@ // \ AtomEscape // CharacterClass // ( GroupSpecifier Disjunction ) - // ( ? : Disjunction ) + // ( ? RegularExpressionFlags : Disjunction ) + // ( ? 
RegularExpressionFlags - RegularExpressionFlags : Disjunction )
       // ExtendedAtom ::
       //      ExtendedPatternCharacter
       // ExtendedPatternCharacter ::
@@ -766,6 +777,10 @@
         group.name = name;
         return group;
       }
+      else if (features.modifiers && str[pos] == "(" && str[pos+1] == "?" && str[pos+2] != ":") {
+        // Look at pos directly; indexOf("(?") only finds the pattern's first "(?".
+        return parseModifiersGroup();
+      }
       else {
         // ( Disjunction )
         // ( ? : Disjunction )
@@ -773,6 +788,74 @@
       }
     }
 
+    // Parses a modifiers group (regexp-modifiers proposal):
+    //      ( ? RegularExpressionFlags : Disjunction )
+    //      ( ? RegularExpressionFlags - RegularExpressionFlags : Disjunction )
+    //
+    // Expects `pos` to be at the opening "(?". Only the flags "s", "i" and "m"
+    // are matched. Returns the group built by finishGroup("ignore", from) with
+    // an added `modifierFlags` property ({ enabling, disabling }); a side
+    // without flags is the empty string.
+    //
+    // Bails with 'Invalid flags for modifiers group' when there are neither
+    // enabling flags nor a "-", or when "-" is not followed by a flag, and with
+    // 'flags cannot be duplicated for modifiers group' when a flag appears more
+    // than once across both sides.
+    //
+    // NOTE(review): the ":" separator is not consumed here; whatever follows
+    // the flags is handed to finishGroup(). The fixtures in
+    // test/test-data-modifiers-group.json show the ":" surfacing as a leading
+    // value node in `body`. That looks unintended -- confirm, and if so require
+    // match(":") before finishGroup() and regenerate the fixtures.
+    function parseModifiersGroup() {
+      // True when any character occurs more than once in `str`.
+      function hasDupChar(str) {
+        var i = 0;
+        while (i < str.length) {
+          if (str.indexOf(str[i], i + 1) != -1) {
+            return true;
+          }
+          i++;
+        }
+        return false;
+      }
+
+      var from = pos;
+      // Skip the "(?" prefix (assumes incr() advances `pos`).
+      incr(2);
+
+      var enablingFlags = matchReg(/^[sim]+/);
+      var disablingFlags;
+      if(match("-")){
+        disablingFlags = matchReg(/^[sim]+/);
+        if (!disablingFlags) {
+          bail('Invalid flags for modifiers group');
+        }
+      } else if(!enablingFlags){
+        bail('Invalid flags for modifiers group');
+      }
+
+      // Normalise the matchReg() results to plain strings ("" when nothing matched).
+      enablingFlags = enablingFlags ? enablingFlags[0] : "";
+      disablingFlags = disablingFlags ? disablingFlags[0] : "";
+
+      var flags = enablingFlags + disablingFlags;
+      // Only three distinct flags exist, so more than three characters
+      // already implies a repeat.
+      if(flags.length > 3 || hasDupChar(flags)) {
+        bail('flags cannot be duplicated for modifiers group');
+      }
+
+      var modifiersGroup = finishGroup("ignore", from);
+
+      modifiersGroup.modifierFlags = {
+        enabling: enablingFlags,
+        disabling: disablingFlags
+      };
+
+      return modifiersGroup;
+    }
+
     function parseUnicodeSurrogatePairEscape(firstEscape) {
       if (isUnicodeMode) {
         var first, second;
diff --git a/test/index.js b/test/index.js
index 5ae49f3..df7957b 100644
--- a/test/index.js
+++ b/test/index.js
@@ -62,6 +62,15 @@ runTests('./test-data-unicode-set.json', 'v', {
   unicodeSet: true,
   unicodePropertyEscape: true
 });
 
+runTests('./test-data-modifiers-group.json', '', {
+  modifiers: true,
+  namedGroups: true,
+});
+runTests('./test-data-named-groups.json', '', {
+  modifiers: true,
+  namedGroups: true,
+});
+
 (function testUVError() {
   var message = 'It should throw an error when using both the "u" and "v" flags.';
diff --git
a/test/test-data-modifiers-group.json b/test/test-data-modifiers-group.json new file mode 100644 index 0000000..ac09574 --- /dev/null +++ b/test/test-data-modifiers-group.json @@ -0,0 +1,544 @@ +{ + "^[a-z](?-i:[a-z])$": { + "type": "alternative", + "body": [ + { + "type": "anchor", + "kind": "start", + "range": [ + 0, + 1 + ], + "raw": "^" + }, + { + "type": "characterClass", + "kind": "union", + "body": [ + { + "type": "characterClassRange", + "min": { + "type": "value", + "kind": "symbol", + "codePoint": 97, + "range": [ + 2, + 3 + ], + "raw": "a" + }, + "max": { + "type": "value", + "kind": "symbol", + "codePoint": 122, + "range": [ + 4, + 5 + ], + "raw": "z" + }, + "range": [ + 2, + 5 + ], + "raw": "a-z" + } + ], + "negative": false, + "range": [ + 1, + 6 + ], + "raw": "[a-z]" + }, + { + "type": "group", + "behavior": "ignore", + "body": [ + { + "type": "value", + "kind": "symbol", + "codePoint": 58, + "range": [ + 10, + 11 + ], + "raw": ":" + }, + { + "type": "characterClass", + "kind": "union", + "body": [ + { + "type": "characterClassRange", + "min": { + "type": "value", + "kind": "symbol", + "codePoint": 97, + "range": [ + 12, + 13 + ], + "raw": "a" + }, + "max": { + "type": "value", + "kind": "symbol", + "codePoint": 122, + "range": [ + 14, + 15 + ], + "raw": "z" + }, + "range": [ + 12, + 15 + ], + "raw": "a-z" + } + ], + "negative": false, + "range": [ + 11, + 16 + ], + "raw": "[a-z]" + } + ], + "range": [ + 6, + 17 + ], + "raw": "(?-i:[a-z])", + "modifierFlags": { + "enabling": "", + "disabling": "i" + } + }, + { + "type": "anchor", + "kind": "end", + "range": [ + 17, + 18 + ], + "raw": "$" + } + ], + "range": [ + 0, + 18 + ], + "raw": "^[a-z](?-i:[a-z])$" + }, + "^(?i:[a-z])[a-z]$": { + "type": "alternative", + "body": [ + { + "type": "anchor", + "kind": "start", + "range": [ + 0, + 1 + ], + "raw": "^" + }, + { + "type": "group", + "behavior": "ignore", + "body": [ + { + "type": "value", + "kind": "symbol", + "codePoint": 58, + "range": [ + 4, + 5 + ], 
+ "raw": ":" + }, + { + "type": "characterClass", + "kind": "union", + "body": [ + { + "type": "characterClassRange", + "min": { + "type": "value", + "kind": "symbol", + "codePoint": 97, + "range": [ + 6, + 7 + ], + "raw": "a" + }, + "max": { + "type": "value", + "kind": "symbol", + "codePoint": 122, + "range": [ + 8, + 9 + ], + "raw": "z" + }, + "range": [ + 6, + 9 + ], + "raw": "a-z" + } + ], + "negative": false, + "range": [ + 5, + 10 + ], + "raw": "[a-z]" + } + ], + "range": [ + 1, + 11 + ], + "raw": "(?i:[a-z])", + "modifierFlags": { + "enabling": "i", + "disabling": "" + } + }, + { + "type": "characterClass", + "kind": "union", + "body": [ + { + "type": "characterClassRange", + "min": { + "type": "value", + "kind": "symbol", + "codePoint": 97, + "range": [ + 12, + 13 + ], + "raw": "a" + }, + "max": { + "type": "value", + "kind": "symbol", + "codePoint": 122, + "range": [ + 14, + 15 + ], + "raw": "z" + }, + "range": [ + 12, + 15 + ], + "raw": "a-z" + } + ], + "negative": false, + "range": [ + 11, + 16 + ], + "raw": "[a-z]" + }, + { + "type": "anchor", + "kind": "end", + "range": [ + 16, + 17 + ], + "raw": "$" + } + ], + "range": [ + 0, + 17 + ], + "raw": "^(?i:[a-z])[a-z]$" + }, + "^(?i-s:[a-z])[a-z]$": { + "type": "alternative", + "body": [ + { + "type": "anchor", + "kind": "start", + "range": [ + 0, + 1 + ], + "raw": "^" + }, + { + "type": "group", + "behavior": "ignore", + "body": [ + { + "type": "value", + "kind": "symbol", + "codePoint": 58, + "range": [ + 6, + 7 + ], + "raw": ":" + }, + { + "type": "characterClass", + "kind": "union", + "body": [ + { + "type": "characterClassRange", + "min": { + "type": "value", + "kind": "symbol", + "codePoint": 97, + "range": [ + 8, + 9 + ], + "raw": "a" + }, + "max": { + "type": "value", + "kind": "symbol", + "codePoint": 122, + "range": [ + 10, + 11 + ], + "raw": "z" + }, + "range": [ + 8, + 11 + ], + "raw": "a-z" + } + ], + "negative": false, + "range": [ + 7, + 12 + ], + "raw": "[a-z]" + } + ], + "range": [ + 1, + 
13 + ], + "raw": "(?i-s:[a-z])", + "modifierFlags": { + "enabling": "i", + "disabling": "s" + } + }, + { + "type": "characterClass", + "kind": "union", + "body": [ + { + "type": "characterClassRange", + "min": { + "type": "value", + "kind": "symbol", + "codePoint": 97, + "range": [ + 14, + 15 + ], + "raw": "a" + }, + "max": { + "type": "value", + "kind": "symbol", + "codePoint": 122, + "range": [ + 16, + 17 + ], + "raw": "z" + }, + "range": [ + 14, + 17 + ], + "raw": "a-z" + } + ], + "negative": false, + "range": [ + 13, + 18 + ], + "raw": "[a-z]" + }, + { + "type": "anchor", + "kind": "end", + "range": [ + 18, + 19 + ], + "raw": "$" + } + ], + "range": [ + 0, + 19 + ], + "raw": "^(?i-s:[a-z])[a-z]$" + }, + "^[a-z](?:[a-z])$": { + "type": "alternative", + "body": [ + { + "type": "anchor", + "kind": "start", + "range": [ + 0, + 1 + ], + "raw": "^" + }, + { + "type": "characterClass", + "kind": "union", + "body": [ + { + "type": "characterClassRange", + "min": { + "type": "value", + "kind": "symbol", + "codePoint": 97, + "range": [ + 2, + 3 + ], + "raw": "a" + }, + "max": { + "type": "value", + "kind": "symbol", + "codePoint": 122, + "range": [ + 4, + 5 + ], + "raw": "z" + }, + "range": [ + 2, + 5 + ], + "raw": "a-z" + } + ], + "negative": false, + "range": [ + 1, + 6 + ], + "raw": "[a-z]" + }, + { + "type": "group", + "behavior": "ignore", + "body": [ + { + "type": "characterClass", + "kind": "union", + "body": [ + { + "type": "characterClassRange", + "min": { + "type": "value", + "kind": "symbol", + "codePoint": 97, + "range": [ + 10, + 11 + ], + "raw": "a" + }, + "max": { + "type": "value", + "kind": "symbol", + "codePoint": 122, + "range": [ + 12, + 13 + ], + "raw": "z" + }, + "range": [ + 10, + 13 + ], + "raw": "a-z" + } + ], + "negative": false, + "range": [ + 9, + 14 + ], + "raw": "[a-z]" + } + ], + "range": [ + 6, + 15 + ], + "raw": "(?:[a-z])" + }, + { + "type": "anchor", + "kind": "end", + "range": [ + 15, + 16 + ], + "raw": "$" + } + ], + "range": [ + 0, + 
16 + ], + "raw": "^[a-z](?:[a-z])$" + }, + "^[a-z](?imi:[a-z])$": { + "type": "error", + "name": "SyntaxError", + "message": "flags cannot be duplicated for modifiers group at position 11\n [a-z](?imi:[a-z])$\n ^", + "input": "^[a-z](?imi:[a-z])$" + }, + "^[a-z](?i-mi:[a-z])$": { + "type": "error", + "name": "SyntaxError", + "message": "flags cannot be duplicated for modifiers group at position 12\n a-z](?i-mi:[a-z])$\n ^", + "input": "^[a-z](?i-mi:[a-z])$" + }, + "^[a-z](?-mim:[a-z])$": { + "type": "error", + "name": "SyntaxError", + "message": "flags cannot be duplicated for modifiers group at position 12\n a-z](?-mim:[a-z])$\n ^", + "input": "^[a-z](?-mim:[a-z])$" + }, + "^[a-z](?-:[a-z])$": { + "type": "error", + "name": "SyntaxError", + "message": "Invalid flags for modifiers group at position 9\n ^[a-z](?-:[a-z])$\n ^", + "input": "^[a-z](?-:[a-z])$" + } +} diff --git a/test/types.ts b/test/types.ts index 4c14ede..2600b85 100644 --- a/test/types.ts +++ b/test/types.ts @@ -1,4 +1,4 @@ -import { AstNodeType, Identifier, parse, RootNode } from "../parser"; +import { AstNodeType, Identifier, ModifierFlags, parse, RootNode } from "../parser"; function assert(input: T): void {} @@ -88,3 +88,15 @@ if (nodeWithUnicodeSet.type === "characterClass") { nodeWithUnicodeSet.kind === "subtraction"; assert<"union" | "intersection" | "subtraction">(nodeWithUnicodeSet.kind); } + +let nodeWithModifiers: RootNode<{ modifiers: true }>; +nodeWithModifiers = parse("", "", { + modifiers: true, +}); + +if ( + nodeWithModifiers.type === "group" && + nodeWithModifiers.behavior === "ignore" +) { + assert(nodeWithModifiers.modifierFlags); +} diff --git a/test/update-fixtures.js b/test/update-fixtures.js index d44a7e9..9df50c7 100644 --- a/test/update-fixtures.js +++ b/test/update-fixtures.js @@ -57,3 +57,6 @@ updateFixtures('./test-data-unicode-set.json', 'v', { unicodeSet: true, unicodePropertyEscape: true }); +updateFixtures('./test-data-modifiers-group.json', '', { + modifiers: true, 
+});