-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.mjs
127 lines (113 loc) · 4.98 KB
/
index.mjs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
// Exceeding this is a likely indicator of infinite recursion, or a significantly over-complicated Grammar
const MAX_LEVEL = 1024;

/**
 * Compiles a dictionary of named rules into regular expressions.
 *
 * Rules may reference each other with `$name`. Each reference is expanded into
 * a named capture group wrapping the referenced rule, and group names are
 * prefixed with the name of the enclosing rule (e.g. `TOP_name`).
 *
 * Errors are thrown as plain strings (not Error objects); this is kept for
 * backward compatibility with existing callers.
 */
export default class Grammar {
  _def; // Original Grammar Input (for reference/debugging)
  _re = {}; // Built Regexes (TOP and components), keyed by rule name

  /** Construct a new Grammar with given definition.
   *
   * @param def An object where keys are rule names, and values are
   *   RegExp objects, strings, or arrays of RegExp objects/strings (the
   *   array elements are alternatives). At a minimum, a 'TOP' rule must be
   *   defined.
   * @throws {string} "Invalid input" if def is not an object or has no TOP
   *   rule; any message raised by _build while compiling TOP.
   */
  constructor(def) {
    // typeof null is 'object', so null has to be rejected explicitly before
    // def.TOP is read.
    if (def === null || typeof def !== 'object' || !def.TOP) {
      throw "Invalid input";
    }
    this._def = def;
    // Recursively parse definitions, throwing in the event any error is
    // encountered or the depth limit is exceeded.
    this._build('TOP');
  }

  /** Internal function to recursively build the regex for the given key and
   * store it in this._re[key].
   *
   * @param key Name of rule to be parsed
   * @param level Recursion level. If this exceeds MAX_LEVEL, we throw with the
   *   assumption that the Grammar has an illegal infinitely recursive
   *   definition.
   * @throws {string} If the depth limit is exceeded, the rule is undefined,
   *   the rule is an empty array, or the rule has an unsupported type.
   */
  _build(key, level = 0) {
    if (level > MAX_LEVEL) {
      throw "Exceeded maximum recursion depth. Input may have a cyclic dependency";
    }
    const rule = this._def[key];
    if (!rule) {
      throw `${key} is not a defined grammar rule`;
    }

    let src;
    if (typeof rule === "string") {
      // Whitespace in a string rule stands for "optional whitespace".
      src = rule.replace(/\s+/g, "\\s*");
    } else if (Array.isArray(rule)) {
      // A list of rules or values to be or'd together.
      if (rule.length === 0) {
        throw `${key} is an empty list of alternatives`;
      }
      // Every alternative (including the first) is reduced to its source and
      // wrapped in a non-capturing group, regardless of its content.
      src = rule
        .map((val) => `(?:${val instanceof RegExp ? val.source : val})`)
        .join("|");
    } else if (rule instanceof RegExp) {
      src = rule.source;
    } else {
      throw `${key} is not a recognized Grammar input type (regex object, string, or array)`;
    }

    // Number of times each renamed capture group has been emitted for this rule.
    const seen = new Map();

    const pattern = src
      // Match pattern rules, ie: $foo, providing that a corresponding rule was defined
      .replace(/\$(\w+)/g, (match, name) => {
        if (!this._def[name]) {
          // TODO: Verbosity to disable this warning, which is aimed at alerting user against typos while permitting actual matches
          console.warn("Ignoring undefined key " + name);
          return match;
        }
        if (!this._re[name]) {
          // Parse new child element
          this._build(name, level + 1);
        }
        // The child's source is embedded verbatim. The NPM package
        // regular-grammar additionally rewrote capturing groups here; that
        // clean-up is intentionally not applied.
        const childSource = this._re[name].source;
        // And extend with named capture groups for each sub-component
        return `(?<${name}>${childSource})`;
      })
      // Second-stage to ensure uniqueness of all named capture groups.
      // Only names whose underscore-separated segments each start with a
      // letter and are at least two characters long are matched here; other
      // group names are left untouched.
      .replace(/\(\?<((?:[A-Za-z][^_\W]+_?)+)(\d+)?>/g, (match, name) => {
        // Prepend key, and drop any trailing underscore/index
        const newName = key + '_' + name.replace(/_$/, "");
        const count = seen.get(newName);
        if (count) {
          // Repeated name: suffix with a running index (1, 2, ...)
          seen.set(newName, count + 1);
          return `(?<${newName}_${count}>`;
        }
        seen.set(newName, 1);
        return `(?<${newName}>`;
      });

    this._re[key] = new RegExp(pattern);
  }

  /** Retrieve the built top-level RegExp */
  get regex() {
    return this._re.TOP;
  }

  /** Apply this Grammar to the given string.
   * @param str String to evaluate
   * @param key If specified, evaluate against this sub-rule instead of 'TOP'.
   *   The sub-rule is built on demand if it has not been built yet.
   * @returns RegExp match results (the result of str.match).
   */
  match(str, key = 'TOP') {
    if (!this._re[key]) {
      this._build(key);
    }
    return str.match(this._re[key]);
  }

  /** Describe this Grammar (TODO) */
  describe() {
    console.log(this._re.TOP); // DEBUG/TODO
  }
}