-
Notifications
You must be signed in to change notification settings - Fork 10
/
Copy pathwaul-core
executable file
·1400 lines (1028 loc) · 136 KB
/
waul-core
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env node
// Waul precompiler (or derivative), copyright 2012 Spencer Tipping
// Licensed under the terms of the MIT source code license
// http://github.com/spencertipping/caterwaul
(function (f) {return f(f)}) (function (initializer, key, undefined) {
// Global caterwaul variable.
// Caterwaul creates a global symbol, caterwaul. Like jQuery, there's a mechanism to get the original one back if you don't want to replace it. You can call caterwaul.deglobalize() to return
// caterwaul and restore the global that was there when Caterwaul was loaded (might be useful in the unlikely event that someone else named their library Caterwaul). Note that deglobalize() is
// available only on the global caterwaul() function.
(function (f) {return f(f)})(function (initializer) {
var calls_init = function () {var f = function () {return f.init.apply(f, arguments)}; return f},
original_global = typeof caterwaul === 'undefined' ? undefined : caterwaul,
caterwaul_global = calls_init();
caterwaul_global.deglobalize = function () {caterwaul = original_global; return caterwaul_global};
caterwaul_global.core_initializer = initializer;
caterwaul_global.context = this;
// The merge() function is compromised for the sake of Internet Explorer, which contains a bug-ridden and otherwise horrible implementation of Javascript. The problem is that, due to a bug in
// hasOwnProperty and DontEnum within JScript, these two expressions are evaluated incorrectly:
// | for (var k in {toString: 5}) alert(k); // no alert on IE
// ({toString: 5}).hasOwnProperty('toString') // false on IE
// To compensate, merge() manually copies toString if it is present on the extension object.
caterwaul_global.merge = (function (o) {for (var k in o) if (o.hasOwnProperty(k)) return true})({toString: true}) ?
// hasOwnProperty, and presumably iteration, both work, so we use the sensible implementation of merge():
function (o) {for (var i = 1, l = arguments.length, _; i < l; ++i) if (_ = arguments[i]) for (var k in _) if (Object.prototype.hasOwnProperty.call(_, k)) o[k] = _[k]; return o} :
// hasOwnProperty, and possibly iteration, both fail, so we hack around the problem with this gem:
function (o) {for (var i = 1, l = arguments.length, _; i < l; ++i)
if (_ = arguments[i]) {for (var k in _) if (Object.prototype.hasOwnProperty.call(_, k)) o[k] = _[k];
if (_.toString && ! /\[native code\]/.test(_.toString.toString())) o.toString = _.toString} return o},
// Modules.
// Caterwaul 1.1.7 adds support for a structured form for defining modules. This isn't particularly interesting or revolutionary by itself; it's just a slightly more structured way to do what
// most Caterwaul extensions have been doing with toplevel functions. For example, a typical extension looks something like this:
// | caterwaul('js_all')(function ($) {
// $.something(...) = ...,
// where [...]})(caterwaul);
// Here's what the equivalent module syntax looks like:
// | caterwaul.module('foo', 'js_all', function ($) { // equivalent to caterwaul.module('foo', caterwaul('js_all')(function ($) {...}))
// $.something(...) = ...,
// where [...]});
// Note that the module name has absolutely nothing to do with what the module does. I'm adding modules for a different reason entirely. When you bind a module like this, Caterwaul stores the
// initialization function onto the global object. So, for example, when you run caterwaul.module('foo', f), you have the property that caterwaul.foo_initializer === f. This is significant
// because you can later reuse this function on a different Caterwaul object. In particular, you can do things like sending modules from the server to the client, since the Caterwaul global is
// supplied as a parameter rather than being closed over.
// You can invoke module() with just a name to get the initializer function for that module. This ultimately means that, given only a runtime instance of a Caterwaul function configured with one
// or modules, you can construct a string of Javascript code sufficient to recreate an equivalent Caterwaul function elsewhere. (The replicator() method does this by returning a syntax tree.)
caterwaul_global.modules = [];
caterwaul_global.module = function (name, transform, f) {
if (arguments.length === 1) return caterwaul_global[name + '_initializer'];
name + '_initializer' in caterwaul_global || caterwaul_global.modules.push(name);
f || (f = transform, transform = null);
(caterwaul_global[name + '_initializer'] = transform ? caterwaul_global(transform)(f) : f)(caterwaul_global);
return caterwaul_global};
return caterwaul = caterwaul_global});
// Utility methods.
// Utility functions here are:
// | 1. qw Splits a string into space-separated words and returns an array of the results. This is a Perl idiom that's really useful when writing lists of things.
// 2. se Side-effects on a value and returns the value.
// 3. fail Throws an error. This isn't particularly special except for the fact that the keyword 'throw' can't be used in expression context.
// 4. gensym Generates a string that will never have been seen before.
// 5. bind Fixes 'this' inside the function being bound. This is a common Javascript idiom, but is reimplemented here because we don't know which other libraries are available.
// 6. map Maps a function over an array-like object and returns an array of the results.
// 7. rmap Recursively maps a function over arrays.
// 8. hash Takes a string, splits it into words, and returns a hash mapping each of those words to true. This is used to construct sets.
// Side-effecting is used to initialize things statefully; for example:
// | return se(function () {return 5}, function (f) {
// f.sourceCode = 'return 5';
// });
// Gensyms are unique identifiers that end with high-entropy noise that won't appear in the source being compiled. The general format of a gensym is name_count_suffix, where 'name' is provided by
// whoever requested the gensym (this allows gensyms to be more readable), 'count' is a base-36 number that is incremented with each gensym, and 'suffix' is a constant base-64 string containing
// 128 bits of entropy. (Since 64 possibilities is 6 bits, this means that we have 22 characters.)
var qw = function (x) {return x.split(/\s+/)}, se = function (x, f) {return f && f.call(x, x) || x}, fail = function (m) {throw new Error(m)},
unique = key || (function () {for (var xs = [], d = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789$_', i = 21, n; i >= 0; --i) xs.push(d.charAt(Math.random() * 64 >>> 0));
return xs.join('')})(),
gensym = (function (c) {return function (name) {return [name || '', (++c).toString(36), unique].join('_')}})(0), is_gensym = function (s) {return s.substr(s.length - 22) === unique},
bind = function (f, t) {return function () {return f.apply(t, arguments)}},
map = function (f, xs) {for (var i = 0, ys = [], l = xs.length; i < l; ++i) ys.push(f(xs[i], i)); return ys},
rmap = function (f, xs) {return map(function (x) {return x instanceof Array ? rmap(f, x) : f(x)})},
hash = function (s) {for (var i = 0, xs = qw(s), o = {}, l = xs.length; i < l; ++i) o[xs[i]] = true; return annotate_keys(o)},
// Optimizations.
// The parser and lexer each assume valid input and do no validation. This is possible because any function passed in to caterwaul will already have been parsed by the Javascript interpreter;
// syntax errors would have caused an error there. This enables a bunch of optimization opportunities in the parser, ultimately making it not in any way recursive and requiring only three
// linear-time passes over the token stream. (An approximate figure; it actually does about 19 fractional passes, but not all nodes are reached.)
// Also, I'm not confident that all Javascript interpreters are smart about hash indexing. Particularly, suppose a hashtable has 10 entries, the longest of whose keys is 5 characters. If we
// throw a 2K string at it, it might very well hash that whole thing just to find that, surprise, the entry doesn't exist. That's a big performance hit if it happens very often. To prevent this
// kind of thing, I'm keeping track of the longest string in the hashtable by using the 'annotate_keys' function. 'has()' knows how to look up the maximum length of a hashtable to verify that
// the candidate is in it, resulting in the key lookup being only O(n) in the longest key (generally this ends up being nearly O(1), since I don't like to type long keys), and average-case O(1)
// regardless of the length of the candidate.
// As of Caterwaul 0.7.0 the _max_length property has been replaced by a gensym. This basically guarantees uniqueness, so the various hacks associated with working around the existence of the
// special _max_length key are no longer necessary.
// Caterwaul 1.3 introduces the gensym_entropy() method, which allows you to grab the 128 bits of pseudorandom data that are used to mark gensyms. Be careful with this. If you introduce this
// data into your code, you compromise the uniqueness of past and future gensyms because you know enough to reproduce them and predict their future values.
max_length_key = gensym('hash'),
annotate_keys = function (o) {var max = 0; for (var k in o) own.call(o, k) && (max = k.length > max ? k.length : max); o[max_length_key] = max; return o},
has = function (o, p) {return p != null && ! (p.length > o[max_length_key]) && own.call(o, p)}, own = Object.prototype.hasOwnProperty,
caterwaul_global = caterwaul.merge(caterwaul, {map: map, rmap: rmap, gensym: gensym, is_gensym: is_gensym, gensym_entropy: function () {return unique}}),
// Shared parser data.
// This data is used both for parsing and for serialization, so it's made available to all pieces of caterwaul.
// Precomputed table values.
// The lexer uses several character lookups, which I've optimized by using integer->boolean arrays. The idea is that instead of using string membership checking or a hash lookup, we use the
// character codes and index into a numerical array. This is guaranteed to be O(1) for any sensible implementation, and is probably the fastest JS way we can do this. For space efficiency,
// only the low 256 characters are indexed. High characters will trigger sparse arrays, which may degrade performance. Also, this parser doesn't handle Unicode characters properly; it assumes
// lower ASCII only.
// The lex_op table indicates which elements trigger regular expression mode. Elements that trigger this mode cause a following / to delimit a regular expression, whereas other elements would
// cause a following / to indicate division. By the way, the operator ! must be in the table even though it is never used. The reason is that it is a substring of !==; without it, !== would
// fail to parse.
// Caterwaul 1.1.3 adds support for Unicode characters, even though they're technically not allowed as identifiers in Javascript. All Unicode characters are treated as identifiers since
// Javascript assigns no semantics to them.
// Caterwaul 1.2 adds @ as an identifier character. This is a hack for me to encode metadata on symbols without having to build subtrees, and it is transparent to Javascript->Javascript
// compilation since @ is not a valid character in Javascript.
// ES5 adds function* and yield* as keywords, which breaks the "reserved words look like identifiers" property.
lex_starred = hash('function* yield*'),
lex_op = hash('. ?. new ++ -- u++ u-- u+ u- typeof void u~ u! u* u... ! * / % + - << >> >>> < > <= >= instanceof in of == != === !== & ^ | && || ?? ? = ' +
'+= -= *= /= %= &= |= ^= <<= >>= >>>= : , &&= ||= ??= => return throw case var const let async await yield yield* break continue else u; ;'),
lex_table = function (s) {for (var i = 0, xs = [false]; i < 8; ++i) xs.push.apply(xs, xs); for (var i = 0, l = s.length; i < l; ++i) xs[s.charCodeAt(i)] = true; return xs},
lex_float = lex_table('.0123456789'), lex_decimal = lex_table('0123456789'), lex_integer = lex_table('0123456789abcdefABCDEFx'), lex_exp = lex_table('eE'),
lex_space = lex_table(' \n\r\t'), lex_bracket = lex_table('()[]{}?:'), lex_opener = lex_table('([{?:'), lex_punct = lex_table('+-*/%&|^!~=<>?:;.,'),
lex_eol = lex_table('\n\r'), lex_regexp_suffix = lex_table('gims'), lex_quote = lex_table('\'"/'), lex_slash = '/'.charCodeAt(0),
lex_zero = '0'.charCodeAt(0), lex_postfix_unary = hash('++ --'), lex_ident = lex_table('@\\$_abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'),
lex_star = '*'.charCodeAt(0), lex_back = '\\'.charCodeAt(0), lex_x = 'x'.charCodeAt(0), lex_dot = '.'.charCodeAt(0),
lex_backtick = '`'.charCodeAt(0), lex_hash = '#'.charCodeAt(0), lex_qmark = '?'.charCodeAt(0), lex_n = 'n'.charCodeAt(0),
// Parse data.
// The lexer and parser aren't entirely separate, nor can they be considering the complexity of Javascript's grammar. The lexer ends up grouping parens and identifying block constructs such
// as 'if', 'for', 'while', and 'with'. The parser then folds operators and ends by folding these block-level constructs.
// Caterwaul 1.2.7 and 1.3 introduce a few fictional operators into the list. These operators are written such that they could never appear in valid Javascript, but are available to
// non-Javascript compilers like waul. So far these operators are:
// | :: right-associative; folds just after == and ===
// ::: right-associative; folds just after ::
// These operators matter only if you're writing waul-facing code. If you're writing Javascript-to-Javascript mappings you can ignore their existence, since no valid Javascript will contain
// them in the first place.
parse_reduce_order = map(hash, ['function function*', 'new', '( [ . [] () ?.', 'delete void', 'u++ u-- ++ -- typeof u~ u! u+ u- u* u...', '* / %', '+ -', '<< >> >>>',
'< > <= >= instanceof in', '== != === !==', '::', ':::', '&', '^', '|', '&&', '|| ??', '->', 'case async',
'? = += -= *= /= %= &= |= ^= <<= >>= >>>= &&= ||= ??=', '=>', ': of', ',', 'return throw break continue yield yield* await', 'var const let',
'if else try catch finally for switch with while do', ';']),
parse_associates_right = hash('= += -= *= /= %= &= ^= |= <<= >>= >>>= &&= ||= ??= :: ::: -> => ~ ! new typeof void u+ u* u- -- ++ u-- u++ ? if else function function* try catch finally for ' +
'switch case with while do async'),
parse_inverse_order = (function (xs) {for (var o = {}, i = 0, l = xs.length; i < l; ++i) for (var k in xs[i]) has(xs[i], k) && (o[k] = i); return annotate_keys(o)})(parse_reduce_order),
parse_index_forward = (function (rs) {for (var xs = [], i = 0, l = rs.length, _ = null; _ = rs[i], xs[i] = true, i < l; ++i)
for (var k in _) if (has(_, k) && (xs[i] = xs[i] && ! has(parse_associates_right, k))) break; return xs})(parse_reduce_order),
parse_lr = hash('[] . () ?. * / % + - << >> >>> < > <= >= instanceof in of == != === !== & ^ | && || ?? -> => = += -= *= /= %= &= |= ^= <<= >>= >>>= &&= ||= ??= , : ;'),
parse_r_until_block = annotate_keys({'function':2, 'function*':2, 'if':1, 'do':1, 'catch':1, 'try':1, 'for':1, 'while':1, 'with':1, 'switch':1}),
parse_accepts = annotate_keys({'if':'else', 'do':'while', 'catch':'finally', 'try':'catch'}), parse_invocation = hash('[] ()'),
parse_r_optional = hash('return throw break continue else'), parse_l = hash('++ --'), parse_group = annotate_keys({'(':')', '[':']', '{':'}', '?':':'}),
parse_block = hash('; {'), parse_invisible = hash('i;'), parse_r = hash('u+ u- u! u~ u++ u-- u* u... new typeof finally case let var const void delete yield yield* await'),
parse_ambiguous_group = hash('[ ('), parse_ternary = hash('?'), parse_not_a_value = hash('function function* if for while with catch void delete new typeof of in instanceof'),
parse_also_expression = hash('function function*'), parse_misleading_postfix = hash(':'),
// Syntax data structures.
// There are two data structures used for syntax trees. At first, paren-groups are linked into doubly-linked lists, described below. These are then folded into immutable array-based specific
// nodes. At the end of folding there is only one child per paren-group.
// Doubly-linked paren-group lists.
// When the token stream is grouped into paren groups it has a hierarchical linked structure that conceptually has these pointers:
// | +--------+
// +------ | node | ------+
// | +-> | | <--+ |
// first | | +--------+ | | last
// | | parent parent | |
// V | | V
// +--------+ +--------+
// | node | --- r --> | node | --- r ---/
// /--- l --- | | <-- l --- | |
// +--------+ +--------+
// The primary operation performed on this tree, at least initially, is repeated folding. So we have a chain of linear nodes, and one by one certain nodes fold their siblings underneath them,
// breaking the children's links and linking instead to the siblings' neighbors. For example, if we fold node (3) as a binary operator:
// | (1) <-> (2) <-> (3) <-> (4) <-> (5) (1) <--> (3) <--> (5)
// / \ / \ / \ / \ / \ --> / \ / \ / \
// / \
// (2) (4) <- No link between children
// / \ / \ (see 'Fold nodes', below)
// Fold nodes.
// Once a node has been folded (e.g. (3) in the diagram above), none of its children will change and it will gain no more children. The fact that none of its children will change can be shown
// inductively: suppose you've decided to fold the '+' in 'x + y' (here x and y are arbitrary expressions). This means that x and y are comprised of higher-precedence operators. Since there is
// no second pass back to high-precedence operators, x and y will not change nor will they interact with one another. The fact that a folded node never gains more children arrives from the fact
// that it is folded only once; this is by virtue of folding by index instead of by tree structure. (Though a good tree traversal algorithm also wouldn't hit the same node twice -- it's just
// less obvious when the tree is changing.)
// Anyway, the important thing about fold nodes is that their children don't change. This means that an array is a completely reasonable data structure to use for the children; it certainly
// makes the structure simpler. It also means that the only new links that must be added to nodes as they are folded are links to new children (via the array), and links to the new siblings.
// Once we have the array-form of fold nodes, we can build a query interface similar to jQuery, but designed for syntactic traversal. This will make routine operations such as macro
// transformation and quasiquoting far simpler later on.
// Both grouping and fold nodes are represented by the same data structure. In the case of grouping, the 'first' pointer is encoded as [0] -- that is, the first array element. It doesn't
// contain pointers to siblings of [0]; these are still accessed by their 'l' and 'r' pointers. As the structure is folded, the number of children of each paren group should be reduced to just
// one. At this point the remaining element's 'l' and 'r' pointers will both be null, which means that it is in hierarchical form instead of linked form.
// After the tree has been fully generated and we have the root node, we have no further use for the parent pointers. This means that we can use subtree sharing to save memory. Once we're past
// the fold stage, push() should be used instead of append(). append() works in a bidirectionally-linked tree context (much like the HTML DOM), whereas push() works like it does for arrays
// (i.e. no parent pointer).
// Syntax node functions.
// These functions are common to various pieces of syntax nodes. Not all of them will always make sense, but the prototypes of the constructors can be modified independently later on if it
// turns out to be an issue.
syntax_common = caterwaul_global.syntax_common = {
// Mutability.
// These functions let you modify nodes in-place. They're used during syntax folding and shouldn't really be used after that (hence the underscores).
_replace: function (n) {return (n.l = this.l) && (this.l.r = n), (n.r = this.r) && (this.r.l = n), this}, _append_to: function (n) {return n && n._append(this), this},
_reparent: function (n) {return this.p && this.p[0] === this && (this.p[0] = n), this}, _fold_l: function () {return this._append(this.l && this.l._unlink(this) || empty)},
_append: function (n) {return (this[this.length++] = n) && (n.p = this), this}, _fold_r: function () {return this._append(this.r && this.r._unlink(this) || empty)},
_sibling: function (n) {return n.p = this.p, (this.r = n).l = this}, _fold_lr: function () {return this._fold_l()._fold_r()},
_fold_rr: function () {return this._fold_r()._fold_r()},
_wrap: function (n) {return n.p = this._replace(n).p, this._reparent(n), delete this.l, delete this.r, this._append_to(n)},
_unlink: function (n) {return this.l && (this.l.r = this.r), this.r && (this.r.l = this.l), delete this.l, delete this.r, this._reparent(n)},
// These methods are OK for use after the syntax folding stage is over (though because syntax nodes are shared it's generally dangerous to go modifying them):
pop: function () {return delete this[--this.length], this}, push: function (x) {return this[this.length++] = caterwaul_global.syntax.promote(x || empty), this},
// Identification.
// You can request that a syntax node identify itself, in which case it will give you an identifier if it hasn't already. The identity is not determined until the first time it is requested,
// and after that it is stable. As of Caterwaul 0.7.0 the mechanism works differently (i.e. isn't borked) in that it replaces the prototype definition with an instance-specific closure the
// first time it gets called. This may reduce the number of decisions in the case that the node's ID has already been computed.
id: function () {var id = gensym('id'); return (this.id = function () {return id})()},
is_caterwaul_syntax: true,
// Traversal functions.
// each() is the usual side-effecting shallow traversal that returns 'this'. map() distributes a function over a node's children and returns the array of results, also as usual. Two variants,
// reach and rmap, perform the process recursively. reach is non-consing; it returns the original as a reference. rmap, on the other hand, follows some rules to cons a new tree. If the
// function passed to rmap() returns the node verbatim then its children are traversed. If it returns a distinct node, however, then traversal doesn't descend into the children of the newly
// returned tree but rather continues as if the original node had been a leaf. For example:
// | parent Let's suppose that a function f() has these mappings:
// / \
// node1 node2 f(parent) = parent f(node1) = q
// / \ | f(node2) = node2
// c1 c2 c3
// In this example, f() would be called on parent, node1, node2, and c3 in that order. c1 and c2 are omitted because node1 was replaced by q -- and there is hardly any point in going through
// the replaced node's previous children. (Nor is there much point in forcibly iterating over the new node's children, since presumably they are already processed.) If a mapping function
// returns something falsy, it will have exactly the same effect as returning the node without modification.
// Recursive map() and each() variants have another form starting with Caterwaul 1.1.3. These are pmap() and peach(), which recursively traverse the tree in post-order. That is, the node
// itself is visited after its children are.
// Using the old s() to do gensym-safe replacement requires that you invoke it only once, and this means that for complex macroexpansion you'll have a long array of values. This isn't ideal,
// so syntax trees provide a replace() function that handles replacement more gracefully:
// | qs[(foo(_foo), _before_bar + bar(_bar))].replace({_foo: qs[x], _before_bar: qs[3 + 5], _bar: qs[foo.bar]})
// Controlling rmap() traversal.
// rmap() provides a fairly rich interface to allow you to inform Caterwaul about what to do with each subtree. For each visited node, you can do three things:
// | 1. Replace the node with another value. The value you return should be either a string (in which case it will be promoted into a node), or a syntax node. Traversal stops here.
// 2. Preserve the original value, but descend into children. In this case you should return either the original tree or false.
// 3. Preserve the original value, but don't descend into children. In this case you should return true. This has the advantage that it avoids allocating copies of trees that you don't
// intend to modify. You can also use this to escape from an rmap() operation by continuing to return true.
each: function (f) {for (var i = 0, l = this.length; i < l; ++i) f(this[i], i); return this},
map: function (f) {for (var n = new this.constructor(this), i = 0, l = this.length; i < l; ++i) n.push(f(this[i], i) || this[i]); return n},
reach: function (f) {f(this); for (var i = 0, l = this.length; i < l; ++i) this[i].reach(f); return this},
rmap: function (f) {var r = f(this); return ! r || r === this ? this.map(function (n) {return n.rmap(f)}) : r === true ? this : r.rmap === undefined ? new this.constructor(r) : r},
peach: function (f) {for (var i = 0, l = this.length; i < l; ++i) this[i].peach(f); f(this); return this},
pmap: function (f) {var t = this.map(function (n) {return n.pmap(f)}); return f(t)},
clone: function () {return this.rmap(function () {return false})},
collect: function (p) {var ns = []; this.reach(function (n) {p(n) && ns.push(n)}); return ns},
replace: function (rs) {var r; return own.call(rs, this.data) && (r = rs[this.data]) ?
r.constructor === String ? se(this.map(function (n) {return n.replace(rs)}), function () {this.data = r}) : r :
this.map(function (n) {return n.replace(rs)})},
// Alteration.
// These functions let you make "changes" to a node by returning a modified copy.
thin_clone: function () {return this.map(function () {return false})},
repopulated_with: function (xs) {return new this.constructor(this.data, xs)},
with_data: function (d) {return new this.constructor(d, Array.prototype.slice.call(this))},
change: function (i, x) {return se(new this.constructor(this.data, Array.prototype.slice.call(this)), function (n) {n[i] = x})},
compose_single: function (i, f) {return this.change(i, f(this[i]))},
slice: function (x1, x2) {return new this.constructor(this.data, Array.prototype.slice.call(this, x1, x2))},
// General-purpose traversal.
// This is a SAX-style traversal model, useful for analytical or scope-oriented tree traversal. You specify a callback function that is invoked in pre-post-order on the tree (you get events
// for entering and exiting each node, including leaves). Each time a node is entered, the callback is invoked with an object of the form {entering: node}, where 'node' is the syntax node
// being entered. Each time a node is left, the callback is invoked with an object of the form {exiting: node}. The return value of the function is not used. Any null nodes are not traversed,
// since they would fail any standard truthiness tests for 'entering' or 'exiting'.
// I used to have a method to perform scope-annotated traversal, but I removed it for two reasons. First, I had no use for it (and no tests, so I had no reason to believe that it worked).
// Second, Caterwaul is too low-level to need such a method. That would be more appropriate for an analysis extension.
traverse: function (f) {f({entering: this}); f({exiting: this.each(function (n) {n.traverse(f)})}); return this},
// Structural transformation.
// Having nested syntax trees can be troublesome. For example, suppose you're writing a macro that needs a comma-separated list of terms. It's a lot of work to dig through the comma nodes,
// each of which is binary. Javascript is better suited to using a single comma node with an arbitrary number of children. (This also helps with the syntax tree API -- we can use .map() and
// .each() much more effectively.) Any binary operator can be transformed this way, and that is exactly what the flatten() method does. (flatten() returns a new tree; it doesn't modify the
// original.)
// The tree flattening operation looks like this for a left-associative binary operator:
// | (+)
// / \ (+)
// (+) z -> / | \
// / \ x y z
// x y
// This flatten() method returns the nodes along the chain of associativity, always from left to right. It is shallow, since generally you only need a localized flat tree. That is, it doesn't
// descend into the nodes beyond the one specified by the flatten() call. It takes an optional parameter indicating the operator to flatten over; if the operator in the tree differs, then the
// original node is wrapped in a unary node of the specified operator. The transformation looks like this:
// | (,)
// (+) |
// / \ .flatten(',') -> (+)
// x y / \
// x y
// Because ',' is a binary operator, a ',' tree with just one operand will be serialized exactly as its lone operand would be. This means that plurality over a binary operator such as comma
// or semicolon degrades gracefully for the unary case (this sentence makes more sense in the context of macro definitions; see in particular 'let' and 'where' in std.bind).
// The unflatten() method performs the inverse transformation. It doesn't delete a converted unary operator in the tree case, but if called on a node with more than two children it will nest
// according to associativity.
flatten: function (d) {d = d || this.data; return d !== this.data ? this.as(d) : ! (has(parse_lr, d) && this.length) ? this : has(parse_associates_right, d) ?
se(new this.constructor(d), bind(function (n) {for (var i = this; i && i.data === d; i = i[1]) n.push(i[0]); n.push(i)}, this)) :
se(new this.constructor(d), bind(function (n) {for (var i = this, ns = []; i.data === d; i = i[0]) i[1] && ns.push(i[1]); ns.push(i);
for (i = ns.length - 1; i >= 0; --i) n.push(ns[i])}, this))},
unflatten: function () {var t = this, right = has(parse_associates_right, this.data); return this.length <= 2 ? this : se(new this.constructor(this.data), function (n) {
if (right) for (var i = 0, l = t.length - 1; i < l; ++i) n = n.push(t[i]).push(i < l - 2 ? t.data : t[i])[1];
else for (var i = t.length - 1; i >= 1; --i) n = n.push(i > 1 ? t.data : t[0]).push(t[i])[0]})},
// Wrapping.
// Sometimes you want your syntax tree to have a particular operator, and if it doesn't have that operator you want to wrap it in a node that does. Perhaps the most common case of this is
// when you have a possibly-plural node representing a variable or expression -- often the case when you're dealing with argument lists -- and you want to be able to assume that it's wrapped
// in a comma node. Calling node.as(',') will return the node if it's a comma, and will return a new comma node containing the original one if it isn't.
as: function (d) {return this.data === d ? this : new caterwaul_global.syntax(d).push(this)},
// Value construction.
// Syntax nodes sometimes represent hard references to values instead of just syntax. (See 'References' for more information.) In order to compile a syntax tree in the right environment you
// need a mapping of symbols to these references, which is what the bindings() method returns. (It also collects references for all descendant nodes.) It takes an optional argument to
// populate, in case you already had a hash set aside for bindings -- though it always returns the hash.
// A bug in Caterwaul 0.5 and earlier failed to bind falsy values. This is no longer the case; nodes which bind values should indicate that they do so by setting a binds_a_value attribute
// (ref nodes do this on the prototype), indicating that their value should be read from the 'value' property. (This allows other uses of a 'value' property while making it unambiguous
// whether a particular node intends to bind something.)
// Caterwaul 1.1.6 adds the ability to bind values generated by expressions which are evaluated later. This is necessary for precompilation to work for things like the standard library
// 'using' modifier.
bindings: function (hash) {var result = hash || {}; this.reach(function (n) {n.add_bindings_to(result)}); return result},
expressions: function (hash) {var result = hash || {}; this.reach(function (n) {n.add_expressions_to(result)}); return result},
add_bindings_to: function (hash) {}, // No-ops for most syntax nodes, but see caterwaul_global.ref and caterwaul_global.expression_ref below.
add_expressions_to: function (hash) {},
resolve: function () {return this}, // Identity for most nodes. This is necessary to allow opaque refs to construct expression closures.
reduce: function () {return this}, // Ditto here; this is necessary to allow opaque refs to parse themselves but preserve expression ref bindings.
// Prefix/sufix data retrieval.
// Accessors that populate the prefix and suffix data fields if necessary. This is useful when dealing with nodes that don't have any prefix or suffix data to start with (i.e. stuff that
// wasn't generated by the parser). Temporary results are stored eagerly to reduce overall GC load -- the assumption is that this method will be called more than once.
// Infix data is an odd case. It is stored for operators with two terminal components, for instance paren groups. In this case, we need a total of three pieces of data to completely
// reconstruct the group. First, we need the stuff to the left of the left-paren (prefix). Second, we need the stuff to the left of the right-paren (infix). Third, if the group is at the end
// of the file, we need the stuff to the right of the whole thing (suffix). The suffix is used only for the last syntax node.
prefix: function (d) {return this.prefixes().push(d), this}, prefixes: function () {return this.prefix_data || (this.prefix_data = [])},
infix: function (d) {return this.infixes ().push(d), this}, infixes: function () {return this.infix_data || (this.infix_data = [])},
suffix: function (d) {return this.suffixes().push(d), this}, suffixes: function () {return this.suffix_data || (this.suffix_data = [])},
// Containment.
// You can ask a tree whether it contains any nodes that satisfy a given predicate. This is done using the .contains() method and is significantly more efficient than using .collect() if your
// tree does in fact contain a matching node.
contains: function (f) {var result = f(this);
if (result) return result;
for (var i = 0, l = this.length; i < l; ++i) if (result = this[i].contains(f)) return result},
// Matching.
// Any syntax tree can act as a matching pattern to destructure another one. It's often much more fun to do things this way than it is to try to pick it apart by hand. For example, suppose
// you wanted to determine whether a node represents a function that immediately returns, and to know what it returns. The simplest way to do it is like this:
// | var tree = ...
// var match = caterwaul.parse('function (_) {return _value}').match(tree);
// if (match) {
// var value = match._value;
// ...
// }
// The second parameter 'variables' stores a running total of match data. You don't provide this; match() creates it for you on the toplevel invocation. The entire original tree is available
// as a match variable called '_'; for example: t.match(u)._ === u if u matches t.
// Caterwaul 1.2 introduces syntax node metadata using @. This is not returned in the match result; for instance:
// | var pattern = caterwaul.parse('_x@0 + foo');
// pattern.match('bar + foo') -> {_x: {'bar'}, _: {'bar + foo'}}
match: function (target, variables) {target = target.constructor === String ? caterwaul_global.parse(target) : target;
variables || (variables = {_: target});
if (this.is_wildcard() && (!this.leaf_nodes_only() || !target.length)) return variables[this.without_metadata()] = target, variables;
else if (this.length === target.length && this.data === target.data) {for (var i = 0, l = this.length; i < l; ++i)
if (! this[i].match(target[i], variables)) return null;
return variables}},
// Inspection and syntactic serialization.
// Syntax nodes can be both inspected (producing a Lisp-like structural representation) and serialized (producing valid Javascript code). In the past, stray 'r' links were serialized as block
// comments. Now they are folded into implied semicolons by the parser, so they should never appear by the time serialization happens.
toString: function (depth) {var xs = ['']; this.serialize(xs, depth || -1); return xs.join('')},
structure: function () {if (this.length) return '(' + ['"' + this.data + '"'].concat(map(function (x) {return x != null ? x.structure() : x}, this)).join(' ') + ')';
else return this.data}};
// Syntax node subclassing.
// Caterwaul 1.1.1 generalizes the variadic syntax node model to support arbitrary subclasses. This is useful when defining syntax trees for languages other than Javascript. As of Caterwaul
// 1.1.2 this method is nondestructive with respect to the constructor and other arguments.
// Caterwaul 1.2 allows you to extend all syntax classes in existence at once by invoking syntax_extend on one or more prototype extension objects. For example, you can add a new foo method to
// all syntax trees like this:
// | caterwaul.syntax_extend({foo: function () {...}});
// This also defines the 'foo' method for all syntax classes that are created in the future. It does this by adding the method definitions to syntax_common, which is implicitly merged into the
// prototype of any syntax subclass. syntax_extend returns the global Caterwaul object.
caterwaul_global.syntax_subclasses = [];
caterwaul_global.syntax_subclass = function (ctor) {var extensions = Array.prototype.slice.call(arguments, 1), proxy = function () {return ctor.apply(this, arguments)};
caterwaul_global.merge.apply(this, [proxy.prototype, syntax_common].concat(extensions));
caterwaul_global.syntax_subclasses.push(proxy);
proxy.prototype.constructor = proxy;
return proxy};
caterwaul_global.syntax_extend = function () {for (var i = 0, l = caterwaul_global.syntax_subclasses.length, es = Array.prototype.slice.call(arguments); i < l; ++i)
caterwaul_global.merge.apply(this, [caterwaul_global.syntax_subclasses[i].prototype].concat(es));
caterwaul_global.merge.apply(this, [syntax_common].concat(es));
return caterwaul_global};
// Type detection and retrieval.
// These methods are used to detect the literal type of a node and to extract that value if it exists. You should use the as_x methods only once you know that the node does represent an x;
// otherwise you will get misleading results. (For example, calling as_boolean on a non-boolean will always return false.)
// Other methods are provided to tell you higher-level things about what this node does. For example, is_contextualized_invocation() tells you whether the node represents a call that can't be
// eta-reduced (if it were, then the 'this' binding would be lost).
// Wildcards are used for pattern matching and are identified by beginning with an underscore. This is a very frequently-called method, so I'm using a very inexpensive numeric check rather
// than a string comparison. The ASCII value for underscore is 95.
var parse_hex = caterwaul_global.parse_hex = function (digits) {for (var result = 0, i = 0, l = digits.length, d; i < l; ++i)
result *= 16, result += (d = digits.charCodeAt(i)) <= 58 ? d - 48 : (d & 0x5f) - 55;
return result},
parse_octal = caterwaul_global.parse_octal = function (digits) {for (var result = 0, i = 0, l = digits.length; i < l; ++i) result *= 8, result += digits.charCodeAt(i) - 48;
return result},
unescape_string = caterwaul_global.unescape_string = function (s) {for (var i = 0, c, l = s.length, result = [], is_escaped = false; i < l; ++i)
if (is_escaped) is_escaped = false,
result.push((c = s.charAt(i)) === '\\' ? '\\' :
c === 'n' ? '\n' : c === 'r' ? '\r' : c === 'b' ? '\b' : c === 'f' ? '\f' :
c === '0' ? '\u0000' : c === 't' ? '\t' : c === 'v' ? '\v' :
c === '"' || c === '\'' ? c :
c === 'x' ? String.fromCharCode(parse_hex(s.substring(i, ++i + 1))) :
c === 'u' ? String.fromCharCode(parse_hex(s.substring(i, (i += 3) + 1))) :
String.fromCharCode(parse_octal(s.substring(i, (i += 2) + 1))));
else if ((c = s.charAt(i)) === '\\') is_escaped = true;
else result.push(c);
return result.join('')};
caterwaul_global.javascript_tree_type_methods = {
is_string: function () {return /['"`]/.test(this.data.charAt(0))}, as_escaped_string: function () {return this.data.substr(1, this.data.length - 2)},
is_number: function () {return /^-?(0x|\d|\.\d+)/.test(this.data)}, as_number: function () {return Number(this.data)},
is_boolean: function () {return this.data === 'true' || this.data === 'false'}, as_boolean: function () {return this.data === 'true'},
is_regexp: function () {return /^\/./.test(this.data)}, as_escaped_regexp: function () {return this.data.substring(1, this.data.lastIndexOf('/'))},
is_array: function () {return this.data === '['}, as_unescaped_string: function () {return unescape_string(this.as_escaped_string())},
could_be_identifier: function () {return /^[A-Za-z_$@][A-Za-z0-9$_@]*$/.test(this.data)},
is_identifier: function () {return this.length === 0 && this.could_be_identifier() && ! this.is_boolean() && ! this.is_null_or_undefined() && ! has(lex_op, this.data)},
has_grouped_block: function () {return has(parse_r_until_block, this.data)}, is_block: function () {return has(parse_block, this.data)},
is_blockless_keyword: function () {return has(parse_r_optional, this.data)}, is_null_or_undefined: function () {return this.data === 'null' || this.data === 'undefined'},
is_constant: function () {return this.is_number() || this.is_string() || this.is_boolean() || this.is_regexp() || this.is_null_or_undefined()},
left_is_lvalue: function () {return /=$/.test(this.data) || /\+\+$/.test(this.data) || /--$/.test(this.data)},
is_empty: function () {return !this.length}, is_dereference: function () {return this.data === '.' || this.data === '[]'},
is_invocation: function () {return this.data === '()'}, is_contextualized_invocation: function () {return this.is_invocation() && this[0].is_dereference()},
has_parameter_list: function () {return this.data === 'function' || this.data === 'function*' || this.data === 'catch'},
has_lvalue_list: function () {return this.data === 'var' || this.data === 'const' || this.data === 'let'},
is_invisible: function () {return has(parse_invisible, this.data)}, is_binary_operator: function () {return has(parse_lr, this.data)},
is_prefix_unary_operator: function () {return has(parse_r, this.data)}, is_postfix_unary_operator: function () {return has(parse_l, this.data)},
is_unary_operator: function () {return this.is_prefix_unary_operator() || this.is_postfix_unary_operator()},
precedence: function () {return parse_inverse_order[this.data]}, is_right_associative: function () {return has(parse_associates_right, this.data)},
is_associative: function () {return /^[,;]$/.test(this.data)}, is_group: function () {return /^[(\[{][)\]]?$/.test(this.data)},
accepts: function (e) {return has(parse_accepts, this.data) && parse_accepts[this.data] === (e.data || e)}};
// Tree metadata.
// When you're writing macros, you often want a concise way to indicate the role of a given tree node. Caterwaul's lexer parses a large superset of Javascript proper, which gives you room to
// indicate things like this by inserting special characters into identifiers. The rules are:
// | 1. Nodes beginning with an underscore are wildcards.
// 2. Nodes beginning with @ are gensym-erased; they are guaranteed to match no other symbol. (This is also true of the character @ alone, used as an identifier.)
// 3. Nodes can use @ later on to indicate the presence of match constraints. For example, you can indicate that a wildcard matches only leaf nodes by adding @0 to the end.
caterwaul_global.javascript_tree_metadata_methods = {
could_have_metadata: function () {return this.could_be_identifier()}, without_metadata: function () {return this.data.replace(/@.*$/g, '')},
is_wildcard: function () {return this.data.charCodeAt(0) === 95}, leaf_nodes_only: function () {return /@0/.test(this.data)},
is_opaque: function () {return this.data.charCodeAt(0) === 64}};
// Javascript-specific serialization.
// These methods are specific to the Javascript language. Other languages will have different serialization logic.
caterwaul_global.javascript_tree_serialization_methods = {
// Block detection.
// Block detection is required for multi-level if/else statements. Consider this code:
// | if (foo) for (...) {}
// else bif;
// A naive approach (the one I was using before version 0.6) would miss the fact that the 'for' was trailed by a block, and insert a spurious semicolon, which would break compilation:
// | if (foo) for (...) {}; // <- note!
// else bif;
// What we do instead is dig through the tree and find out whether the last thing in the 'if' case ends with a block. If so, then no semicolon is inserted; otherwise we insert one. This
// algorithm makes serialization technically O(n^2), but nobody nests if/else blocks to such an extent that it would matter.
ends_with_block: function () {var block = this[this.length - 1];
if (block && block.data === parse_accepts[this.data]) block = block[0];
return this.data === '{' || has(parse_r_until_block, this.data) && (this.data !== 'function' && this.data !== 'function*' || this.length === 3)
&& block && block.ends_with_block()},
// There's a hack here for single-statement if-else statements. (See 'Grab-until-block behavior' in the parsing code below.) Basically, for various reasons the syntax tree won't munch the
// semicolon and connect it to the expression, so we insert one automatically whenever the second node in an if, else, while, etc. isn't a block.
// Update for Caterwaul 0.6.6: I had removed mandatory spacing for unary prefix operators, but now it's back. The reason is to help out the host Javascript lexer, which can misinterpret
// postfix increment/decrement: x + +y will be serialized as x++y, which is invalid Javascript. The fix is to introduce a space in front of the second plus: x+ +y, which is unambiguous.
// Update for caterwaul 1.0: The serialize() method is now aggressively optimized for common cases. It also uses a flattened array-based concatenation strategy rather than the deeply nested
// approach from before.
// Caterwaul 1.2.1 introduces syntax guarding, the introduction of parentheses where necessary to enforce precedence/associativity that is encoded in the tree but wouldn't be represented in
// serialization. For example, the tree (* (+ foo bar) bif) would be rendered as foo + bar * bif, resulting in Javascript reinterpreting the operator precedence. After guarding, it would be
// rendered as (foo + bar) * bif.
// Internally, guarding is done by providing subtrees with a threshold precedence; if a node has a higher precedence index than its parent, it is parenthesized. Associativity matters as well.
// For instance, the tree (+ foo (+ bar bif)) also requires grouping even though both operators are the same precedence, whereas (= foo (= bar bif)) does not. This is done by checking whether
// the child's index is positive; positive indices must be in a right-associative position, so they are handed a precedence index one smaller than the parent's actual precedence. (We
// basically want to push the child to parenthesize if it's the same precedence, since it's associating the wrong way.)
// Groups are unambiguous despite having high precedence. To prevent double-grouping in cases like this, a precedence of 'undefined' is passed into children of groups or invocations. This
// simulates a toplevel invocation, which is implicitly unparenthesized.
never_guarded: function () {return this.is_group() || this.precedence() > parse_inverse_order[',']},
guarded: function (p) {var this_p = this.never_guarded() ? undefined : this.precedence(), associative = this.is_associative(), right = this.is_right_associative(),
result = this.map(function (x, i) {return x.guarded(this_p - (!associative && !right && !!i))});
return this_p > p ? result.as('(') : result},
// Optimized serialization cases.
// We can tell a lot about how to serialize a node based on just a few properties. For example, if the node has zero length then its serialization is simply its data. This is the leaf case,
// which is likely to be half of the total number of nodes in the whole syntax tree. If a node has length 1, then we assume a prefix operator unless we identify it as postfix. Otherwise we
// break it down by the kind of operator that it is.
// Nodes might be flattened, so we can't assume any upper bound on the arity regardless of what kind of operator it is. Realistically you shouldn't hand flattened nodes over to the compile()
// function, but it isn't the end of the world if you do.
// Caterwaul 1.3 automatically parenthesizes low-precedence operators in the middle of a ternary node. This prevents the syntax errors that pop up if you say things like 'foo ? bar, bif :
// baz'. Even though this construct is unambiguous, most Javascript runtimes fail to accept it.
// Caterwaul 1.3.1 fixes a Unicode serialization bug. The lex table only covers char-codes 0-255, but doesn't cover anything beyond that, so Caterwaul 1.3 interpreted higher codes as
// non-identifiers. This isn't correct, however; anytime we're dealing with a Unicode character, we need to insert a space just to be safe. The trick is to subtract the two boolean values.
// true - true, false - false, and anything involving undefined will all evaluate to falsy values, and true - false and false - true are truthy. So we ask the question, "are these two things
// different enough that we can omit the space?"
serialize: function (xs, depth) {var ep = function (x) {e(!p && (!x || lex_ident[xs[xs.length - 1].charCodeAt(0)] - lex_ident[x.charCodeAt(0)]) ? '' : ' '), x && e(x)},
e = function (x) {x && xs.push(x)}, p = this.prefix_data && this.prefix_data.join(''),
l = this.length, d = this.data, d1 = depth - 1, i = this.infix_data && this.infix_data.join(''),
s = this.suffix_data && this.suffix_data.join('');
if (depth === 0 && (l || d.length > 32)) return e('...');
switch (l) {case 0: if (has(parse_r_optional, d)) return ep(d.replace(/^u/, '')), e(s);
else if (has(parse_group, d)) return ep(d), e(i), e(parse_group[d]), e(s);
else return ep(d), e(s);
case 1: if (has(parse_r, d) || has(parse_r_optional, d)) return ep(d.replace(/^u/, '')), this[0].serialize(xs, d1), e(s);
else if (has(parse_misleading_postfix, d)) return this[0].serialize(xs, d1), ep(d), e(s);
else if (has(parse_group, d)) return ep(d), this[0].serialize(xs, d1), e(i), e(parse_group[d]), e(s);
else if (has(parse_lr, d)) return ep(), this[0].serialize(xs, d1), e(s);
else return this[0].serialize(xs, d1), ep(d), e(s);
case 2: if (has(parse_invocation, d)) return this[0].serialize(xs, d1), ep(d.charAt(0)), this[1].serialize(xs, d1), e(i), e(d.charAt(1)), e(s);
else if (has(parse_r_until_block, d)) return ep(d), this[0].serialize(xs, d1), this[1].serialize(xs, d1), e(s);
else if (has(parse_invisible, d)) return this[0].serialize(xs, d1), this[1].serialize(xs, d1), e(s);
else return this[0].serialize(xs, d1), ep(d), this[1].serialize(xs, d1), e(s);
default: if (has(parse_ternary, d)) return this[0].serialize(xs, d1), ep(d), this[1].precedence() > this.precedence()
? (this[1].as('(').serialize(xs, d1), e(i), e(':'), this[2].serialize(xs, d1), e(s))
: (this[1]. serialize(xs, d1), e(i), e(':'), this[2].serialize(xs, d1), e(s));
else if (has(parse_r_until_block, d)) return this.accepts(this[2]) && ! this[1].ends_with_block()
? (ep(d), this[0].serialize(xs, d1), this[1].serialize(xs, d1), e(';'), this[2].serialize(xs, d1), e(s))
: (ep(d), this[0].serialize(xs, d1), this[1].serialize(xs, d1), this[2].serialize(xs, d1), e(s));
else return ep(), this.unflatten().serialize(xs, d1), e(s)}}};
// References.
// You can drop references into code that you're compiling. This is basically variable closure, but a bit more fun. For example:
// | caterwaul.compile(qs[function () {return _ + 1}].replace({_: new caterwaul.ref(3)}))() // -> 4
// What actually happens is that caterwaul.compile runs through the code replacing refs with gensyms, and the function is evaluated in a scope where those gensyms are bound to the values they
// represent. This gives you the ability to use a ref even as an lvalue, since it's really just a variable. References are always leaves on the syntax tree, so the prototype has a length of 0.
// Caterwaul 1.0 adds named gensyms, and one of the things you can do is name your refs accordingly. If you don't name one it will just be called 'ref', but you can make it more descriptive by
// passing in a second constructor argument. This name will automatically be wrapped in a gensym, but that gensym will be removed at compile-time unless you specify not to rename gensyms.
caterwaul_global.ref_common = caterwaul_global.merge({}, caterwaul_global.javascript_tree_type_methods,
caterwaul_global.javascript_tree_metadata_methods,
caterwaul_global.javascript_tree_serialization_methods,
// Reference replace() support.
// Refs aren't normal nodes; in particular, invoking the constructor as we do in replace() will lose the ref's value and cause all kinds of problems. In order to avoid this we override the
// replace() method for syntax refs to behave more sensibly. Note that you can't replace a ref with a syntax
{replace: function (replacements) {var r; return own.call(replacements, this.data) && (r = replacements[this.data]) ?
r.constructor === String ? se(new this.constructor(this.value), function () {this.data = r}) :
r : this},
length: 0});
caterwaul_global.ref = caterwaul_global.syntax_subclass(
function (value, name) {if (value instanceof this.constructor) this.value = value.value, this.data = value.data;
else this.value = value, this.data = gensym(name && name.constructor === String ? name : 'ref')},
caterwaul_global.ref_common, {add_bindings_to: function (hash) {hash[this.data] = this.value}});
// Expression references.
// These are a step in between references and regular syntax nodes. The idea is that you want to bind a value, but you have an expression that can be executed later to generate it. This gives
// Caterwaul more options than it would have if you used a regular reference node. In particular, it enables Caterwaul to precompile the source containing the expression ref, since the
// expression can be evaluated later. For example:
// | caterwaul.compile(qs[x + 1].replace({x: new caterwaul.expression_ref('50 * 2')})) // -> 101
// This ends up evaluating code that looks like this:
// | (function (ref_gensym) {
// return ref_gensym + 1;
// }).call(this, 50 * 2)
caterwaul_global.expression_ref = caterwaul_global.syntax_subclass(
function (e, name) {if (e instanceof this.constructor) this.e = e.e, this.data = e.data;
else this.e = e, this.data = gensym(name && name.constructor === String ? name : 'e')},
caterwaul_global.ref_common, {add_expressions_to: function (hash) {hash[this.data] = this.e}});
// Metadata.
// Caterwaul 1.3 allows you to add metadata to the syntax tree. The assumption is that it will be removed before you compile the tree; as such, it is represented as an identifier beginning with
// an @ sign; this will trigger a compilation error if you leave it there. The purpose of metadata is to hold extra information that you don't want to attach to a specific node.
caterwaul_global.metadata_node = caterwaul_global.syntax_subclass(
function (d, name) {if (d instanceof this.constructor) this.metadata = d.metadata, this.data = d.data;
else this.metadata = d, this.data = '@' + (name || '')},
caterwaul_global.ref_common);
// Opaque (unparsed) code references.
// This gives Caterwaul a way to assemble code in a more performant manner. In particular, it lets Caterwaul omit the expensive (and unnecessary) parse() operation during a replicator() call.
// The idea here is that this node contains subtrees, but they are unparsed; as such, it appears to have no children and simply returns the code as a string as its data. You can call the node's
// parse() method to return a parsed tree of its contents.
// Caterwaul 1.2b7 adds the ability to preserve expression refs bound in an opaque object. This is a necessary step to solve the precompiled module problem described in 'Expression refs,
// modules, and precompilation' below.
// If you create an opaque tree without specifying a table of expression refs, Caterwaul checks the object you're passing in for a table under the attribute 'caterwaul_expression_ref_table'.
// This is Caterwaul's standard way of encoding reconstructible expression references. It should be a table of this form:
// | {ref_name_1: string, ref_name_2: string, ...}
// Each string represents the expression that is used to construct the expression that ends up being bound. So, for example:
// | > f = caterwaul.compile(caterwaul.parse('function () {return _foo}').replace({_foo: new caterwaul.expression_ref(caterwaul.parse('3 + 4'))}))
// > f.caterwaul_expression_ref_table
// { e_a_gensym: '3+4' }
// If you ask an opaque node for its expression bindings, it will return more opaque nodes of those strings. This way you can reuse the bindings from the compile() function, but it won't incur
// any parsing overhead.
caterwaul_global.opaque_tree = caterwaul_global.syntax_subclass(
function (code, expression_refs) {if (code instanceof this.constructor) this.data = code.data, this.expression_refs = code.expression_refs;
else this.data = code.toString(),
this.expression_refs = expression_refs || code.caterwaul_expression_ref_table;
var rs = this.expression_refs;
for (var k in rs) own.call(rs, k) && rs[k].constructor === String && (rs[k] = new caterwaul_global.opaque_tree(rs[k]))},
{resolve: function () {return this.expression_refs ? caterwaul_global.late_bound_tree(new this.constructor(this.data), this.expression_refs) : this},
reduce: function () {return this.expression_refs ? caterwaul_global.late_bound_tree(this.parse(), this.expression_refs) : this.parse()},
guarded: function () {return this},
serialize: function (xs) {return xs.push(this.data), xs},
parse: function () {return caterwaul_global.parse(this.data)}});
// Syntax node constructor.
// Here's where we combine all of the pieces above into a single function with a large prototype. Note that the 'data' property is converted from a variety of types; so far we support strings,
// numbers, and booleans. Any of these can be added as children. Also, I'm using an instanceof check rather than (.constructor ===) to allow array subclasses such as Caterwaul finite sequences
// to be used.
// Caterwaul 1.2 adds the static caterwaul.syntax.from_string() constructor to simplify string-based syntax node construction.
caterwaul_global.syntax = se(caterwaul_global.syntax_subclass(
function (data) {if (data instanceof this.constructor) this.data = data.data, this.length = 0, this.prefix_data = data.prefix_data,
this.infix_data = data.infix_data,
this.suffix_data = data.suffix_data;
else {this.data = data && data.toString(); this.length = 0;
for (var i = 1, l = arguments.length, _; _ = arguments[i], i < l; ++i)
for (var j = 0, lj = _.length, it, c; _ instanceof Array ? (it = _[j], j < lj) : (it = _, ! j); ++j)
this._append(caterwaul_global.syntax.promote(it))}},
caterwaul_global.javascript_tree_type_methods,
caterwaul_global.javascript_tree_metadata_methods,
caterwaul_global.javascript_tree_serialization_methods),
function () {this.around = function (start, end, d) {var s = new caterwaul_global.syntax(d); s.start = start; s.end = end; return s};
this.from_string = function (s) {return new caterwaul_global.syntax('"' + s.replace(/\\/g, '\\\\').replace(/"/g, '\\"').
replace(/\n/g, '\\n') + '"')};
this.from_array = function (xs) {for (var i = 0, c = new caterwaul_global.syntax(','), l = xs.length; i < l; ++i) c.push(xs[i]);
return new caterwaul_global.syntax('[', c.length ? c.unflatten() : [])};
this.from_object = function (o) {var comma = new caterwaul_global.syntax(',');
for (var k in o) if (own.call(o, k)) comma.push(new caterwaul_global.syntax(
':', /^[$_A-Za-z][A-Za-z0-9$_]*$/.test(k) ? k : caterwaul_global.syntax.from_string(k), o[k].as('(')));
return new caterwaul_global.syntax('{', comma.length ? comma.unflatten() : [])}});
caterwaul_global.syntax.promote = function (v) {var c = v.constructor; return c === String || c === Number || c === Boolean ? new caterwaul_global.syntax(v) : v};
var empty = caterwaul_global.empty = new caterwaul_global.syntax('');
// Parsing.
// There are two distinct parts to parsing Javascript. One is parsing the irregular statement-mode expressions such as 'if (condition) {...}' and 'function f(x) {...}'; the other is parsing
// expression-mode stuff like arithmetic operators. In Rebase I tried to model everything as an expression, but that failed sometimes because it required that each operator have fixed arity. In
// particular this was infeasible for keywords such as 'break', 'continue', 'return', and some others (any of these can be nullary or unary). It also involved creating a bizarre hack for 'case
// x:' inside a switch block. This hack made the expression passed in to 'case' unavailable, as it would be buried in a ':' node.
// Caterwaul fixes these problems by using a proper context-free grammar. However, it's much looser than most grammars because it doesn't need to validate anything. Correspondingly, it can be
// much faster as well. Instead of guessing and backtracking as a recursive-descent parser would, it classifies many different branches into the same basic structure and fills in the blanks. One
// example of this is the () {} pair, which occurs in a bunch of different constructs, including function () {}, if () {}, for () {}, etc. In fact, any time a () group is followed by a {} group
// we can grab the token that precedes () (along with perhaps one more in the case of function f () {}), and group that under whichever keyword is responsible.
// Syntax folding.
// The first thing to happen is that parenthetical, square bracket, and braced groups are folded up. This happens in a single pass that is linear in the number of tokens, and other foldable
// tokens (including unary and binary operators) are indexed by associativity. The following pass runs through these indexes from high to low precedence and folds tokens into trees. By this
// point all of the parentheticals have been replaced by proper nodes (here I include ?: groups in parentheticals, since they behave the same way). Finally, high-level rules are applied to the
// remaining keywords, which are bound last. This forms a complete parse tree.
// Doing all of this efficiently requires a linked list rather than an array. This gets built during the initial paren grouping stage. Arrays are used for the indexes, which are left-to-right
// and are later processed in the order indicated by the operator associativity. That is, left-associative operators are processed 0 .. n and right associative are processed n .. 0. Keywords
// are categorized by behavior and folded after all of the other operators. Semicolons are folded last, from left to right.
// There are some corner cases due to Javascript's questionable heritage from C-style syntax. For example, most constructs take either syntax blocks or semicolon-delimited statements. Ideally,
// else, while, and catch are associated with their containing if, do, and try blocks, respectively. This can be done easily, as the syntax is folded right-to-left. Another corner case would
// come up if there were any binary operators with equal precedence and different associativity. Javascript doesn't have them however, and it wouldn't make much sense to; it would render
// expressions such as 'a op1 b op2 c' ambiguous if op1 and op2 shared precedence but each wanted to bind first. (I mention this because at first I was worried about it, but now I realize it
// isn't an issue.)
// Notationally (for easier processing later on), a distinction is made between invocation and grouping, and between dereferencing and array literals. Dereferencing and function invocation are
// placed into their own operators, where the left-hand side is the thing being invoked or dereferenced and the right-hand side is the paren-group or bracket-group that is responsible for the
// operation. Also, commas inside these groups are flattened into a single variadic (possibly nullary) comma node so that you don't have to worry about the tree structure. This is the case for
// all left-associative operators; right-associative operators preserve their hierarchical folding.
// Parse/lex shared logic.
// Lexing Javascript is not entirely straightforward, primarily because of regular expression literals. The first implementation of the lexer got things right 99% of the time by inferring the
// role of a / by its preceding token. The problem comes in when you have a case like this:
// | if (condition) /foo/.test(x)
// In this case, (condition) will be incorrectly inferred to be a regular expression (since the close-paren terminates an expression, usually), and /foo/ will be interpreted as division by foo.
// We mark the position before a token and then just increment the position. The token, then, can be retrieved by taking a substring from the mark to the position. This eliminates the need for
// intermediate concatenations. In a couple of cases I've gone ahead and done them anyway -- these are for operators, where we grab the longest contiguous substring that is defined. I'm not too
// worried about the O(n^2) complexity due to concatenation; they're bounded by four characters.
// OK, so why use charAt() instead of regular expressions? It's a matter of asymptotic performance. V8 implements great regular expressions (O(1) in the match length for the (.*)$ pattern), but
// the substring() method is O(n) in the number of characters returned. Firefox implements O(1) substring() but O(n) regular expression matching. Since there are O(n) tokens per document of n
// characters, any O(n) step makes lexing quadratic. So I have to use the only reliably constant-time method provided by strings, charAt() (or in this case, charCodeAt()).
// Of course, building strings via concatenation is also O(n^2), so I also avoid that for any strings that could be long. This is achieved by using a mark to indicate where the substring
// begins, and advancing i independently. The span between mark and i is the substring that will be selected, and since each substring both requires O(n) time and consumes n characters, the
// lexer as a whole is O(n). (Though perhaps with a large constant.)
// Parse function.
// As mentioned earlier, the parser and lexer aren't distinct. The lexer does most of the heavy lifting; it matches parens and brackets, arranges tokens into a hierarchical linked list, and
// provides an index of those tokens by their fold order. It does all of this by streaming tokens into a micro-parser whose language is grouping and that knows about the oddities required to
// handle regular expression cases. In the same function, though as a distinct case, the operators are folded and the syntax is compiled into a coherent tree form.
// The input to the parse function can be anything whose toString() produces valid Javascript code.
caterwaul_global.parse = function (input) {
// Caterwaul 1.3 revision: parse() is closed under null-ness.
if (input == null) return input;
// Caterwaul 1.1 revision: Allow the parse() function to be used as a 'make sure this thing is a syntax node' function.
if (input.constructor === caterwaul_global.syntax) return input;
// Lex variables.
// s, obviously, is the string being lexed. mark indicates the position of the stream, while i is used for lookahead. The difference is later read into a token and pushed onto the result. c
// is a temporary value used to store the current character code. re is true iff a slash would begin a regular expression. esc is a flag indicating whether the next character in a string or
// regular expression literal is escaped. exp indicates whether we've seen the exponent marker in a number. close is used for parsing single and double quoted strings; it contains the
// character code of the closing quotation mark. t is the token to be processed.
// Parse variables.
// grouping_stack and gs_top are used for paren/brace/etc. matching. head and parent mark two locations in the linked syntax tree; when a new group is created, parent points to the opener
// (i.e. (, [, ?, or {), while head points to the most recently added child. (Hence the somewhat complex logic in push().) indexes[] determines reduction order, and contains references to the
// nodes in the order in which they should be folded. invocation_nodes is an index of the nodes that will later need to be flattened.
// The push() function manages the mechanics of adding a node to the initial linked structure. There are a few cases here; one is when we've just created a paren group and have no 'head'
// node; in this case we append the node as 'head'. Another case is when 'head' exists; in that case we update head to be the new node, which gets added as a sibling of the old head.
var s = input.toString().replace(/^\s*|\s*$/g, ''), mark = 0, c = 0, re = true, esc = false, dot = false, exp = false, close = 0, t = '', i = 0, l = s.length,
cs = function (i) {return s.charCodeAt(i)}, grouping_stack = [], gs_top = null, head = null, parent = null, indexes = map(function () {return []}, parse_reduce_order),
invocation_nodes = [], all_nodes = [empty], new_node = function (n) {return all_nodes.push(n), n},
push = function (n) {return head ? head._sibling(head = n) : (head = n._append_to(parent)), new_node(n)}, syntax_node = this.syntax, groups = [], ternaries = [],
prefix = [], shift_prefix = function () {var k = prefix; prefix = []; return k}, prefixed_node = function (n) {n.prefix_data = shift_prefix(); return new_node(n)};
// Trivial case.
// The empty string will break the lexer because we won't generate a token (since we're already at the end). To prevent this we return an empty syntax node immediately, since this is an
// accurate representation of no input.
if (l === 0) return empty;
// Main lex loop.
// This loop takes care of reading all of the tokens in the input stream. At the end, we'll have a linked node structure with paren groups. At the beginning, we set the mark to the current
// position (we'll be incrementing i as we read characters), munch whitespace, and reset flags.
while ((mark = i) < l) {
esc = exp = dot = t = 0;
// Miscellaneous lexing.
// This includes bracket resetting (the top case, where an open-bracket of any sort triggers regexp mode) and comment removal. Both line and block comments are removed by comparing against
// lex_slash, which represents /, and lex_star, which represents *.
// Caterwaul 1.1.6 adds recognition of # comments, which are treated just like other line comments. This is relevant in practice because node.js supports shebang-line invocation of
// Javascript files.
if (lex_space[c = cs(i)]) while (++i < l && lex_space[cs(i)]);
else if (lex_bracket[c] && c !== lex_qmark) ++i, t = 1, re = lex_opener[c];
else if (c === lex_slash && cs(i + 1) === lex_star && (i += 2)) while (++i <= l && (cs(i - 1) !== lex_slash || cs(i - 2) !== lex_star));
else if (c === lex_slash && cs(i + 1) === lex_slash) while (++i <= l && ! lex_eol[cs(i - 1)]);
else if (c === lex_hash) while (++i <= l && ! lex_eol[cs(i - 1)]);
// Backtick-string lexing.
// Simple enough: just parse until we see another backtick, skipping any backslashed backticks in the meantime. We don't parse or split interpolated expressions (sorry).
else if (c === lex_backtick) {while (++i < l && (c = cs(i)) !== lex_backtick || esc) esc = ! esc && c === lex_back; ++i; re = !(t = 1)}
// Regexp and string literal lexing.
// These both take more or less the same form. The idea is that we have an opening delimiter, which can be ", ', or /; and we look for a closing delimiter that follows. It is syntactically
// illegal for a string to occur anywhere that a slash would indicate division (and it is also illegal to follow a string literal with extra characters), so reusing the regular expression
// logic for strings is not a problem. (This follows because we know ahead of time that the Javascript is valid.)
else if (lex_quote[c] && (close = c) && re && ! (re = ! (t = s.charAt(i)))) {while (++i < l && (c = cs(i)) !== close || esc) esc = ! esc && c === lex_back;
while (++i < l && lex_regexp_suffix[cs(i)]) ; t = 1}
// Numeric literal lexing.
// This is far more complex than the above cases. Numbers have several different formats, each of which requires some custom logic. The reason we need to parse numbers so exactly is that it
// influences how the rest of the stream is lexed. One example is '0.5.toString()', which is perfectly valid Javascript. What must be output here, though, is '0.5', '.', 'toString', '(',
// ')'; so we have to keep track of the fact that we've seen one dot and stop lexing the number on the second.
// Another case is exponent-notation: 3.0e10. The hard part here is that it's legal to put a + or - on the exponent, which normally terminates a number. Luckily we can safely skip over any
// character that comes directly after an E or e (so long as we're really in exponent mode, which I'll get to momentarily), since there must be at least one digit after an exponent.
// The final case, which restricts the logic somewhat, is hexadecimal numbers. These also contain the characters 'e' and 'E', but we cannot safely skip over the following character, and any
// decimal point terminates the number (since '0x5.toString()' is also valid Javascript). The same follows for octal numbers; the leading zero indicates that there will be no decimal point,
// which changes the lex mode (for example, '0644.toString()' is valid).
// So, all this said, there are different logic branches here. One handles guaranteed integer cases such as hex/octal, and the other handles regular numbers. The first branch is triggered
// whenever a number starts with zero and is followed by 'x' or a digit (for conciseness I call 'x' a digit), and the second case is triggered when '.' is followed by a digit, or when a
// digit starts.
// A trivial change, using regular expressions, would reduce this logic significantly. I chose to write it out longhand because (1) it's more fun that way, and (2) the regular expression
// approach has theoretically quadratic time in the length of the numbers, whereas this approach keeps things linear. Whether or not that actually makes a difference I have no idea.
// Finally, in response to a recently discovered failure case, a period must be followed by a digit if it starts a number. The failure is the string '.end', which will be lexed as '.en',
// 'd' if it is assumed to be a floating-point number. (In fact, any method or property beginning with 'e' will cause this problem.)
else if (c === lex_zero && lex_integer[cs(i + 1)]) {while (++i < l && lex_integer[cs(i)]); re = ! (t = 1); i += cs(i) === lex_n}
else if (lex_float[c] && (c !== lex_dot || lex_decimal[cs(i + 1)])) {while (++i < l && (lex_decimal[c = cs(i)] || dot ^ (dot |= c === lex_dot) || exp ^ (exp |= lex_exp[c] && ++i)));
while (i < l && lex_decimal[cs(i)]) ++i; re = ! (t = 1); i += cs(i) === lex_n}
// Operator lexing.
// The 're' flag is reused here. Some operators have both unary and binary modes, and as a heuristic (which happens to be accurate) we can assume that anytime we expect a regular
// expression, a unary operator is intended. The only exception are ++ and --, which are always unary but sometimes are prefix and other times are postfix. If re is true, then the prefix
// form is intended; otherwise, it is postfix. For this reason I've listed both '++' and 'u++' (same for --) in the operator tables; the lexer is actually doing more than its job here by
// identifying the variants of these operators.
// The only exception to the regular logic happens if the operator is postfix-unary. (e.g. ++, --.) If so, then the re flag must remain false, since expressions like 'x++ / 4' can be valid.
// We have a special branch to parse the "..." prefix-unary splat operator. It's difficult to deal with otherwise since ".." isn't a valid operator (so you can't build up to it).
else if (re && c === lex_dot && s.substr(i, 3) === '...') i += 3, t = 'u...';
else if (lex_punct[c] && (t = re ? 'u' : '', re = true)) {while (i < l && lex_punct[cs(i)] && has(lex_op, t + s.charAt(i))) t += s.charAt(i++); re = ! has(lex_postfix_unary, t)}
else if (c === lex_qmark) ++i, t = re = 1;
// Identifier lexing.
// If nothing else matches, then the token is lexed as a regular identifier or Javascript keyword. The 're' flag is set depending on whether the keyword expects a value. The nuance here is
// that you could write 'x / 5', and it is obvious that the / means division. But if you wrote 'return / 5', the / would be a regexp delimiter because return is an operator, not a value. So
// at the very end, in addition to assigning t, we also set the re flag if the word turns out to be an operator.
// Extended ASCII and above are considered identifiers. This allows Caterwaul to parse Unicode source, even though it will fail to distinguish between Unicode operator symbols and Unicode
// letters.
else {while (++i < l && (lex_ident[c = cs(i)] || c > 0x7f || has(lex_starred, s.substring(mark, i + 1)))); re = has(lex_op, t = s.substring(mark, i))}
// Token unification.
// t will contain true, false, or a string. If false, no token was lexed; this happens when we read a comment, for example. If true, the substring method should be used. (It's a shorthand to
// avoid duplicated logic.) For reasons that are not entirely intuitive, the lexer sometimes produces the artifact 'u;'. This is never useful, so I have a case dedicated to removing it.
if (i === mark) throw new Error('Caterwaul lex error at "' + s.substr(mark, 80) + '" with leading context "' + s.substr(mark - 80, 80) + '" (probably a Caterwaul bug)');
if (t === 0) {prefix.push(s.substring(mark, i)); continue}
t = t === 1 ? s.substring(mark, i) : t === 'u;' ? ';' : t;
// Grouping and operator indexing.
// Now that we have a token, we need to see whether it affects grouping status. There are a couple of possibilities. If it's an opener, then we create a new group; if it's a matching closer
// then we close the current group and pop out one layer. (We don't check for matching here. Any code provided to Caterwaul will already have been parsed by the host Javascript interpreter,
// so we know that it is valid.)
// All operator indexing is done uniformly, left-to-right. Note that the indexing isn't strictly by operator. It's by reduction order, which is arguably more important. That's what the
// parse_inverse_order table does: it maps operator names to parse_reduce_order subscripts. (e.g. 'new' -> 2.)
// Caterwaul 1.3 inserts empty sentinels into all brackets with no contents. So, for example, the empty array [] would contain a single child, caterwaul.empty. This makes it much easier to
// write destructuring binds against syntax trees.
// Caterwaul 1.3.1 stores infix data on group nodes such as (), [], {}, and ?:. The result is that it should be possible to reconstruct the original input string.
t === gs_top ? (grouping_stack.pop(), gs_top = grouping_stack[grouping_stack.length - 1], (head || parent).infix_data = shift_prefix(), head = head ? head.p : parent, parent = null)
: (has(parse_group, t) ? (grouping_stack.push(gs_top = parse_group[t]), parent = push(prefixed_node(syntax_node.around(mark, i, t))), groups.push(parent), head = null)
: push(prefixed_node(syntax_node.around(mark, i, t))),
has(parse_inverse_order, t) && indexes[parse_inverse_order[t]].push(head || parent)); // <- This is where the indexing happens
// Regexp flag special cases.
// Normally a () group wraps an expression, so a following / would indicate division. The only exception to this is when we have a block construct; in this case, the next token appears in
// statement-mode, which means that it begins, not modifies, a value. We'll know that we have such a case if (1) the immediately-preceding token is a close-paren, and (2) a block-accepting
// syntactic form occurs to its left.