forked from Perl/perl5
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpeep.c
4194 lines (3675 loc) · 153 KB
/
peep.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* peep.c
*
* Copyright (C) 1991-2022 by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
*
*/
/*
* Aragorn sped on up the hill. Every now and again he bent to the ground.
* Hobbits go light, and their footprints are not easy even for a Ranger to
* read, but not far from the top a spring crossed the path, and in the wet
* earth he saw what he was seeking.
* 'I read the signs aright,' he said to himself. 'Frodo ran to the hill-top.
* I wonder what he saw there? But he returned by the same way, and went down
* the hill again.'
*/
/* This file contains functions for optimizing and finalizing the OP
* structures that hold a compiled perl program
*/
#include "EXTERN.h"
#define PERL_IN_PEEP_C
#include "perl.h"
#define CALL_RPEEP(o) PL_rpeepp(aTHX_ o)
static void
S_scalar_slice_warning(pTHX_ const OP *o)
{
OP *kid;
const bool is_hash = o->op_type == OP_HSLICE
|| (o->op_type == OP_NULL && o->op_targ == OP_HSLICE);
SV *name;
if (!(o->op_private & OPpSLICEWARNING))
return;
if (PL_parser && PL_parser->error_count)
/* This warning can be nonsensical when there is a syntax error. */
return;
kid = cLISTOPo->op_first;
kid = OpSIBLING(kid); /* get past pushmark */
/* weed out false positives: any ops that can return lists */
switch (kid->op_type) {
case OP_BACKTICK:
case OP_GLOB:
case OP_READLINE:
case OP_MATCH:
case OP_RV2AV:
case OP_EACH:
case OP_VALUES:
case OP_KEYS:
case OP_SPLIT:
case OP_LIST:
case OP_SORT:
case OP_REVERSE:
case OP_ENTERSUB:
case OP_CALLER:
case OP_LSTAT:
case OP_STAT:
case OP_READDIR:
case OP_SYSTEM:
case OP_TMS:
case OP_LOCALTIME:
case OP_GMTIME:
case OP_ENTEREVAL:
return;
}
/* Don't warn if we have a nulled list either. */
if (kid->op_type == OP_NULL && kid->op_targ == OP_LIST)
return;
assert(OpSIBLING(kid));
name = op_varname(OpSIBLING(kid));
if (!name) /* XS module fiddling with the op tree */
return;
warn_elem_scalar_context(kid, name, is_hash, true);
}
/* info returned by S_sprintf_is_multiconcatable() */
struct sprintf_ismc_info {
SSize_t nargs; /* num of args to sprintf (not including the format) */
char *start; /* start of raw format string */
char *end; /* bytes after end of raw format string */
STRLEN total_len; /* total length (in bytes) of format string, not
including '%s' and half of '%%' */
STRLEN variant; /* number of bytes by which total_len_p would grow
if upgraded to utf8 */
bool utf8; /* whether the format is utf8 */
};
/* is the OP_SPRINTF o suitable for converting into a multiconcat op?
* i.e. its format argument is a const string with only '%s' and '%%'
* formats, and the number of args is known, e.g.
* sprintf "a=%s f=%s", $a[0], scalar(f());
* but not
* sprintf "i=%d a=%s f=%s", $i, @a, f();
*
* If successful, the sprintf_ismc_info struct pointed to by info will be
* populated.
*/
STATIC bool
S_sprintf_is_multiconcatable(pTHX_ OP *o,struct sprintf_ismc_info *info)
{
OP *pm, *constop, *kid;
SV *sv;
char *s, *e, *p;
SSize_t nargs, nformats;
STRLEN cur, total_len, variant;
bool utf8;
/* if sprintf's behaviour changes, die here so that someone
* can decide whether to enhance this function or skip optimising
* under those new circumstances */
assert(!(o->op_flags & OPf_STACKED));
assert(!(PL_opargs[OP_SPRINTF] & OA_TARGLEX));
assert(!(o->op_private & ~OPpARG4_MASK));
pm = cUNOPo->op_first;
if (pm->op_type != OP_PUSHMARK) /* weird coreargs stuff */
return FALSE;
constop = OpSIBLING(pm);
if (!constop || constop->op_type != OP_CONST)
return FALSE;
sv = cSVOPx_sv(constop);
if (SvMAGICAL(sv) || !SvPOK(sv))
return FALSE;
s = SvPV(sv, cur);
e = s + cur;
/* Scan format for %% and %s and work out how many %s there are.
* Abandon if other format types are found.
*/
nformats = 0;
total_len = 0;
variant = 0;
for (p = s; p < e; p++) {
if (*p != '%') {
total_len++;
if (!UTF8_IS_INVARIANT(*p))
variant++;
continue;
}
p++;
if (p >= e)
return FALSE; /* lone % at end gives "Invalid conversion" */
if (*p == '%')
total_len++;
else if (*p == 's')
nformats++;
else
return FALSE;
}
if (!nformats || nformats > PERL_MULTICONCAT_MAXARG)
return FALSE;
utf8 = cBOOL(SvUTF8(sv));
if (utf8)
variant = 0;
/* scan args; they must all be in scalar cxt */
nargs = 0;
kid = OpSIBLING(constop);
while (kid) {
if ((kid->op_flags & OPf_WANT) != OPf_WANT_SCALAR)
return FALSE;
nargs++;
kid = OpSIBLING(kid);
}
if (nargs != nformats)
return FALSE; /* e.g. sprintf("%s%s", $a); */
info->nargs = nargs;
info->start = s;
info->end = e;
info->total_len = total_len;
info->variant = variant;
info->utf8 = utf8;
return TRUE;
}
/* S_maybe_multiconcat():
*
* given an OP_STRINGIFY, OP_SASSIGN, OP_CONCAT or OP_SPRINTF op, possibly
* convert it (and its children) into an OP_MULTICONCAT. See the code
* comments just before pp_multiconcat() for the full details of what
* OP_MULTICONCAT supports.
*
* Basically we're looking for an optree with a chain of OP_CONCATS down
* the LHS (or an OP_SPRINTF), with possibly an OP_SASSIGN, and/or
* OP_STRINGIFY, and/or OP_CONCAT acting as '.=' at its head, e.g.
*
* $x = "$a$b-$c"
*
* looks like
*
* SASSIGN
* |
* STRINGIFY -- PADSV[$x]
* |
* |
* ex-PUSHMARK -- CONCAT/S
* |
* CONCAT/S -- PADSV[$d]
* |
* CONCAT -- CONST["-"]
* |
* PADSV[$a] -- PADSV[$b]
*
* Note that at this stage the OP_SASSIGN may have already been optimised
* away with OPpTARGET_MY set on the OP_STRINGIFY or OP_CONCAT.
*/
STATIC void
S_maybe_multiconcat(pTHX_ OP *o)
{
OP *lastkidop; /* the right-most of any kids unshifted onto o */
OP *topop; /* the top-most op in the concat tree (often equals o,
unless there are assign/stringify ops above it */
OP *parentop; /* the parent op of topop (or itself if no parent) */
OP *targmyop; /* the op (if any) with the OPpTARGET_MY flag */
OP *targetop; /* the op corresponding to target=... or target.=... */
OP *stringop; /* the OP_STRINGIFY op, if any */
OP *nextop; /* used for recreating the op_next chain without consts */
OP *kid; /* general-purpose op pointer */
UNOP_AUX_item *aux;
UNOP_AUX_item *lenp;
char *const_str, *p;
struct sprintf_ismc_info sprintf_info;
/* store info about each arg in args[];
* toparg is the highest used slot; argp is a general
* pointer to args[] slots */
struct {
void *p; /* initially points to const sv (or null for op);
later, set to SvPV(constsv), with ... */
STRLEN len; /* ... len set to SvPV(..., len) */
} *argp, *toparg, args[PERL_MULTICONCAT_MAXARG*2 + 1];
SSize_t nargs = 0;
SSize_t nconst = 0;
SSize_t nadjconst = 0; /* adjacent consts - may be demoted to args */
STRLEN variant;
bool utf8 = FALSE;
bool kid_is_last = FALSE; /* most args will be the RHS kid of a concat op;
the last-processed arg will the LHS of one,
as args are processed in reverse order */
U8 stacked_last = 0; /* whether the last seen concat op was STACKED */
STRLEN total_len = 0; /* sum of the lengths of the const segments */
U8 flags = 0; /* what will become the op_flags and ... */
U8 private_flags = 0; /* ... op_private of the multiconcat op */
bool is_sprintf = FALSE; /* we're optimising an sprintf */
bool is_targable = FALSE; /* targetop is an OPpTARGET_MY candidate */
bool prev_was_const = FALSE; /* previous arg was a const */
/* -----------------------------------------------------------------
* Phase 1:
*
* Examine the optree non-destructively to determine whether it's
* suitable to be converted into an OP_MULTICONCAT. Accumulate
* information about the optree in args[].
*/
argp = args;
targmyop = NULL;
targetop = NULL;
stringop = NULL;
topop = o;
parentop = o;
assert( o->op_type == OP_SASSIGN
|| o->op_type == OP_CONCAT
|| o->op_type == OP_SPRINTF
|| o->op_type == OP_STRINGIFY);
Zero(&sprintf_info, 1, struct sprintf_ismc_info);
/* first see if, at the top of the tree, there is an assign,
* append and/or stringify */
if (topop->op_type == OP_SASSIGN) {
/* expr = ..... */
if (o->op_ppaddr != PL_ppaddr[OP_SASSIGN])
return;
if (o->op_private & (OPpASSIGN_BACKWARDS|OPpASSIGN_CV_TO_GV))
return;
assert(!(o->op_private & ~OPpARG2_MASK)); /* barf on unknown flags */
parentop = topop;
topop = cBINOPo->op_first;
targetop = OpSIBLING(topop);
if (!targetop) /* probably some sort of syntax error */
return;
/* don't optimise away assign in 'local $foo = ....' */
if ( (targetop->op_private & OPpLVAL_INTRO)
/* these are the common ops which do 'local', but
* not all */
&& ( targetop->op_type == OP_GVSV
|| targetop->op_type == OP_RV2SV
|| targetop->op_type == OP_AELEM
|| targetop->op_type == OP_HELEM
)
)
return;
}
else if ( topop->op_type == OP_CONCAT
&& (topop->op_flags & OPf_STACKED)
&& (!(topop->op_private & OPpCONCAT_NESTED))
)
{
/* expr .= ..... */
/* OPpTARGET_MY shouldn't be able to be set here. If it is,
* decide what to do about it */
assert(!(o->op_private & OPpTARGET_MY));
/* barf on unknown flags */
assert(!(o->op_private & ~(OPpARG2_MASK|OPpTARGET_MY)));
private_flags |= OPpMULTICONCAT_APPEND;
targetop = cBINOPo->op_first;
parentop = topop;
topop = OpSIBLING(targetop);
/* $x .= <FOO> gets optimised to rcatline instead */
if (topop->op_type == OP_READLINE)
return;
}
if (targetop) {
/* Can targetop (the LHS) if it's a padsv, be optimised
* away and use OPpTARGET_MY instead?
*/
if ( (targetop->op_type == OP_PADSV)
&& !(targetop->op_private & OPpDEREF)
&& !(targetop->op_private & OPpPAD_STATE)
/* we don't support 'my $x .= ...' */
&& ( o->op_type == OP_SASSIGN
|| !(targetop->op_private & OPpLVAL_INTRO))
)
is_targable = TRUE;
}
if (topop->op_type == OP_STRINGIFY) {
if (topop->op_ppaddr != PL_ppaddr[OP_STRINGIFY])
return;
stringop = topop;
/* barf on unknown flags */
assert(!(o->op_private & ~(OPpARG4_MASK|OPpTARGET_MY)));
if ((topop->op_private & OPpTARGET_MY)) {
if (o->op_type == OP_SASSIGN)
return; /* can't have two assigns */
targmyop = topop;
}
private_flags |= OPpMULTICONCAT_STRINGIFY;
parentop = topop;
topop = cBINOPx(topop)->op_first;
assert(OP_TYPE_IS_OR_WAS_NN(topop, OP_PUSHMARK));
topop = OpSIBLING(topop);
}
if (topop->op_type == OP_SPRINTF) {
if (topop->op_ppaddr != PL_ppaddr[OP_SPRINTF])
return;
if (S_sprintf_is_multiconcatable(aTHX_ topop, &sprintf_info)) {
nargs = sprintf_info.nargs;
total_len = sprintf_info.total_len;
variant = sprintf_info.variant;
utf8 = sprintf_info.utf8;
is_sprintf = TRUE;
private_flags |= OPpMULTICONCAT_FAKE;
toparg = argp;
/* we have an sprintf op rather than a concat optree.
* Skip most of the code below which is associated with
* processing that optree. We also skip phase 2, determining
* whether its cost effective to optimise, since for sprintf,
* multiconcat is *always* faster */
goto create_aux;
}
/* note that even if the sprintf itself isn't multiconcatable,
* the expression as a whole may be, e.g. in
* $x .= sprintf("%d",...)
* the sprintf op will be left as-is, but the concat/S op may
* be upgraded to multiconcat
*/
}
else if (topop->op_type == OP_CONCAT) {
if (topop->op_ppaddr != PL_ppaddr[OP_CONCAT])
return;
if ((topop->op_private & OPpTARGET_MY)) {
if (o->op_type == OP_SASSIGN || targmyop)
return; /* can't have two assigns */
targmyop = topop;
}
}
/* Is it safe to convert a sassign/stringify/concat op into
* a multiconcat? */
assert((PL_opargs[OP_SASSIGN] & OA_CLASS_MASK) == OA_BINOP);
assert((PL_opargs[OP_CONCAT] & OA_CLASS_MASK) == OA_BINOP);
assert((PL_opargs[OP_STRINGIFY] & OA_CLASS_MASK) == OA_LISTOP);
assert((PL_opargs[OP_SPRINTF] & OA_CLASS_MASK) == OA_LISTOP);
STATIC_ASSERT_STMT( STRUCT_OFFSET(BINOP, op_last)
== STRUCT_OFFSET(UNOP_AUX, op_aux));
STATIC_ASSERT_STMT( STRUCT_OFFSET(LISTOP, op_last)
== STRUCT_OFFSET(UNOP_AUX, op_aux));
/* Now scan the down the tree looking for a series of
* CONCAT/OPf_STACKED ops on the LHS (with the last one not
* stacked). For example this tree:
*
* |
* CONCAT/STACKED
* |
* CONCAT/STACKED -- EXPR5
* |
* CONCAT/STACKED -- EXPR4
* |
* CONCAT -- EXPR3
* |
* EXPR1 -- EXPR2
*
* corresponds to an expression like
*
* (EXPR1 . EXPR2 . EXPR3 . EXPR4 . EXPR5)
*
* Record info about each EXPR in args[]: in particular, whether it is
* a stringifiable OP_CONST and if so what the const sv is.
*
* The reason why the last concat can't be STACKED is the difference
* between
*
* ((($a .= $a) .= $a) .= $a) .= $a
*
* and
* $a . $a . $a . $a . $a
*
* The main difference between the optrees for those two constructs
* is the presence of the last STACKED. As well as modifying $a,
* the former sees the changed $a between each concat, so if $s is
* initially 'a', the first returns 'a' x 16, while the latter returns
* 'a' x 5. And pp_multiconcat can't handle that kind of thing.
*/
kid = topop;
for (;;) {
OP *argop;
SV *sv;
bool last = FALSE;
if ( kid->op_type == OP_CONCAT
&& !kid_is_last
) {
OP *k1, *k2;
k1 = cUNOPx(kid)->op_first;
k2 = OpSIBLING(k1);
/* shouldn't happen except maybe after compile err? */
if (!k2)
return;
/* avoid turning (A . B . ($lex = C) ...) into (A . B . C ...) */
if (kid->op_private & OPpTARGET_MY)
kid_is_last = TRUE;
stacked_last = (kid->op_flags & OPf_STACKED);
if (!stacked_last)
kid_is_last = TRUE;
kid = k1;
argop = k2;
}
else {
argop = kid;
last = TRUE;
}
if ( nargs + nadjconst > PERL_MULTICONCAT_MAXARG - 2
|| (argp - args + 1) > (PERL_MULTICONCAT_MAXARG*2 + 1) - 2)
{
/* At least two spare slots are needed to decompose both
* concat args. If there are no slots left, continue to
* examine the rest of the optree, but don't push new values
* on args[]. If the optree as a whole is legal for conversion
* (in particular that the last concat isn't STACKED), then
* the first PERL_MULTICONCAT_MAXARG elements of the optree
* can be converted into an OP_MULTICONCAT now, with the first
* child of that op being the remainder of the optree -
* which may itself later be converted to a multiconcat op
* too.
*/
if (last) {
/* the last arg is the rest of the optree */
argp++->p = NULL;
nargs++;
}
}
else if ( argop->op_type == OP_CONST
&& ((sv = cSVOPx_sv(argop)))
/* defer stringification until runtime of 'constant'
* things that might stringify variantly, e.g. the radix
* point of NVs, or overloaded RVs */
&& (SvPOK(sv) || SvIOK(sv))
&& (!SvGMAGICAL(sv))
) {
if (argop->op_private & OPpCONST_STRICT)
no_bareword_allowed(argop);
argp++->p = sv;
utf8 |= cBOOL(SvUTF8(sv));
nconst++;
if (prev_was_const)
/* this const may be demoted back to a plain arg later;
* make sure we have enough arg slots left */
nadjconst++;
prev_was_const = !prev_was_const;
}
else {
argp++->p = NULL;
nargs++;
prev_was_const = FALSE;
}
if (last)
break;
}
toparg = argp - 1;
if (stacked_last)
return; /* we don't support ((A.=B).=C)...) */
/* look for two adjacent consts and don't fold them together:
* $o . "a" . "b"
* should do
* $o->concat("a")->concat("b")
* rather than
* $o->concat("ab")
* (but $o .= "a" . "b" should still fold)
*/
{
bool seen_nonconst = FALSE;
for (argp = toparg; argp >= args; argp--) {
if (argp->p == NULL) {
seen_nonconst = TRUE;
continue;
}
if (!seen_nonconst)
continue;
if (argp[1].p) {
/* both previous and current arg were constants;
* leave the current OP_CONST as-is */
argp->p = NULL;
nconst--;
nargs++;
}
}
}
/* -----------------------------------------------------------------
* Phase 2:
*
* At this point we have determined that the optree *can* be converted
* into a multiconcat. Having gathered all the evidence, we now decide
* whether it *should*.
*/
/* we need at least one concat action, e.g.:
*
* Y . Z
* X = Y . Z
* X .= Y
*
* otherwise we could be doing something like $x = "foo", which
* if treated as a concat, would fail to COW.
*/
if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 2)
return;
/* Benchmarking seems to indicate that we gain if:
* * we optimise at least two actions into a single multiconcat
* (e.g., concat+concat, sassign+concat);
* * or if we can eliminate at least 1 OP_CONST;
* * or if we can eliminate a padsv via OPpTARGET_MY
*/
if (
/* eliminated at least one OP_CONST */
nconst >= 1
/* eliminated an OP_SASSIGN */
|| o->op_type == OP_SASSIGN
/* eliminated an OP_PADSV */
|| (!targmyop && is_targable)
)
/* definitely a net gain to optimise */
goto optimise;
/* ... if not, what else? */
/* special-case '$lex1 = expr . $lex1' (where expr isn't lex1):
* multiconcat is faster (due to not creating a temporary copy of
* $lex1), whereas for a general $lex1 = $lex2 . $lex3, concat is
* faster.
*/
if ( nconst == 0
&& nargs == 2
&& targmyop
&& topop->op_type == OP_CONCAT
) {
PADOFFSET t = targmyop->op_targ;
OP *k1 = cBINOPx(topop)->op_first;
OP *k2 = cBINOPx(topop)->op_last;
if ( k2->op_type == OP_PADSV
&& k2->op_targ == t
&& ( k1->op_type != OP_PADSV
|| k1->op_targ != t)
)
goto optimise;
}
/* need at least two concats */
if (nargs + nconst + cBOOL(private_flags & OPpMULTICONCAT_APPEND) < 3)
return;
/* -----------------------------------------------------------------
* Phase 3:
*
* At this point the optree has been verified as ok to be optimised
* into an OP_MULTICONCAT. Now start changing things.
*/
optimise:
/* stringify all const args and determine utf8ness */
variant = 0;
for (argp = args; argp <= toparg; argp++) {
SV *sv = (SV*)argp->p;
if (!sv)
continue; /* not a const op */
if (utf8 && !SvUTF8(sv))
sv_utf8_upgrade_nomg(sv);
argp->p = SvPV_nomg(sv, argp->len);
total_len += argp->len;
/* see if any strings would grow if converted to utf8 */
if (!utf8) {
variant += variant_under_utf8_count((U8 *) argp->p,
(U8 *) argp->p + argp->len);
}
}
/* create and populate aux struct */
create_aux:
aux = (UNOP_AUX_item*)PerlMemShared_malloc(
sizeof(UNOP_AUX_item)
* (
PERL_MULTICONCAT_HEADER_SIZE
+ ((nargs + 1) * (variant ? 2 : 1))
)
);
const_str = (char *)PerlMemShared_malloc(total_len ? total_len : 1);
/* Extract all the non-const expressions from the concat tree then
* dispose of the old tree, e.g. convert the tree from this:
*
* o => SASSIGN
* |
* STRINGIFY -- TARGET
* |
* ex-PUSHMARK -- CONCAT
* |
* CONCAT -- EXPR5
* |
* CONCAT -- EXPR4
* |
* CONCAT -- EXPR3
* |
* EXPR1 -- EXPR2
*
*
* to:
*
* o => MULTICONCAT
* |
* ex-PUSHMARK -- EXPR1 -- EXPR2 -- EXPR3 -- EXPR4 -- EXPR5 -- TARGET
*
* except that if EXPRi is an OP_CONST, it's discarded.
*
* During the conversion process, EXPR ops are stripped from the tree
* and unshifted onto o. Finally, any of o's remaining original
* children are discarded and o is converted into an OP_MULTICONCAT.
*
* In this middle of this, o may contain both: unshifted args on the
* left, and some remaining original args on the right. lastkidop
* is set to point to the right-most unshifted arg to delineate
* between the two sets.
*/
if (is_sprintf) {
/* create a copy of the format with the %'s removed, and record
* the sizes of the const string segments in the aux struct */
char *q, *oldq;
lenp = aux + PERL_MULTICONCAT_IX_LENGTHS;
p = sprintf_info.start;
q = const_str;
oldq = q;
for (; p < sprintf_info.end; p++) {
if (*p == '%') {
p++;
if (*p != '%') {
(lenp++)->ssize = q - oldq;
oldq = q;
continue;
}
}
*q++ = *p;
}
lenp->ssize = q - oldq;
assert((STRLEN)(q - const_str) == total_len);
/* Attach all the args (i.e. the kids of the sprintf) to o (which
* may or may not be topop) The pushmark and const ops need to be
* kept in case they're an op_next entry point.
*/
lastkidop = cLISTOPx(topop)->op_last;
kid = cUNOPx(topop)->op_first; /* pushmark */
op_null(kid);
op_null(OpSIBLING(kid)); /* const */
if (o != topop) {
kid = op_sibling_splice(topop, NULL, -1, NULL); /* cut all args */
op_sibling_splice(o, NULL, 0, kid); /* and attach to o */
lastkidop->op_next = o;
}
}
else {
p = const_str;
lenp = aux + PERL_MULTICONCAT_IX_LENGTHS;
lenp->ssize = -1;
/* Concatenate all const strings into const_str.
* Note that args[] contains the RHS args in reverse order, so
* we scan args[] from top to bottom to get constant strings
* in L-R order
*/
for (argp = toparg; argp >= args; argp--) {
if (!argp->p)
/* not a const op */
(++lenp)->ssize = -1;
else {
STRLEN l = argp->len;
Copy(argp->p, p, l, char);
p += l;
if (lenp->ssize == -1)
lenp->ssize = l;
else
lenp->ssize += l;
}
}
kid = topop;
nextop = o;
lastkidop = NULL;
for (argp = args; argp <= toparg; argp++) {
/* only keep non-const args, except keep the first-in-next-chain
* arg no matter what it is (but nulled if OP_CONST), because it
* may be the entry point to this subtree from the previous
* op_next.
*/
bool last = (argp == toparg);
OP *prev;
/* set prev to the sibling *before* the arg to be cut out,
* e.g. when cutting EXPR:
*
* |
* kid= CONCAT
* |
* prev= CONCAT -- EXPR
* |
*/
if (argp == args && kid->op_type != OP_CONCAT) {
/* in e.g. '$x .= f(1)' there's no RHS concat tree
* so the expression to be cut isn't kid->op_last but
* kid itself */
OP *o1, *o2;
/* find the op before kid */
o1 = NULL;
o2 = cUNOPx(parentop)->op_first;
while (o2 && o2 != kid) {
o1 = o2;
o2 = OpSIBLING(o2);
}
assert(o2 == kid);
prev = o1;
kid = parentop;
}
else if (kid == o && lastkidop)
prev = last ? lastkidop : OpSIBLING(lastkidop);
else
prev = last ? NULL : cUNOPx(kid)->op_first;
if (!argp->p || last) {
/* cut RH op */
OP *aop = op_sibling_splice(kid, prev, 1, NULL);
/* and unshift to front of o */
op_sibling_splice(o, NULL, 0, aop);
/* record the right-most op added to o: later we will
* free anything to the right of it */
if (!lastkidop)
lastkidop = aop;
aop->op_next = nextop;
if (last) {
if (argp->p)
/* null the const at start of op_next chain */
op_null(aop);
}
else if (prev)
nextop = prev->op_next;
}
/* the last two arguments are both attached to the same concat op */
if (argp < toparg - 1)
kid = prev;
}
}
/* Populate the aux struct */
aux[PERL_MULTICONCAT_IX_NARGS].ssize = nargs;
aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv = utf8 ? NULL : const_str;
aux[PERL_MULTICONCAT_IX_PLAIN_LEN].ssize = utf8 ? 0 : total_len;
aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = const_str;
aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = total_len;
/* if variant > 0, calculate a variant const string and lengths where
* the utf8 version of the string will take 'variant' more bytes than
* the plain one. */
if (variant) {
char *p = const_str;
STRLEN ulen = total_len + variant;
UNOP_AUX_item *lens = aux + PERL_MULTICONCAT_IX_LENGTHS;
UNOP_AUX_item *ulens = lens + (nargs + 1);
char *up = (char*)PerlMemShared_malloc(ulen);
SSize_t n;
aux[PERL_MULTICONCAT_IX_UTF8_PV].pv = up;
aux[PERL_MULTICONCAT_IX_UTF8_LEN].ssize = ulen;
for (n = 0; n < (nargs + 1); n++) {
SSize_t i;
char * orig_up = up;
for (i = (lens++)->ssize; i > 0; i--) {
U8 c = *p++;
append_utf8_from_native_byte(c, (U8**)&up);
}
(ulens++)->ssize = (i < 0) ? i : up - orig_up;
}
}
if (stringop) {
/* if there was a top(ish)-level OP_STRINGIFY, we need to keep
* that op's first child - an ex-PUSHMARK - because the op_next of
* the previous op may point to it (i.e. it's the entry point for
* the o optree)
*/
OP *pmop =
(stringop == o)
? op_sibling_splice(o, lastkidop, 1, NULL)
: op_sibling_splice(stringop, NULL, 1, NULL);
assert(OP_TYPE_IS_OR_WAS_NN(pmop, OP_PUSHMARK));
op_sibling_splice(o, NULL, 0, pmop);
if (!lastkidop)
lastkidop = pmop;
}
/* Optimise
* target = A.B.C...
* target .= A.B.C...
*/
if (targetop) {
assert(!targmyop);
if (o->op_type == OP_SASSIGN) {
/* Move the target subtree from being the last of o's children
* to being the last of o's preserved children.
* Note the difference between 'target = ...' and 'target .= ...':
* for the former, target is executed last; for the latter,
* first.
*/
kid = OpSIBLING(lastkidop);
op_sibling_splice(o, kid, 1, NULL); /* cut target op */
op_sibling_splice(o, lastkidop, 0, targetop); /* and paste */
lastkidop->op_next = kid->op_next;
lastkidop = targetop;
}
else {
/* Move the target subtree from being the first of o's
* original children to being the first of *all* o's children.
*/
if (lastkidop) {
op_sibling_splice(o, lastkidop, 1, NULL); /* cut target op */
op_sibling_splice(o, NULL, 0, targetop); /* and paste*/
}
else {
/* if the RHS of .= doesn't contain a concat (e.g.
* $x .= "foo"), it gets missed by the "strip ops from the
* tree and add to o" loop earlier */
assert(topop->op_type != OP_CONCAT);
if (stringop) {
/* in e.g. $x .= "$y", move the $y expression
* from being a child of OP_STRINGIFY to being the
* second child of the OP_CONCAT
*/
assert(cUNOPx(stringop)->op_first == topop);
op_sibling_splice(stringop, NULL, 1, NULL);
op_sibling_splice(o, cUNOPo->op_first, 0, topop);
}
assert(topop == OpSIBLING(cBINOPo->op_first));
if (toparg->p)
op_null(topop);
lastkidop = topop;
}
}
if (is_targable) {
/* optimise
* my $lex = A.B.C...
* $lex = A.B.C...
* $lex .= A.B.C...
* The original padsv op is kept but nulled in case it's the
* entry point for the optree (which it will be for
* '$lex .= ... '
*/
private_flags |= OPpTARGET_MY;
private_flags |= (targetop->op_private & OPpLVAL_INTRO);
o->op_targ = targetop->op_targ;
targetop->op_targ = 0;
op_null(targetop);
}
else
flags |= OPf_STACKED;
}
else if (targmyop) {
private_flags |= OPpTARGET_MY;
if (o != targmyop) {
o->op_targ = targmyop->op_targ;
targmyop->op_targ = 0;
}
}
/* detach the emaciated husk of the sprintf/concat optree and free it */
for (;;) {
kid = op_sibling_splice(o, lastkidop, 1, NULL);
if (!kid)
break;
op_free(kid);
}
/* and convert o into a multiconcat */
o->op_flags = (flags|OPf_KIDS|stacked_last
|(o->op_flags & (OPf_WANT|OPf_PARENS)));
o->op_private = private_flags;
o->op_type = OP_MULTICONCAT;
o->op_ppaddr = PL_ppaddr[OP_MULTICONCAT];
cUNOP_AUXo->op_aux = aux;