forked from Perl/perl5
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathpp_hot.c
6947 lines (6157 loc) · 236 KB
/
pp_hot.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
/* pp_hot.c
*
* Copyright (C) 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
* 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 by Larry Wall and others
*
* You may distribute under the terms of either the GNU General Public
* License or the Artistic License, as specified in the README file.
*
*/
/*
* Then he heard Merry change the note, and up went the Horn-cry of Buckland,
* shaking the air.
*
* Awake! Awake! Fear, Fire, Foes! Awake!
* Fire, Foes! Awake!
*
* [p.1007 of _The Lord of the Rings_, VI/viii: "The Scouring of the Shire"]
*/
/* This file contains 'hot' pp ("push/pop") functions that
* execute the opcodes that make up a perl program. A typical pp function
* expects to find its arguments on the stack, and usually pushes its
* results onto the stack, hence the 'pp' terminology. Each OP structure
* contains a pointer to the relevant pp_foo() function.
*
* By 'hot', we mean common ops whose execution speed is critical.
* By gathering them together into a single file, we encourage
* CPU cache hits on hot code. Also it could be taken as a warning not to
* change any code in this file unless you're sure it won't affect
* performance.
*/
#include "EXTERN.h"
#define PERL_IN_PP_HOT_C
#include "perl.h"
#include "regcomp.h"
#include "feature.h"
/* Hot code. */
#ifdef PERL_RC_STACK
/* Shared epilogue for pp_wrap() and xs_wrap().
 *
 * old_sp is the stack offset (PL_stack_sp - PL_stack_base) that the caller
 * recorded before running the wrapped function; every slot above that
 * offset is treated as a return value. nargs is the number of original
 * arguments sitting immediately below those return values.
 *
 * The return values get their reference counts bumped, the
 * si_stack_nonrc_base marker is reset to 0, the original arguments are
 * freed, and finally the return values are shifted down over the slots
 * the arguments used to occupy.
 */
STATIC void
S_pp_xs_wrap_return(pTHX_ I32 nargs, I32 old_sp)
{
    const I32 nret = (I32)(PL_stack_sp - PL_stack_base) - old_sp;
    SV **slot;

    assert(nret >= 0);

    /* bump any returned values (the loop body never runs when nret is 0) */
    for (slot = PL_stack_sp - nret + 1; slot <= PL_stack_sp; slot++) {
        SvREFCNT_inc(*slot);
    }

    PL_curstackinfo->si_stack_nonrc_base = 0;

    if (!nargs)
        return;

    /* free the original args, walking downwards from the slot just
     * below the lowest returned value, and NULL each slot as we go */
    slot = PL_stack_sp - nret;
    for (I32 remaining = nargs; remaining--; slot--) {
        SvREFCNT_dec(*slot);
        *slot = NULL;
    }

    /* shift the returned values down into the vacated slots */
    if (nret) {
        Move(PL_stack_sp - nret + 1,
             PL_stack_sp - nret - nargs + 1,
             nret, SV*);
    }

    PL_stack_sp -= nargs;
}
/* pp_wrap():
 * Adapter which lets a pp() function that is not reference-count aware
 * run on a reference-counted stack. It makes a non-reference-counted
 * copy of the current stack frame above the originals, calls the real
 * pp() function on that copy, and then (via S_pp_xs_wrap_return())
 * bumps the reference count of whatever was returned and frees the
 * original arguments.
 *
 * Exactly one of nargs / nlists is expected to be non-zero:
 *   nargs   the number of stack arguments the function expects;
 *   nlists  the number of stack lists (delimited by MARKs) it expects.
 *
 * Returns whatever op pointer the real pp() function returned.
 */
OP*
Perl_pp_wrap(pTHX_ Perl_ppaddr_t real_pp_fn, I32 nargs, int nlists)
{
    PERL_ARGS_ASSERT_PP_WRAP;

    /* the stack is already non-RC: nothing needs wrapping */
    if (!rpp_stack_is_rc())
        return real_pp_fn(aTHX);

    /* remember where the stack top was before any copying */
    const I32 old_sp = (I32)(PL_stack_sp - PL_stack_base);

    assert(nargs >= 0);
    assert(nlists >= 0);
    assert(AvREAL(PL_curstack));

    /* record the offset of the first slot above the current stack top */
    PL_curstackinfo->si_stack_nonrc_base = PL_stack_sp - PL_stack_base + 1;

    if (nlists) {
        assert(nargs == 0);

        /* the args are everything above the lowest of the nlists marks */
        const I32 mark = PL_markstack_ptr[-nlists+1];
        nargs = (PL_stack_sp - PL_stack_base) - mark;

        assert(nlists <= 2); /* if ever more, make below a loop */

        /* move the mark(s) up by the same distance the args are about
         * to be copied */
        PL_markstack_ptr[0] += nargs;
        if (nlists == 2)
            PL_markstack_ptr[-1] += nargs;
    }

    if (nargs) {
        /* duplicate all the arg pointers further up the stack */
        rpp_extend(nargs);
        Copy(PL_stack_sp - nargs + 1, PL_stack_sp + 1, nargs, SV*);
        PL_stack_sp += nargs;
    }

    OP * const next_op = real_pp_fn(aTHX);

    /* we should still be a split stack */
    assert(AvREAL(PL_curstack));
    assert(PL_curstackinfo->si_stack_nonrc_base);

    S_pp_xs_wrap_return(aTHX_ nargs, old_sp);

    return next_op;
}
/* xs_wrap():
 * The XS counterpart of pp_wrap(): take a non-reference-counted copy of
 * the arguments of an XS sub that is not refcount aware, call the XS sub
 * on the copy, then (via S_pp_xs_wrap_return()) bump any return values
 * and free the original args.
 *
 * The arguments are taken to be everything on the stack above the
 * topmost mark.
 */
void
Perl_xs_wrap(pTHX_ XSUBADDR_t xsub, CV *cv)
{
    PERL_ARGS_ASSERT_XS_WRAP;

    /* stack top before any copying, and the arg count above the mark */
    const I32 old_sp = (I32)(PL_stack_sp - PL_stack_base);
    const I32 mark   = PL_markstack_ptr[0];
    const I32 nargs  = (PL_stack_sp - PL_stack_base) - mark;

    /* we should be a fully refcounted stack */
    assert(AvREAL(PL_curstack));
    assert(!PL_curstackinfo->si_stack_nonrc_base);

    /* record the offset of the first slot above the current stack top */
    PL_curstackinfo->si_stack_nonrc_base = PL_stack_sp - PL_stack_base + 1;

    if (nargs) {
        /* duplicate all the arg pointers further up the stack, and move
         * the mark up with them */
        rpp_extend(nargs);
        Copy(PL_stack_sp - nargs + 1, PL_stack_sp + 1, nargs, SV*);
        PL_stack_sp += nargs;
        PL_markstack_ptr[0] += nargs;
    }

    xsub(aTHX_ cv);

    S_pp_xs_wrap_return(aTHX_ nargs, old_sp);
}
#endif
/* Private helper function for Perl_rpp_replace_2_1_COMMON()
 * and rpp_popfree_2_NN().
 *
 * Drops one reference from each of the two passed SVs, whose original
 * reference counts are supplied as rc1 and rc2: a count greater than 1
 * is simply stored back decremented, anything else is handed to
 * Perl_sv_free2().
 *
 * Assumes the stack initially looked like
 *     .... sv1 sv2
 * and is now:
 *     .... X
 * but where sv2 is still in the slot above the current PL_stack_sp.
 *
 * Without PERL_RC_STACK this function does nothing.
 */
void
Perl_rpp_free_2_(pTHX_ SV *const sv1, SV *const sv2,
                 const U32 rc1, const U32 rc2)
{
    PERL_ARGS_ASSERT_RPP_FREE_2_;

#ifndef PERL_RC_STACK
    PERL_UNUSED_VAR(sv1);
    PERL_UNUSED_VAR(sv2);
    PERL_UNUSED_VAR(rc1);
    PERL_UNUSED_VAR(rc2);
#else
    if (rc1 <= 1) {
        /* temporarily reclaim sv2 on stack in case we die while
         * freeing sv1 */
        assert(PL_stack_sp[1] == sv2);
        PL_stack_sp++;
        Perl_sv_free2(aTHX_ sv1, rc1);
        PL_stack_sp--;
    }
    else {
        SvREFCNT(sv1) = rc1 - 1;
    }

    if (rc2 <= 1) {
        Perl_sv_free2(aTHX_ sv2, rc2);
    }
    else {
        SvREFCNT(sv2) = rc2 - 1;
    }
#endif
}
/* ----------------------------------------------------------- */
PP(pp_const)
{
rpp_xpush_1(cSVOP_sv);
return NORMAL;
}
PP(pp_nextstate)
{
PL_curcop = (COP*)PL_op;
TAINT_NOT; /* Each statement is presumed innocent */
rpp_popfree_to_NN(PL_stack_base + CX_CUR()->blk_oldsp);
FREETMPS;
PERL_ASYNC_CHECK();
return NORMAL;
}
PP(pp_gvsv)
{
assert(SvTYPE(cGVOP_gv) == SVt_PVGV);
rpp_xpush_1(
UNLIKELY(PL_op->op_private & OPpLVAL_INTRO)
? save_scalar(cGVOP_gv)
: GvSVn(cGVOP_gv));
return NORMAL;
}
/* also used for: pp_lineseq() pp_regcmaybe() pp_scalar() pp_scope() */
/* A no-op: touches neither the stack nor any interpreter state and just
 * returns NORMAL. */
PP(pp_null)
{
return NORMAL;
}
/* Record the current top-of-stack position (PL_stack_sp) with PUSHMARK()
 * and return NORMAL.
 *
 * This is sometimes called directly by pp_coreargs, pp_grepstart and
 * amagic_call. */
PP(pp_pushmark)
{
PUSHMARK(PL_stack_sp);
return NORMAL;
}
PP(pp_stringify)
{
dTARGET;
sv_copypv(TARG, *PL_stack_sp);
SvSETMAGIC(TARG);
rpp_replace_1_1_NN(TARG);
return NORMAL;
}
PP(pp_gv)
{
/* cGVOP_gv might be a real GV or might be an RV to a CV */
assert(SvTYPE(cGVOP_gv) == SVt_PVGV ||
(SvTYPE(cGVOP_gv) <= SVt_PVMG && SvROK(cGVOP_gv) && SvTYPE(SvRV(cGVOP_gv)) == SVt_PVCV));
rpp_xpush_1(MUTABLE_SV(cGVOP_gv));
return NORMAL;
}
/* also used for: pp_andassign() */
/* Logical and. The top stack item is the left operand:
 *   - false: it is left on the stack and execution continues with the
 *     next op (NORMAL);
 *   - true:  execution continues at op_other; for a plain OP_AND the
 *     operand is popped first, for any other op type using this function
 *     it stays on the stack.
 */
PP(pp_and)
{
    PERL_ASYNC_CHECK();

    SV * const cond = *PL_stack_sp;

    if (SvTRUE_NN(cond)) {
        if (PL_op->op_type == OP_AND)
            rpp_popfree_1_NN();
        return cLOGOP->op_other;
    }

    return NORMAL;
}
/*
 * Fused form of a simple padsv op followed by an sassign op: assigns the
 * value on top of the stack to the pad variable named by op_targ, then
 * pops that value.
 *
 * The following lengthy and unlikely sassign case is not supported:
 *     (UNLIKELY(PL_op->op_private & OPpASSIGN_CV_TO_GV))
 * These cases have a separate optimization, so are not handled here:
 *     (PL_op->op_private & OPpASSIGN_BACKWARDS) {or,and,dor}assign
 */
PP(pp_padsv_store)
{
    OP  * const op   = PL_op;
    SV ** const slot = &PAD_SVl(op->op_targ);
    SV  * const targ = *slot;           /* lvalue to assign into */
    SV  * const rhs  = *PL_stack_sp;    /* RHS value to assign */

    /* !OPf_STACKED is not handled by this OP */
    assert(op->op_flags & OPf_STACKED);

    /* Inlined, simplified pp_padsv: OPpLVAL_INTRO without OPpPAD_STATE
     * registers the pad slot with save_clearsv() */
    if ((op->op_private & (OPpLVAL_INTRO|OPpPAD_STATE)) == OPpLVAL_INTRO)
        save_clearsv(slot);

    /* Inlined, simplified pp_sassign from here on */
    assert(TAINTING_get || !TAINT_get);
    if (UNLIKELY(TAINT_get) && !SvTAINTED(rhs))
        TAINT_NOT;

    if (   UNLIKELY(SvTEMP(targ))
        && !SvSMAGICAL(targ)
        && SvREFCNT(targ) == 1
        && !(isGV_with_GP(targ) && !SvFAKE(targ))
        && ckWARN(WARN_MISC))
    {
        Perl_warner(aTHX_
            packWARN(WARN_MISC), "Useless assignment to a temporary"
        );
    }

    SvSetMagicSV(targ, rhs);

    /* this op is only expected in void context, so nothing is left on
     * the stack as a result */
    assert(GIMME_V == G_VOID);
    rpp_popfree_1_NN();
    return NORMAL;
}
/* A mashup of simplified AELEMFAST_LEX + SASSIGN OPs */
PP(pp_aelemfastlex_store)
{
OP * const op = PL_op;
SV* const val = *PL_stack_sp; /* RHS value to assign */
AV * const av = MUTABLE_AV(PAD_SV(op->op_targ));
const I8 key = (I8)PL_op->op_private;
SV * targ = NULL;
/* !OPf_STACKED is not handled by this OP */
assert(op->op_flags & OPf_STACKED);
/* Inlined, simplified pp_aelemfast here */
assert(SvTYPE(av) == SVt_PVAV);
/* inlined av_fetch() for simple cases ... */
if (!SvRMAGICAL(av) && key >=0 && key <= AvFILLp(av)) {
targ = AvARRAY(av)[key];
}
/* ... else do it the hard way */
if (!targ) {
SV **svp = av_fetch(av, key, 1);
if (svp)
targ = *svp;
else
DIE(aTHX_ PL_no_aelem, (int)key);
}
/* Inlined, simplified pp_sassign from here */
assert(TAINTING_get || !TAINT_get);
if (UNLIKELY(TAINT_get) && !SvTAINTED(val))
TAINT_NOT;
/* This assertion is a deviation from pp_sassign, which uses an if()
* condition to check for "Useless assignment to a temporary" and
* warns if the condition is true. Here, the condition should NEVER
* be true when the LHS is the result of an array fetch. The
* assertion is here as a final check that this remains the case.
*/
assert(!(SvTEMP(targ) && SvREFCNT(targ) == 1 && !SvSMAGICAL(targ)));
SvSetMagicSV(targ, val);
assert(GIMME_V == G_VOID);
rpp_popfree_1_NN();
return NORMAL;
}
/* Scalar assignment.
 *
 * On entry the top two stack items are the operands: 'left' (the SV being
 * assigned to) is read from PL_stack_sp[0] and 'right' (the value) from
 * PL_stack_sp[-1]. When OPpASSIGN_BACKWARDS is set the two slots are
 * swapped in place first.
 *
 * The value is copied with SvSetMagicSV(left, right). In void context
 * both operands are then popped; otherwise 'right' is removed and 'left'
 * stays on the stack as the result.
 *
 * OPpASSIGN_CV_TO_GV selects extra handling for '*foo = \&bar'; see the
 * comments inside that branch.
 */
PP(pp_sassign)
{
/* sassign keeps its args in the optree traditionally backwards.
So we pop them differently.
*/
SV *left = PL_stack_sp[0];
SV *right = PL_stack_sp[-1];
if (PL_op->op_private & OPpASSIGN_BACKWARDS) { /* {or,and,dor}assign */
SV * const temp = left;
left = right; right = temp;
/* write the swapped pair back so the stack slots agree with
 * left/right again (the asserts before the non-void pop below
 * check exactly that) */
PL_stack_sp[0] = left;
PL_stack_sp[-1] = right;
}
assert(TAINTING_get || !TAINT_get);
if (UNLIKELY(TAINT_get) && !SvTAINTED(right))
TAINT_NOT;
if (UNLIKELY(PL_op->op_private & OPpASSIGN_CV_TO_GV)) {
/* *foo =\&bar */
SV * const cv = SvRV(right);
const U32 cv_type = SvTYPE(cv);
const bool is_gv = isGV_with_GP(left);
const bool got_coderef = cv_type == SVt_PVCV || cv_type == SVt_PVFM;
if (!got_coderef) {
assert(SvROK(cv));
}
/* Can do the optimisation if left (LVALUE) is not a typeglob,
right (RVALUE) is a reference to something, and we're in void
context. */
if (!got_coderef && !is_gv && GIMME_V == G_VOID) {
/* Is the target symbol table currently empty? */
GV * const gv = gv_fetchsv_nomg(left, GV_NOINIT, SVt_PVGV);
if (SvTYPE(gv) != SVt_PVGV && !SvOK(gv)) {
/* Good. Create a new proxy constant subroutine in the target.
The gv becomes a(nother) reference to the constant. */
SV *const value = SvRV(cv);
SvUPGRADE(MUTABLE_SV(gv), SVt_IV);
SvPCS_IMPORTED_on(gv);
SvRV_set(gv, value);
SvREFCNT_inc_simple_void(value);
rpp_replace_2_1_NN(left);
return NORMAL;
}
}
/* Need to fix things up. */
if (!is_gv) {
/* Need to fix GV. */
SV *sv = MUTABLE_SV(gv_fetchsv_nomg(left,GV_ADD, SVt_PVGV));
/* keep the top-of-stack slot and 'left' in step: both now refer
 * to the fetched GV */
rpp_replace_1_1_NN(sv);
left = sv;
}
if (!got_coderef) {
/* We've been returned a constant rather than a full subroutine,
but they expect a subroutine reference to apply. */
if (SvROK(cv)) {
ENTER_with_name("sassign_coderef");
SvREFCNT_inc_void(SvRV(cv));
/* newCONSTSUB takes a reference count on the passed in SV
from us. We set the name to NULL, otherwise we get into
all sorts of fun as the reference to our new sub is
donated to the GV that we're about to assign to.
*/
SvRV_set(right, MUTABLE_SV(newCONSTSUB(GvSTASH(left), NULL,
SvRV(cv))));
SvREFCNT_dec_NN(cv);
LEAVE_with_name("sassign_coderef");
} else {
/* What can happen for the corner case *{"BONK"} = \&{"BONK"};
is that
First: ops for \&{"BONK"}; return us the constant in the
symbol table
Second: ops for *{"BONK"} cause that symbol table entry
(and our reference to it) to be upgraded from RV
to typeglob)
Thirdly: We get here. cv is actually PVGV now, and its
GvCV() is actually the subroutine we're looking for
So change the reference so that it points to the subroutine
of that typeglob, as that's what they were after all along.
*/
GV *const upgraded = MUTABLE_GV(cv);
CV *const source = GvCV(upgraded);
assert(source);
assert(CvFLAGS(source) & CVf_CONST);
SvREFCNT_inc_simple_void_NN(source);
SvREFCNT_dec_NN(upgraded);
SvRV_set(right, MUTABLE_SV(source));
}
}
}
/* warn when the destination looks like a throw-away value: lone on
 * the stack (per rpp_is_lone()), without set-magic, and not a real
 * glob */
if (
rpp_is_lone(left) && !SvSMAGICAL(left) &&
(!isGV_with_GP(left) || SvFAKE(left)) && ckWARN(WARN_MISC)
)
Perl_warner(aTHX_
packWARN(WARN_MISC), "Useless assignment to a temporary"
);
SvSetMagicSV(left, right);
if (LIKELY(GIMME_V == G_VOID))
rpp_popfree_2_NN(); /* pop left and right */
else {
/* pop right, leave left on the stack */
assert(PL_stack_sp[-1] == right);
assert(PL_stack_sp[0] == left);
/* lower the stack by one and overwrite right's slot with left;
 * under PERL_RC_STACK the reference that slot held on right is
 * then released */
*--PL_stack_sp = left;
#ifdef PERL_RC_STACK
SvREFCNT_dec_NN(right);
#endif
}
return NORMAL;
}
/* Conditional branch: test the truth of the top stack item, pop it, and
 * continue at op_other when it was true or at op_next when it was false. */
PP(pp_cond_expr)
{
    PERL_ASYNC_CHECK();

    /* sample the truth value before the item is popped and freed */
    const bool is_true = SvTRUE_NN(*PL_stack_sp);
    rpp_popfree_1_NN();

    if (is_true)
        return cLOGOP->op_other;
    return cLOGOP->op_next;
}
PP(pp_unstack)
{
PERL_CONTEXT *cx;
PERL_ASYNC_CHECK();
TAINT_NOT; /* Each statement is presumed innocent */
cx = CX_CUR();
rpp_popfree_to_NN(PL_stack_base + CX_CUR()->blk_oldsp);
FREETMPS;
if (!(PL_op->op_flags & OPf_SPECIAL)) {
assert(CxTYPE(cx) == CXt_BLOCK || CxTYPE_is_LOOP(cx));
CX_LEAVE_SCOPE(cx);
}
return NORMAL;
}
/* The main body of pp_concat, not including the magic/overload and
 * stack handling.
 * It does targ = left . right.
 * Moved into a separate function so that pp_multiconcat() can use it
 * too.
 *
 * left, right: the two operands.
 * targ:        the destination SV; it may be the same SV as left and/or
 *              right. NOTE(review): the body refers to it as TARG,
 *              presumably a macro alias for 'targ' defined elsewhere -
 *              confirm.
 * targmy:      non-zero when the calling op has OPpTARGET_MY set (that is
 *              how pp_concat() computes it); only consulted to decide
 *              whether to report an undefined left operand.
 *
 * Set-magic is invoked on the destination once the result is in place.
 */
PERL_STATIC_INLINE void
S_do_concat(pTHX_ SV *left, SV *right, SV *targ, U8 targmy)
{
bool lbyte;
STRLEN rlen;
const char *rpv = NULL;
bool rbyte = FALSE;
bool rcopied = FALSE;
/* If the destination is also the right operand, work on a temporary
 * copy of right's string, since the destination is about to be
 * overwritten with left's string. */
if (TARG == right && right != left) { /* $r = $l.$r */
rpv = SvPV_nomg_const(right, rlen);
rbyte = !DO_UTF8(right);
right = newSVpvn_flags(rpv, rlen, SVs_TEMP);
rpv = SvPV_const(right, rlen); /* no point setting UTF-8 here */
rcopied = TRUE;
}
if (TARG != left) { /* not $l .= $r */
/* start the destination off as a copy of left's string, carrying
 * over left's UTF-8 flag */
STRLEN llen;
const char* const lpv = SvPV_nomg_const(left, llen);
lbyte = !DO_UTF8(left);
sv_setpvn(TARG, lpv, llen);
if (!lbyte)
SvUTF8_on(TARG);
else
SvUTF8_off(TARG);
}
else { /* $l .= $r and left == TARG */
if (!SvOK(left)) {
if ((left == right /* $l .= $l */
|| targmy) /* $l = $l . $r */
&& ckWARN(WARN_UNINITIALIZED)
)
report_uninit(left);
SvPVCLEAR(left);
}
else {
SvPV_force_nomg_nolen(left);
}
lbyte = !DO_UTF8(left);
if (IN_BYTES)
SvUTF8_off(left);
}
/* right has not been stringified yet unless it was copied above */
if (!rcopied) {
rpv = SvPV_nomg_const(right, rlen);
rbyte = !DO_UTF8(right);
}
/* The two sides disagree about UTF-8: upgrade whichever side is bytes.
 * right is only ever upgraded via a temporary copy, so the caller's SV
 * is left as it was. */
if (lbyte != rbyte) {
if (lbyte)
sv_utf8_upgrade_nomg(TARG);
else {
if (!rcopied)
right = newSVpvn_flags(rpv, rlen, SVs_TEMP);
sv_utf8_upgrade_nomg(right);
rpv = SvPV_nomg_const(right, rlen);
}
}
sv_catpvn_nomg(TARG, rpv, rlen);
SvSETMAGIC(TARG);
}
PP(pp_concat)
{
SV *targ = (PL_op->op_flags & OPf_STACKED)
? PL_stack_sp[-1]
: PAD_SV(PL_op->op_targ);
if (rpp_try_AMAGIC_2(concat_amg, AMGf_assign))
return NORMAL;
SV *right = PL_stack_sp[0];
SV *left = PL_stack_sp[-1];
S_do_concat(aTHX_ left, right, targ, PL_op->op_private & OPpTARGET_MY);
rpp_replace_2_1_NN(targ);
return NORMAL;
}
/* pp_multiconcat()
Concatenate one or more args, possibly interleaved with constant string
segments. The result may be assigned to, or appended to, a variable or
expression.
Several op_flags and/or op_private bits indicate what the target is, and
whether it's appended to. Valid permutations are:
- (PADTMP) = (A.B.C....)
OPpTARGET_MY $lex = (A.B.C....)
OPpTARGET_MY,OPpLVAL_INTRO my $lex = (A.B.C....)
OPpTARGET_MY,OPpMULTICONCAT_APPEND $lex .= (A.B.C....)
OPf_STACKED expr = (A.B.C....)
OPf_STACKED,OPpMULTICONCAT_APPEND expr .= (A.B.C....)
Other combinations like (A.B).(C.D) are not optimised into a multiconcat
op, as it's too hard to get the correct ordering of ties, overload etc.
In addition:
OPpMULTICONCAT_FAKE: not a real concat, instead an optimised
sprintf "...%s...". Don't call '.'
overloading: only use '""' overloading.
OPpMULTICONCAT_STRINGIFY: the RHS was of the form
"...$a...$b..." rather than
"..." . $a . "..." . $b . "..."
An OP_MULTICONCAT is of type UNOP_AUX. The fixed slots of the aux array are
defined with PERL_MULTICONCAT_IX_FOO constants, where:
FOO index description
-------- ----- ----------------------------------
NARGS 0 number of arguments
PLAIN_PV 1 non-utf8 constant string
PLAIN_LEN 2 non-utf8 constant string length
UTF8_PV 3 utf8 constant string
UTF8_LEN 4 utf8 constant string length
LENGTHS 5 first of nargs+1 const segment lengths
The idea is that a general string concatenation will have a fixed (known
at compile time) number of variable args, interspersed with constant
strings, e.g. "a=$a b=$b\n"
All the constant string segments "a=", " b=" and "\n" are stored as a
single string "a= b=\n", pointed to from the PLAIN_PV/UTF8_PV slot, along
with a series of segment lengths: e.g. 2,3,1. In the case where the
constant string is plain but has a different utf8 representation, both
variants are stored, and two sets of (nargs+1) segments lengths are stored
in the slots beginning at PERL_MULTICONCAT_IX_LENGTHS.
A segment length of -1 indicates that there is no constant string at that
point; this distinguishes between e.g. ($a . $b) and ($a . "" . $b), which
have differing overloading behaviour.
*/
PP(pp_multiconcat)
{
SV *targ; /* The SV to be assigned or appended to */
char *targ_pv; /* where within SvPVX(targ) we're writing to */
STRLEN targ_len; /* SvCUR(targ) */
SV **toparg; /* the highest arg position on the stack */
UNOP_AUX_item *aux; /* PL_op->op_aux buffer */
UNOP_AUX_item *const_lens; /* the segment length array part of aux */
const char *const_pv; /* the current segment of the const string buf */
SSize_t nargs; /* how many args were expected */
SSize_t stack_adj; /* how much to adjust PL_stack_sp on return */
STRLEN grow; /* final size of destination string (targ) */
UV targ_count; /* how many times targ has appeared on the RHS */
bool is_append; /* OPpMULTICONCAT_APPEND flag is set */
bool slow_concat; /* args too complex for quick concat */
U32 dst_utf8; /* the result will be utf8 (indicate this with
SVf_UTF8 in a U32, rather than using bool,
for ease of testing and setting) */
/* for each arg, holds the result of an SvPV() call */
struct multiconcat_svpv {
const char *pv;
SSize_t len;
}
*targ_chain, /* chain of slots where targ has appeared on RHS */
*svpv_p, /* ptr for looping through svpv_buf */
*svpv_base, /* first slot (may be greater than svpv_buf), */
*svpv_end, /* and slot after highest result so far, of: */
svpv_buf[PERL_MULTICONCAT_MAXARG]; /* buf for storing SvPV() results */
aux = cUNOP_AUXx(PL_op)->op_aux;
stack_adj = nargs = aux[PERL_MULTICONCAT_IX_NARGS].ssize;
is_append = cBOOL(PL_op->op_private & OPpMULTICONCAT_APPEND);
/* get targ from the stack or pad */
toparg = PL_stack_sp;
if (PL_op->op_flags & OPf_STACKED) {
stack_adj++;
if (is_append) {
/* for 'expr .= ...', expr is the bottom item on the stack */
targ = PL_stack_sp[-nargs];
}
else {
/* for 'expr = ...', expr is the top item on the stack */
targ = *PL_stack_sp;
toparg--;
}
}
else {
SV **svp = &(PAD_SVl(PL_op->op_targ));
targ = *svp;
if (PL_op->op_private & OPpLVAL_INTRO) {
assert(PL_op->op_private & OPpTARGET_MY);
save_clearsv(svp);
}
if (!nargs)
/* $lex .= "const" doesn't cause anything to be pushed */
rpp_extend(1);
}
grow = 1; /* allow for '\0' at minimum */
targ_count = 0;
targ_chain = NULL;
targ_len = 0;
svpv_end = svpv_buf;
/* only utf8 variants of the const strings? */
dst_utf8 = aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv ? 0 : SVf_UTF8;
/* --------------------------------------------------------------
* Phase 1:
*
* stringify (i.e. SvPV()) every arg and store the resultant pv/len/utf8
* triplets in svpv_buf[]. Also increment 'grow' by the args' lengths.
*
* utf8 is indicated by storing a negative length.
*
* Where an arg is actually targ, the stringification is deferred:
* the length is set to 0, and the slot is added to targ_chain.
*
* If a magic, overloaded, or otherwise weird arg is found, which
* might have side effects when stringified, the loop is abandoned and
* we goto a code block where a more basic 'emulate calling
* pp_concat() on each arg in turn' is done.
*/
for (SV **svp = toparg - (nargs - 1); svp <= toparg; svp++, svpv_end++) {
U32 utf8;
STRLEN len;
SV *sv;
assert(svpv_end - svpv_buf < PERL_MULTICONCAT_MAXARG);
sv = *svp;
/* this if/else chain is arranged so that common/simple cases
* take few conditionals */
if (LIKELY((SvFLAGS(sv) & (SVs_GMG|SVf_ROK|SVf_POK)) == SVf_POK)) {
/* common case: sv is a simple non-magical PV */
if (targ == sv) {
/* targ appears on RHS.
* Delay storing PV pointer; instead, add slot to targ_chain
* so it can be populated later, after targ has been grown and
* we know its final SvPVX() address.
*/
targ_on_rhs:
svpv_end->len = 0; /* zerojng here means we can skip
updating later if targ_len == 0 */
svpv_end->pv = (char*)targ_chain;
targ_chain = svpv_end;
targ_count++;
continue;
}
len = SvCUR(sv);
svpv_end->pv = SvPVX(sv);
}
else if (UNLIKELY(SvFLAGS(sv) & (SVs_GMG|SVf_ROK)))
/* may have side effects: tie, overload etc.
* Abandon 'stringify everything first' and handle
* args in strict order. Note that already-stringified args
* will be reprocessed, which is safe because each initial
* stringification would have been idempotent.
*/
goto do_magical;
else if (SvNIOK(sv)) {
if (targ == sv)
goto targ_on_rhs;
/* stringify general valid scalar */
svpv_end->pv = sv_2pv_flags(sv, &len, 0);
}
else if (!SvOK(sv)) {
if (ckWARN(WARN_UNINITIALIZED))
/* an undef value in the presence of warnings may trigger
* side effects */
goto do_magical;
svpv_end->pv = "";
len = 0;
}
else
goto do_magical; /* something weird */
utf8 = (SvFLAGS(sv) & SVf_UTF8);
dst_utf8 |= utf8;
ASSUME(len < SSize_t_MAX);
svpv_end->len = utf8 ? -(SSize_t)len : (SSize_t)len;
grow += len;
}
/* --------------------------------------------------------------
* Phase 2:
*
* Stringify targ:
*
* if targ appears on the RHS or is appended to, force stringify it;
* otherwise set it to "". Then set targ_len.
*/
if (is_append) {
/* abandon quick route if using targ might have side effects */
if (UNLIKELY(SvFLAGS(targ) & (SVs_GMG|SVf_ROK)))
goto do_magical;
if (SvOK(targ)) {
U32 targ_utf8;
stringify_targ:
SvPV_force_nomg_nolen(targ);
targ_utf8 = SvFLAGS(targ) & SVf_UTF8;
if (UNLIKELY(dst_utf8 & ~targ_utf8)) {
if (LIKELY(!IN_BYTES))
sv_utf8_upgrade_nomg(targ);
}
else
dst_utf8 |= targ_utf8;
targ_len = SvCUR(targ);
grow += targ_len * (targ_count + is_append);
goto phase3;
}
else if (ckWARN(WARN_UNINITIALIZED))
/* warning might have side effects */
goto do_magical;
/* the undef targ will be silently SvPVCLEAR()ed below */
}
else if (UNLIKELY(SvTYPE(targ) >= SVt_REGEXP)) {
/* Assigning to some weird LHS type. Don't force the LHS to be an
* empty string; instead, do things 'long hand' by using the
* overload code path, which concats to a TEMP sv and does
* sv_catsv() calls rather than COPY()s. This ensures that even
* bizarre code like this doesn't break or crash:
* *F = *F . *F.
* (which makes the 'F' typeglob an alias to the
* '*main::F*main::F' typeglob).
*/
goto do_magical;
}
else if (targ_chain)
/* targ was found on RHS.
* Force stringify it, using the same code as the append branch
* above, except that we don't need the magic/overload/undef
* checks as these will already have been done in the phase 1
* loop.
*/
goto stringify_targ;
/* unrolled SvPVCLEAR() - mostly: no need to grow or set SvCUR() to 0;
* those will be done later. */
SV_CHECK_THINKFIRST_COW_DROP(targ);
SvUPGRADE(targ, SVt_PV);
SvFLAGS(targ) &= ~(SVf_OK|SVf_IVisUV|SVf_UTF8);
SvFLAGS(targ) |= (SVf_POK|SVp_POK|dst_utf8);
phase3:
/* --------------------------------------------------------------
* Phase 3:
*
* UTF-8 tweaks and grow targ:
*
* Now that we know the length and utf8-ness of both the targ and
* args, grow targ to the size needed to accumulate all the args, based
* on whether targ appears on the RHS, whether we're appending, and
* whether any non-utf8 args expand in size if converted to utf8.
*
* For the latter, if dst_utf8 we scan non-utf8 args looking for
* variant chars, and adjust the svpv->len value of those args to the
* utf8 size and negate it to flag them. At the same time we un-negate
* the lens of any utf8 args since after this phase we no longer care
* whether an arg is utf8 or not.
*
* Finally, initialise const_lens and const_pv based on utf8ness.
* Note that there are 3 permutations:
*
* * If the constant string is invariant whether utf8 or not (e.g. "abc"),
* then aux[PERL_MULTICONCAT_IX_PLAIN_PV/LEN] are the same as
* aux[PERL_MULTICONCAT_IX_UTF8_PV/LEN] and there is one set of
* segment lengths.
*
* * If the string is fully utf8, e.g. "\x{100}", then
* aux[PERL_MULTICONCAT_IX_PLAIN_PV/LEN] == (NULL,0) and there is
* one set of segment lengths.
*
* * If the string has different plain and utf8 representations
* (e.g. "\x80"), then aux[PERL_MULTICONCAT_IX_PLAIN_PV/LEN]]
* holds the plain rep, while aux[PERL_MULTICONCAT_IX_UTF8_PV/LEN]
* holds the utf8 rep, and there are 2 sets of segment lengths,
* with the utf8 set following after the plain set.
*
* On entry to this section the (pv,len) pairs in svpv_buf have the
* following meanings:
* (pv, len) a plain string
* (pv, -len) a utf8 string
* (NULL, 0) left-most targ \ linked together R-to-L
* (next, 0) other targ / in targ_chain
*/
/* turn off utf8 handling if 'use bytes' is in scope */
if (UNLIKELY(dst_utf8 && IN_BYTES)) {
dst_utf8 = 0;
SvUTF8_off(targ);
/* undo all the negative lengths which flag utf8-ness */
for (svpv_p = svpv_buf; svpv_p < svpv_end; svpv_p++) {
SSize_t len = svpv_p->len;
if (len < 0)
svpv_p->len = -len;
}
}
/* grow += total of lengths of constant string segments */
{
SSize_t len;
len = aux[dst_utf8 ? PERL_MULTICONCAT_IX_UTF8_LEN
: PERL_MULTICONCAT_IX_PLAIN_LEN].ssize;
slow_concat = cBOOL(len);
grow += len;
}
const_lens = aux + PERL_MULTICONCAT_IX_LENGTHS;
if (dst_utf8) {
const_pv = aux[PERL_MULTICONCAT_IX_UTF8_PV].pv;
if ( aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv
&& const_pv != aux[PERL_MULTICONCAT_IX_PLAIN_PV].pv)
/* separate sets of lengths for plain and utf8 */
const_lens += nargs + 1;
/* If the result is utf8 but some of the args aren't,
* calculate how much extra growth is needed for all the chars
* which will expand to two utf8 bytes.
* Also, if the growth is non-zero, negate the length to indicate
* that this is a variant string. Conversely, un-negate the
* length on utf8 args (which was only needed to flag non-utf8
* args in this loop */
for (svpv_p = svpv_buf; svpv_p < svpv_end; svpv_p++) {
SSize_t len, extra;
len = svpv_p->len;
if (len <= 0) {
svpv_p->len = -len;
continue;
}
extra = variant_under_utf8_count((U8 *) svpv_p->pv,
(U8 *) svpv_p->pv + len);
if (UNLIKELY(extra)) {
grow += extra;
/* -ve len indicates special handling */
svpv_p->len = -(len + extra);
slow_concat = TRUE;
}
}
}