@@ -83,6 +83,8 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) {
83
83
vm .memory = 0
84
84
vm .ip = 0
85
85
86
+ var fnArgsBuf []any
87
+
86
88
for vm .ip < len (program .Bytecode ) {
87
89
if debug && vm .debug {
88
90
<- vm .step
@@ -355,62 +357,47 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) {
355
357
vm .push (out )
356
358
357
359
case OpCall1 :
358
- a := vm .pop ( )
359
- out , err := program.functions [arg ](a )
360
+ args := vm .getArgsForFunc ( & fnArgsBuf , program , 1 )
361
+ out , err := program .functions [arg ](args ... )
360
362
if err != nil {
361
363
panic (err )
362
364
}
363
365
vm .push (out )
364
366
365
367
case OpCall2 :
366
- b := vm .pop ()
367
- a := vm .pop ()
368
- out , err := program .functions [arg ](a , b )
368
+ args := vm .getArgsForFunc (& fnArgsBuf , program , 2 )
369
+ out , err := program .functions [arg ](args ... )
369
370
if err != nil {
370
371
panic (err )
371
372
}
372
373
vm .push (out )
373
374
374
375
case OpCall3 :
375
- c := vm .pop ()
376
- b := vm .pop ()
377
- a := vm .pop ()
378
- out , err := program .functions [arg ](a , b , c )
376
+ args := vm .getArgsForFunc (& fnArgsBuf , program , 3 )
377
+ out , err := program .functions [arg ](args ... )
379
378
if err != nil {
380
379
panic (err )
381
380
}
382
381
vm .push (out )
383
382
384
383
case OpCallN :
385
384
fn := vm .pop ().(Function )
386
- size := arg
387
- in := make ([]any , size )
388
- for i := int (size ) - 1 ; i >= 0 ; i -- {
389
- in [i ] = vm .pop ()
390
- }
391
- out , err := fn (in ... )
385
+ args := vm .getArgsForFunc (& fnArgsBuf , program , arg )
386
+ out , err := fn (args ... )
392
387
if err != nil {
393
388
panic (err )
394
389
}
395
390
vm .push (out )
396
391
397
392
case OpCallFast :
398
393
fn := vm .pop ().(func (... any ) any )
399
- size := arg
400
- in := make ([]any , size )
401
- for i := int (size ) - 1 ; i >= 0 ; i -- {
402
- in [i ] = vm .pop ()
403
- }
404
- vm .push (fn (in ... ))
394
+ args := vm .getArgsForFunc (& fnArgsBuf , program , arg )
395
+ vm .push (fn (args ... ))
405
396
406
397
case OpCallSafe :
407
398
fn := vm .pop ().(SafeFunction )
408
- size := arg
409
- in := make ([]any , size )
410
- for i := int (size ) - 1 ; i >= 0 ; i -- {
411
- in [i ] = vm .pop ()
412
- }
413
- out , mem , err := fn (in ... )
399
+ args := vm .getArgsForFunc (& fnArgsBuf , program , arg )
400
+ out , mem , err := fn (args ... )
414
401
if err != nil {
415
402
panic (err )
416
403
}
@@ -609,6 +596,56 @@ func (vm *VM) scope() *Scope {
609
596
return vm .Scopes [len (vm .Scopes )- 1 ]
610
597
}
611
598
599
+ // getArgsForFunc lazily initializes the buffer the first time it is called for
600
+ // a given program (thus, it also needs "program" to run). It will
601
+ // take "needed" elements from the buffer and populate them with vm.pop() in
602
+ // reverse order. Because the estimation can fall short, this function can
603
+ // occasionally make a new allocation.
604
+ func (vm * VM ) getArgsForFunc (bufPtr * []any , program * Program , needed int ) []any {
605
+ // Step 1: fix estimations and preallocate
606
+ if * bufPtr == nil {
607
+ estimatedFnArgsCount := estimateFnArgsCount (program )
608
+ if estimatedFnArgsCount < needed {
609
+ // in the case that the first call is for example OpCallN with a large
610
+ // number of arguments, then make sure we will be able to serve them at
611
+ // least.
612
+ estimatedFnArgsCount = needed
613
+ }
614
+
615
+ // in the case that we are preparing the arguments for the first
616
+ // function call of the program, then *bufPtr will be nil, so we
617
+ // initialize it. We delay this initial allocation here because a
618
+ // program could have many function calls but exit earlier than the
619
+ // first call, so in that case we avoid allocating unnecessarily
620
+ * bufPtr = make ([]any , estimatedFnArgsCount )
621
+ }
622
+
623
+ // Step 2: get the final slice that will be returned
624
+ var buf []any
625
+ if len (* bufPtr ) >= needed {
626
+ // in this case, we are successfully using the single preallocation. We
627
+ // use the full slice expression [low : high : max] because in that way
628
+ // a function that receives this slice as variadic arguments will not be
629
+ // able to make modifications to contiguous elements with append(). If
630
+ // they call append on their variadic arguments they will make a new
631
+ // allocation.
632
+ buf = (* bufPtr )[:needed :needed ]
633
+ * bufPtr = (* bufPtr )[needed :] // advance the buffer
634
+ } else {
635
+ // if we have been making calls to something like OpCallN with many more
636
+ // arguments than what we estimated, then we will need to allocate
637
+ // separately
638
+ buf = make ([]any , needed )
639
+ }
640
+
641
+ // Step 3: populate the final slice bulk copying from the stack. This is the
642
+ // exact order and copy() is a highly optimized operation
643
+ copy (buf , vm .Stack [len (vm .Stack )- needed :])
644
+ vm .Stack = vm .Stack [:len (vm .Stack )- needed ]
645
+
646
+ return buf
647
+ }
648
+
612
649
// Step sends an empty struct value on vm.step. Run receives from that channel
// at the top of its instruction loop when debugging is enabled
// (debug && vm.debug), so one send here pairs with one such receive.
func (vm * VM ) Step () {
613
650
vm .step <- struct {}{}
614
651
}
@@ -623,3 +660,31 @@ func clearSlice[S ~[]E, E any](s S) {
623
660
s [i ] = zero // clear mem, optimized by the compiler, in Go 1.21 the "clear" builtin can be used
624
661
}
625
662
}
663
+
664
+ // estimateFnArgsCount inspects a *Program and estimates how many function
665
+ // arguments will be required to run it.
666
+ func estimateFnArgsCount (program * Program ) int {
667
+ // Implementation note: a program will not necessarily go through all
668
+ // operations, but this is just an estimation
669
+ var count int
670
+ for _ , op := range program .Bytecode {
671
+ switch op {
672
+ case OpCall1 :
673
+ count ++
674
+ case OpCall2 :
675
+ count += 2
676
+ case OpCall3 :
677
+ count += 3
678
+ case OpCallN :
679
+ // we don't know exactly but we know at least 4, so be conservative
680
+ // as this is only an optimization and we also want to avoid
681
+ // excessive preallocation
682
+ count += 4
683
+ case OpCallFast , OpCallSafe :
684
+ // here we don't know either, but we can guess it could be common to
685
+ // receive up to 3 arguments in a function
686
+ count += 3
687
+ }
688
+ }
689
+ return count
690
+ }
0 commit comments