Skip to content

Commit cf666b4

Browse files
committed
optimize vm allocation of function arguments
1 parent 1c09e5e commit cf666b4

File tree

1 file changed

+92
-27
lines changed

1 file changed

+92
-27
lines changed

vm/vm.go

Lines changed: 92 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) {
8383
vm.memory = 0
8484
vm.ip = 0
8585

86+
var fnArgsBuf []any
87+
8688
for vm.ip < len(program.Bytecode) {
8789
if debug && vm.debug {
8890
<-vm.step
@@ -355,62 +357,47 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) {
355357
vm.push(out)
356358

357359
case OpCall1:
358-
a := vm.pop()
359-
out, err := program.functions[arg](a)
360+
args := vm.getArgsForFunc(&fnArgsBuf, program, 1)
361+
out, err := program.functions[arg](args...)
360362
if err != nil {
361363
panic(err)
362364
}
363365
vm.push(out)
364366

365367
case OpCall2:
366-
b := vm.pop()
367-
a := vm.pop()
368-
out, err := program.functions[arg](a, b)
368+
args := vm.getArgsForFunc(&fnArgsBuf, program, 2)
369+
out, err := program.functions[arg](args...)
369370
if err != nil {
370371
panic(err)
371372
}
372373
vm.push(out)
373374

374375
case OpCall3:
375-
c := vm.pop()
376-
b := vm.pop()
377-
a := vm.pop()
378-
out, err := program.functions[arg](a, b, c)
376+
args := vm.getArgsForFunc(&fnArgsBuf, program, 3)
377+
out, err := program.functions[arg](args...)
379378
if err != nil {
380379
panic(err)
381380
}
382381
vm.push(out)
383382

384383
case OpCallN:
385384
fn := vm.pop().(Function)
386-
size := arg
387-
in := make([]any, size)
388-
for i := int(size) - 1; i >= 0; i-- {
389-
in[i] = vm.pop()
390-
}
391-
out, err := fn(in...)
385+
args := vm.getArgsForFunc(&fnArgsBuf, program, arg)
386+
out, err := fn(args...)
392387
if err != nil {
393388
panic(err)
394389
}
395390
vm.push(out)
396391

397392
case OpCallFast:
398393
fn := vm.pop().(func(...any) any)
399-
size := arg
400-
in := make([]any, size)
401-
for i := int(size) - 1; i >= 0; i-- {
402-
in[i] = vm.pop()
403-
}
404-
vm.push(fn(in...))
394+
args := vm.getArgsForFunc(&fnArgsBuf, program, arg)
395+
vm.push(fn(args...))
405396

406397
case OpCallSafe:
407398
fn := vm.pop().(SafeFunction)
408-
size := arg
409-
in := make([]any, size)
410-
for i := int(size) - 1; i >= 0; i-- {
411-
in[i] = vm.pop()
412-
}
413-
out, mem, err := fn(in...)
399+
args := vm.getArgsForFunc(&fnArgsBuf, program, arg)
400+
out, mem, err := fn(args...)
414401
if err != nil {
415402
panic(err)
416403
}
@@ -609,6 +596,56 @@ func (vm *VM) scope() *Scope {
609596
return vm.Scopes[len(vm.Scopes)-1]
610597
}
611598

599+
// getArgsForFunc lazily initializes the buffer the first time it is called for
600+
// a given program (thus, it also needs "program" to run). It will
601+
// take "needed" elements from the buffer and populate them with vm.pop() in
602+
// reverse order. Because the estimation can fall short, this function can
603+
// occasionally make a new allocation.
604+
func (vm *VM) getArgsForFunc(bufPtr *[]any, program *Program, needed int) []any {
605+
// Step 1: fix estimations and preallocate
606+
if *bufPtr == nil {
607+
estimatedFnArgsCount := estimateFnArgsCount(program)
608+
if estimatedFnArgsCount < needed {
609+
// in the case that the first call is for example OpCallN with a large
610+
// number of arguments, then make sure we will be able to serve them at
611+
// least.
612+
estimatedFnArgsCount = needed
613+
}
614+
615+
// in the case that we are preparing the arguments for the first
616+
// function call of the program, then *bufPtr will be nil, so we
617+
// initialize it. We delay this initial allocation here because a
618+
// program could have many function calls but exit earlier than the
619+
// first call, so in that case we avoid allocating unnecessarily
620+
*bufPtr = make([]any, estimatedFnArgsCount)
621+
}
622+
623+
// Step 2: get the final slice that will be returned
624+
var buf []any
625+
if len(*bufPtr) >= needed {
626+
// in this case, we are successfully using the single preallocation. We
627+
// use the full slice expression [low : high : max] because in that way
628+
// a function that receives this slice as variadic arguments will not be
629+
// able to make modifications to contiguous elements with append(). If
630+
// they call append on their variadic arguments they will make a new
631+
// allocation.
632+
buf = (*bufPtr)[:needed:needed]
633+
*bufPtr = (*bufPtr)[needed:] // advance the buffer
634+
} else {
635+
// if we have been making calls to something like OpCallN with many more
636+
// arguments than what we estimated, then we will need to allocate
637+
// separately
638+
buf = make([]any, needed)
639+
}
640+
641+
// Step 3: populate the final slice bulk copying from the stack. This is the
642+
// exact order and copy() is a highly optimized operation
643+
copy(buf, vm.Stack[len(vm.Stack)-needed:])
644+
vm.Stack = vm.Stack[:len(vm.Stack)-needed]
645+
646+
return buf
647+
}
648+
612649
// Step sends an empty struct value on the vm.step channel. Run receives from
// that channel at the top of each bytecode loop iteration when debugging is
// enabled, so presumably each call to Step lets Run proceed by one iteration
// (NOTE(review): confirm how vm.step is created/buffered).
func (vm *VM) Step() {
613650
vm.step <- struct{}{}
614651
}
@@ -623,3 +660,31 @@ func clearSlice[S ~[]E, E any](s S) {
623660
s[i] = zero // clear mem, optimized by the compiler, in Go 1.21 the "clear" builtin can be used
624661
}
625662
}
663+
664+
// estimateFnArgsCount inspects a *Program and estimates how many function
665+
// arguments will be required to run it.
666+
func estimateFnArgsCount(program *Program) int {
667+
// Implementation note: a program will not necessarily go through all
668+
// operations, but this is just an estimation
669+
var count int
670+
for _, op := range program.Bytecode {
671+
switch op {
672+
case OpCall1:
673+
count++
674+
case OpCall2:
675+
count += 2
676+
case OpCall3:
677+
count += 3
678+
case OpCallN:
679+
// we don't know exactly but we know at least 4, so be conservative
680+
// as this is only an optimization and we also want to avoid
681+
// excessive preallocation
682+
count += 4
683+
case OpCallFast, OpCallSafe:
684+
// here we don't know either, but we can guess it could be common to
685+
// receive up to 3 arguments in a function
686+
count += 3
687+
}
688+
}
689+
return count
690+
}

0 commit comments

Comments
 (0)