@@ -83,6 +83,8 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) {
8383 vm .memory = 0
8484 vm .ip = 0
8585
86+ var fnArgsBuf []any
87+
8688 for vm .ip < len (program .Bytecode ) {
8789 if debug && vm .debug {
8890 <- vm .step
@@ -355,62 +357,47 @@ func (vm *VM) Run(program *Program, env any) (_ any, err error) {
355357 vm .push (out )
356358
357359 case OpCall1 :
358- a := vm .pop ( )
359- out , err := program.functions [arg ](a )
360+ args := vm .getArgsForFunc ( & fnArgsBuf , program , 1 )
361+ out , err := program .functions [arg ](args ... )
360362 if err != nil {
361363 panic (err )
362364 }
363365 vm .push (out )
364366
365367 case OpCall2 :
366- b := vm .pop ()
367- a := vm .pop ()
368- out , err := program .functions [arg ](a , b )
368+ args := vm .getArgsForFunc (& fnArgsBuf , program , 2 )
369+ out , err := program .functions [arg ](args ... )
369370 if err != nil {
370371 panic (err )
371372 }
372373 vm .push (out )
373374
374375 case OpCall3 :
375- c := vm .pop ()
376- b := vm .pop ()
377- a := vm .pop ()
378- out , err := program .functions [arg ](a , b , c )
376+ args := vm .getArgsForFunc (& fnArgsBuf , program , 3 )
377+ out , err := program .functions [arg ](args ... )
379378 if err != nil {
380379 panic (err )
381380 }
382381 vm .push (out )
383382
384383 case OpCallN :
385384 fn := vm .pop ().(Function )
386- size := arg
387- in := make ([]any , size )
388- for i := int (size ) - 1 ; i >= 0 ; i -- {
389- in [i ] = vm .pop ()
390- }
391- out , err := fn (in ... )
385+ args := vm .getArgsForFunc (& fnArgsBuf , program , arg )
386+ out , err := fn (args ... )
392387 if err != nil {
393388 panic (err )
394389 }
395390 vm .push (out )
396391
397392 case OpCallFast :
398393 fn := vm .pop ().(func (... any ) any )
399- size := arg
400- in := make ([]any , size )
401- for i := int (size ) - 1 ; i >= 0 ; i -- {
402- in [i ] = vm .pop ()
403- }
404- vm .push (fn (in ... ))
394+ args := vm .getArgsForFunc (& fnArgsBuf , program , arg )
395+ vm .push (fn (args ... ))
405396
406397 case OpCallSafe :
407398 fn := vm .pop ().(SafeFunction )
408- size := arg
409- in := make ([]any , size )
410- for i := int (size ) - 1 ; i >= 0 ; i -- {
411- in [i ] = vm .pop ()
412- }
413- out , mem , err := fn (in ... )
399+ args := vm .getArgsForFunc (& fnArgsBuf , program , arg )
400+ out , mem , err := fn (args ... )
414401 if err != nil {
415402 panic (err )
416403 }
@@ -609,6 +596,56 @@ func (vm *VM) scope() *Scope {
609596 return vm .Scopes [len (vm .Scopes )- 1 ]
610597}
611598
599+ // getArgsForFunc lazily initializes the buffer the first time it is called for
600+ // a given program (thus, it also needs "program" to run). It will
601+ // take "needed" elements from the buffer and populate them with vm.pop() in
602+ // reverse order. Because the estimation can fall short, this function can
603+ // occasionally make a new allocation.
604+ func (vm * VM ) getArgsForFunc (bufPtr * []any , program * Program , needed int ) []any {
605+ // Step 1: fix estimations and preallocate
606+ if * bufPtr == nil {
607+ estimatedFnArgsCount := estimateFnArgsCount (program )
608+ if estimatedFnArgsCount < needed {
609+ // in the case that the first call is for example OpCallN with a large
610+ // number of arguments, then make sure we will be able to serve them at
611+ // least.
612+ estimatedFnArgsCount = needed
613+ }
614+
615+ // in the case that we are preparing the arguments for the first
616+ // function call of the program, then *bufPtr will be nil, so we
617+ // initialize it. We delay this initial allocation here because a
618+ // program could have many function calls but exit earlier than the
619+ // first call, so in that case we avoid allocating unnecessarily
620+ * bufPtr = make ([]any , estimatedFnArgsCount )
621+ }
622+
623+ // Step 2: get the final slice that will be returned
624+ var buf []any
625+ if len (* bufPtr ) >= needed {
626+ // in this case, we are successfully using the single preallocation. We
627+ // use the full slice expression [low : high : max] because in that way
628+ // a function that receives this slice as variadic arguments will not be
629+ // able to make modifications to contiguous elements with append(). If
630+ // they call append on their variadic arguments they will make a new
631+ // allocation.
632+ buf = (* bufPtr )[:needed :needed ]
633+ * bufPtr = (* bufPtr )[needed :] // advance the buffer
634+ } else {
635+ // if we have been making calls to something like OpCallN with many more
636+ // arguments than what we estimated, then we will need to allocate
637+ // separately
638+ buf = make ([]any , needed )
639+ }
640+
641+ // Step 3: populate the final slice bulk copying from the stack. This is the
642+ // exact order and copy() is a highly optimized operation
643+ copy (buf , vm .Stack [len (vm .Stack )- needed :])
644+ vm .Stack = vm .Stack [:len (vm .Stack )- needed ]
645+
646+ return buf
647+ }
648+
// Step unblocks one VM iteration when running under the debugger: Run blocks
// on a receive from vm.step before executing each opcode (when debug mode is
// enabled), and this send lets exactly one instruction proceed.
func (vm *VM) Step() {
	vm.step <- struct{}{}
}
@@ -623,3 +660,31 @@ func clearSlice[S ~[]E, E any](s S) {
623660 s [i ] = zero // clear mem, optimized by the compiler, in Go 1.21 the "clear" builtin can be used
624661 }
625662}
663+
664+ // estimateFnArgsCount inspects a *Program and estimates how many function
665+ // arguments will be required to run it.
666+ func estimateFnArgsCount (program * Program ) int {
667+ // Implementation note: a program will not necessarily go through all
668+ // operations, but this is just an estimation
669+ var count int
670+ for _ , op := range program .Bytecode {
671+ switch op {
672+ case OpCall1 :
673+ count ++
674+ case OpCall2 :
675+ count += 2
676+ case OpCall3 :
677+ count += 3
678+ case OpCallN :
679+ // we don't know exactly but we know at least 4, so be conservative
680+ // as this is only an optimization and we also want to avoid
681+ // excessive preallocation
682+ count += 4
683+ case OpCallFast , OpCallSafe :
684+ // here we don't know either, but we can guess it could be common to
685+ // receive up to 3 arguments in a function
686+ count += 3
687+ }
688+ }
689+ return count
690+ }
0 commit comments