Speed up calling iseq bmethods

Currently, bmethod arguments are copied from the VM stack to the
C stack in vm_call_bmethod, then copied from the C stack to the VM
stack later in invoke_iseq_block_from_c.  This is inefficient.

This adds vm_call_iseq_bmethod and vm_call_noniseq_bmethod.
vm_call_iseq_bmethod is an optimized method that skips stack
copies (though there is one copy to remove the receiver from
the stack), and avoids calling vm_call_bmethod_body,
rb_vm_invoke_bmethod, invoke_block_from_c_proc,
invoke_iseq_block_from_c, and vm_yield_setup_args.

The vm_call_iseq_bmethod argument handling is similar to the
way normal iseq methods are called, and allows for similar
performance optimizations when using splats or keywords.
However, even in the no argument case it's still significantly
faster.

A benchmark is added for bmethod calling.  In my environment,
it improves bmethod calling performance by 38-59% for simple
bmethod calls, and up to 180% for bmethod calls passing
literal keywords on both sides.

```

                   bmethod_simple_0
./miniruby-iseq-bmethod:  18159792.6 i/s
          ./miniruby-m:  13174419.1 i/s - 1.38x  slower

                   bmethod_simple_1
./miniruby-iseq-bmethod:  15890745.4 i/s
          ./miniruby-m:  10008972.7 i/s - 1.59x  slower

             bmethod_simple_0_splat
./miniruby-iseq-bmethod:  13142804.3 i/s
          ./miniruby-m:  11168595.2 i/s - 1.18x  slower

             bmethod_simple_1_splat
./miniruby-iseq-bmethod:  12375791.0 i/s
          ./miniruby-m:   8491140.1 i/s - 1.46x  slower

                   bmethod_no_splat
./miniruby-iseq-bmethod:  10151258.8 i/s
          ./miniruby-m:   8716664.1 i/s - 1.16x  slower

                    bmethod_0_splat
./miniruby-iseq-bmethod:   8138802.5 i/s
          ./miniruby-m:   7515600.2 i/s - 1.08x  slower

                    bmethod_1_splat
./miniruby-iseq-bmethod:   8028372.7 i/s
          ./miniruby-m:   5947658.6 i/s - 1.35x  slower

                   bmethod_10_splat
./miniruby-iseq-bmethod:   6953514.1 i/s
          ./miniruby-m:   4840132.9 i/s - 1.44x  slower

                  bmethod_100_splat
./miniruby-iseq-bmethod:   5287288.4 i/s
          ./miniruby-m:   2243218.4 i/s - 2.36x  slower

                         bmethod_kw
./miniruby-iseq-bmethod:   8931358.2 i/s
          ./miniruby-m:   3185818.6 i/s - 2.80x  slower

                      bmethod_no_kw
./miniruby-iseq-bmethod:  12281287.4 i/s
          ./miniruby-m:  10041727.9 i/s - 1.22x  slower

                   bmethod_kw_splat
./miniruby-iseq-bmethod:   5618956.8 i/s
          ./miniruby-m:   3657549.5 i/s - 1.54x  slower
```
This commit is contained in:
Jeremy Evans 2023-03-23 14:39:31 -07:00
parent 99c6d19e50
commit f6254f77f7
Notes: git 2023-04-25 15:06:42 +00:00
3 changed files with 124 additions and 5 deletions

View File

@ -0,0 +1,37 @@
# Benchmarks for calling methods created with define_method (bmethods).
# The prelude defines four such methods (no parameter, one parameter,
# rest parameter, optional keyword) plus the argument arrays and keyword
# hash used by the splat cases below.
prelude: |
define_method(:a0){}
define_method(:a1){|a| a}
define_method(:s){|*a| a}
define_method(:b){|kw: 1| kw}
t0 = 0.times.to_a
t1 = 1.times.to_a
t10 = 10.times.to_a
t100 = 100.times.to_a
kw = {kw: 2}
benchmark:
bmethod_simple_0: |
a0
bmethod_simple_1: |
a1(1)
bmethod_simple_0_splat: |
a0(*t0)
bmethod_simple_1_splat: |
a1(*t1)
bmethod_no_splat: |
s
bmethod_0_splat: |
s(*t0)
bmethod_1_splat: |
s(*t1)
bmethod_10_splat: |
s(*t10)
bmethod_100_splat: |
s(*t100)
bmethod_kw: |
b(kw: 1)
bmethod_no_kw: |
b
bmethod_kw_splat: |
b(**kw)
loop_count: 6000000

View File

@ -100,6 +100,19 @@ class TestCall < Test::Unit::TestCase
}
end
def test_call_bmethod_proc
  # A bmethod built from a proc taking one parameter receives the single
  # splatted element itself.
  args = [10]
  define_singleton_method(:a, &proc { |sym| sym })
  assert_equal(10, a(*args))

  # Redefine with a rest parameter: both the splatted and the direct call
  # collect the argument into an array.
  define_singleton_method(:a, &proc { |*sym| sym })
  assert_equal([10], a(*args))
  assert_equal([10], a(10))
end
def test_call_splat_order
bug12860 = '[ruby-core:77701] [Bug# 12860]'
ary = [1, 2]

View File

@ -3555,16 +3555,61 @@ vm_call_bmethod_body(rb_execution_context_t *ec, struct rb_calling_info *calling
return val;
}
static int vm_callee_setup_block_arg(rb_execution_context_t *ec, struct rb_calling_info *calling, const struct rb_callinfo *ci, const rb_iseq_t *iseq, VALUE *argv, const enum arg_setup_type arg_setup_type);
static VALUE invoke_bmethod(rb_execution_context_t *ec, const rb_iseq_t *iseq, VALUE self, const struct rb_captured_block *captured, const rb_callable_method_entry_t *me, VALUE type, int opt_pc);
/*
 * vm_call_iseq_bmethod: call a bmethod whose underlying block is an iseq
 * block (asserted below).
 *
 * The arguments already on the VM stack are shifted down one slot over the
 * receiver, parameters are set up in place on the VM stack, and the block
 * is run through invoke_bmethod with calling->recv as self.
 *
 * Raises RuntimeError when the Proc is not shareable and the method was
 * defined in a different Ractor than the one executing ec.
 *
 * Returns whatever invoke_bmethod returns.
 */
static VALUE
vm_call_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
vm_call_iseq_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
{
RB_DEBUG_COUNTER_INC(ccf_bmethod);
RB_DEBUG_COUNTER_INC(ccf_iseq_bmethod);
const struct rb_callcache *cc = calling->cc;
const rb_callable_method_entry_t *cme = vm_cc_cme(cc);
VALUE procv = cme->def->body.bmethod.proc;
/* An un-shareable Proc may only be called from the Ractor that defined the method. */
if (!RB_OBJ_SHAREABLE_P(procv) &&
cme->def->body.bmethod.defined_ractor != rb_ractor_self(rb_ec_ractor_ptr(ec))) {
rb_raise(rb_eRuntimeError, "defined with an un-shareable Proc in a different Ractor");
}
rb_proc_t *proc;
GetProcPtr(procv, proc);
const struct rb_block *block = &proc->block;
/* Follow proc-type blocks until a non-proc block is reached. */
while (vm_block_type(block) == block_type_proc) {
block = vm_proc_block(block->as.proc);
}
VM_ASSERT(vm_block_type(block) == block_type_iseq);
const struct rb_captured_block *captured = &block->as.captured;
const rb_iseq_t *iseq = rb_iseq_check(captured->code.iseq);
int i, opt_pc;
/*
 * sp is the slot just below the argc arguments (the receiver's slot).
 * Moving every argument down by one overwrites that slot, so the
 * arguments start at sp.
 */
VALUE *sp = cfp->sp - calling->argc - 1;
for (i = 0; i < calling->argc; i++) {
sp[i] = sp[i+1];
}
/* Simple call sites use the block-arg setup; all others use the general setup. */
if (vm_ci_flag(calling->ci) & VM_CALL_ARGS_SIMPLE) {
opt_pc = vm_callee_setup_block_arg(ec, calling, calling->ci, iseq, sp, arg_setup_method);
}
else {
opt_pc = setup_parameters_complex(ec, iseq, calling, calling->ci, sp, arg_setup_method);
}
/* Point the caller frame's sp at the base of the shifted arguments. */
cfp->sp = sp;
return invoke_bmethod(ec, iseq, calling->recv, captured, cme,
VM_FRAME_MAGIC_BLOCK | VM_FRAME_FLAG_LAMBDA, opt_pc);
}
static VALUE
vm_call_noniseq_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
{
RB_DEBUG_COUNTER_INC(ccf_noniseq_bmethod);
VALUE *argv;
int argc;
const struct rb_callinfo *ci = calling->ci;
CALLER_SETUP_ARG(cfp, calling, ci, ALLOW_HEAP_ARGV);
CALLER_SETUP_ARG(cfp, calling, calling->ci, ALLOW_HEAP_ARGV);
if (UNLIKELY(calling->heap_argv)) {
argv = RARRAY_PTR(calling->heap_argv);
cfp->sp -= 2;
@ -3579,6 +3624,30 @@ vm_call_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_c
return vm_call_bmethod_body(ec, calling, argv);
}
/*
 * Generic entry point for calling a bmethod.
 *
 * Unwraps the method's Proc down to its innermost non-proc block, then
 * hands the call to vm_call_iseq_bmethod when that block is an iseq block
 * and to vm_call_noniseq_bmethod otherwise.  The chosen handler is also
 * passed to CC_SET_FASTPATH for the call cache before it is invoked.
 *
 * Returns the value produced by the selected handler.
 */
static VALUE
vm_call_bmethod(rb_execution_context_t *ec, rb_control_frame_t *cfp, struct rb_calling_info *calling)
{
    RB_DEBUG_COUNTER_INC(ccf_bmethod);

    const struct rb_callcache *cc = calling->cc;
    const rb_callable_method_entry_t *cme = vm_cc_cme(cc);
    VALUE bmethod_proc = cme->def->body.bmethod.proc;
    rb_proc_t *proc_ptr;
    GetProcPtr(bmethod_proc, proc_ptr);

    /* Walk through proc-type blocks until a non-proc block is found. */
    const struct rb_block *blk;
    for (blk = &proc_ptr->block;
         vm_block_type(blk) == block_type_proc;
         blk = vm_proc_block(blk->as.proc)) {
        /* nothing to do: the loop header performs the unwrapping */
    }

    if (vm_block_type(blk) != block_type_iseq) {
        CC_SET_FASTPATH(cc, vm_call_noniseq_bmethod, TRUE);
        return vm_call_noniseq_bmethod(ec, cfp, calling);
    }

    CC_SET_FASTPATH(cc, vm_call_iseq_bmethod, TRUE);
    return vm_call_iseq_bmethod(ec, cfp, calling);
}
VALUE
rb_find_defined_class_by_owner(VALUE current_class, VALUE target_owner)
{