WIP JIT-to-JIT returns

Maxime Chevalier-Boisvert 2021-02-09 16:24:06 -05:00 committed by Alan Wu
parent 6341fc21b2
commit 9d8cc01b75
6 changed files with 91 additions and 20 deletions


@@ -114,7 +114,7 @@ Compile an interpreter entry block to be inserted into an iseq
 Returns `NULL` if compilation fails.
 */
 uint8_t*
-ujit_entry_prologue()
+ujit_entry_prologue(void)
 {
     RUBY_ASSERT(cb != NULL);
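
Why the (void) matters: in C (before C23), an empty parameter list () leaves the number of arguments unspecified, while (void) declares a function taking none, so the compiler can reject stray arguments. A minimal illustration with hypothetical names, not from this commit:

    int f();      /* unspecified parameters: f(1, 2) still compiles */
    int g(void);  /* no parameters: g(1) is a compile-time error */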
@@ -248,9 +248,9 @@ gen_dup(jitstate_t* jit, ctx_t* ctx)
     x86opnd_t dup_val = ctx_stack_pop(ctx, 1);
     x86opnd_t loc0 = ctx_stack_push(ctx, T_NONE);
     x86opnd_t loc1 = ctx_stack_push(ctx, T_NONE);
-    mov(cb, RAX, dup_val);
-    mov(cb, loc0, RAX);
-    mov(cb, loc1, RAX);
+    mov(cb, REG0, dup_val);
+    mov(cb, loc0, REG0);
+    mov(cb, loc1, REG0);
 
     return true;
 }
@@ -1191,6 +1191,23 @@ gen_opt_swb_cfunc(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const r
 bool rb_simple_iseq_p(const rb_iseq_t *iseq);
 
+void
+gen_return_branch(codeblock_t* cb, uint8_t* target0, uint8_t* target1, uint8_t shape)
+{
+    switch (shape)
+    {
+        case SHAPE_NEXT0:
+        case SHAPE_NEXT1:
+        RUBY_ASSERT(false);
+        break;
+
+        case SHAPE_DEFAULT:
+        mov(cb, REG0, const_ptr_opnd(target0));
+        mov(cb, member_opnd(REG_CFP, rb_control_frame_t, jit_return), REG0);
+        break;
+    }
+}
+
 static bool
 gen_opt_swb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb_callable_method_entry_t *cme, int32_t argc)
 {
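
gen_return_branch is a branch generation callback of the same kind gen_branch already uses to emit jumps, but with a twist: instead of jumping to target0, it writes target0's address into the current frame's new jit_return field, so the callee's leave sequence can later jump straight back into compiled caller code. The SHAPE_NEXT0/SHAPE_NEXT1 cases assert false because this pseudo-branch never falls through to either target. Roughly, the emitted instructions amount to this C (a sketch of the runtime effect, not literal source; cfp stands for the frame REG_CFP points at):

    /* At the call site, before entering the callee: */
    cfp->jit_return = target0;  /* address of the caller's compiled continuation */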
@@ -1251,13 +1268,32 @@ gen_opt_swb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb
     cmp(cb, klass_opnd, REG1);
     jne_ptr(cb, side_exit);
 
-    // Store incremented PC into current control frame in case callee raises.
+    // Store the updated SP on the current frame (pop arguments and receiver)
+    lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1)));
+    mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
+
+    // Store the next PC in the current frame
     mov(cb, REG0, const_ptr_opnd(jit->pc + insn_len(BIN(opt_send_without_block))));
     mov(cb, mem_opnd(64, REG_CFP, offsetof(rb_control_frame_t, pc)), REG0);
 
-    // Store the updated SP on the CFP (pop arguments and receiver)
-    lea(cb, REG0, ctx_sp_opnd(ctx, sizeof(VALUE) * -(argc + 1)));
-    mov(cb, member_opnd(REG_CFP, rb_control_frame_t, sp), REG0);
+    // Stub so we can return to JITted code
+    blockid_t return_block = { jit->iseq, jit_next_insn_idx(jit) };
+
+    // Pop arguments and receiver in return context, push the return value
+    // After the return, the JIT and interpreter SP will match up
+    ctx_t return_ctx = *ctx;
+    ctx_stack_pop(&return_ctx, argc);
+    return_ctx.sp_offset = 0;
+
+    // Write the JIT return address on the current frame
+    gen_branch(
+        ctx,
+        return_block,
+        &return_ctx,
+        return_block,
+        &return_ctx,
+        gen_return_branch
+    );
 
     // Stack overflow check
     // #define CHECK_VM_STACK_OVERFLOW0(cfp, sp, margin)
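
To make the return-context bookkeeping concrete, a worked example under assumed numbers: if the caller's ctx has stack_size == 5 and the call is obj.foo(a, b) (argc == 2), then ctx_stack_pop(&return_ctx, 2) leaves stack_size == 3 with the receiver's slot on top, which is the slot the return value will occupy. Forcing return_ctx.sp_offset = 0 records that the callee's leave re-syncs REG_SP with the interpreter SP, so in the return block ctx_stack_opnd(&return_ctx, 0) resolves to [REG_SP - 8], exactly where gen_leave writes the return value.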
@@ -1327,7 +1363,6 @@ gen_opt_swb_iseq(jitstate_t* jit, ctx_t* ctx, struct rb_call_data * cd, const rb
         &DEFAULT_CTX,
         (blockid_t){ iseq, 0 }
     );
-
     // TODO: create stub for call continuation
@@ -1432,7 +1467,31 @@ gen_leave(jitstate_t* jit, ctx_t* ctx)
     mov(cb, REG_SP, member_opnd(REG_CFP, rb_control_frame_t, sp));
     mov(cb, mem_opnd(64, REG_SP, -SIZEOF_VALUE), REG0);
 
-    // Write the post call bytes
+    // Load the JIT return address
+    mov(cb, REG0, member_opnd(REG_CFP, rb_control_frame_t, jit_return));
+
+    // If the return address is NULL, fall back to the interpreter
+    int FALLBACK_LABEL = cb_new_label(cb, "FALLBACK");
+    cmp(cb, REG0, imm_opnd(0));
+    jz(cb, FALLBACK_LABEL);
+
+    // Jump to the JIT return address
+    jmp_rm(cb, REG0);
+
+    // Fall back to the interpreter
+    cb_write_label(cb, FALLBACK_LABEL);
+    cb_link_labels(cb);
+
     cb_write_post_call_bytes(cb);
 
     return true;
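
In C terms, the new gen_leave epilogue behaves roughly like this (a sketch of the emitted control flow; the computed goto is a GCC extension standing in for the indirect jmp_rm):

    void *ret = cfp->jit_return;  /* set by gen_return_branch at the call site */
    if (ret != NULL)
        goto *ret;  /* JIT-to-JIT return: resume the compiled caller directly */
    /* ret == NULL: no compiled continuation exists, so run the post-call
       bytes and hand control back to the interpreter as before */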


@@ -32,7 +32,7 @@ Get an operand for the adjusted stack pointer address
 x86opnd_t
 ctx_sp_opnd(ctx_t* ctx, int32_t offset_bytes)
 {
-    int32_t offset = (ctx->stack_size) * sizeof(VALUE) + offset_bytes;
+    int32_t offset = (ctx->sp_offset * sizeof(VALUE)) + offset_bytes;
     return mem_opnd(64, REG_SP, offset);
 }
@@ -49,9 +49,10 @@ ctx_stack_push(ctx_t* ctx, int type)
     ctx->temp_types[ctx->stack_size] = type;
     ctx->stack_size += 1;
+    ctx->sp_offset += 1;
 
     // SP points just above the topmost value
-    int32_t offset = (ctx->stack_size - 1) * sizeof(VALUE);
+    int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE);
     return mem_opnd(64, REG_SP, offset);
 }
@@ -65,7 +66,7 @@ ctx_stack_pop(ctx_t* ctx, size_t n)
     RUBY_ASSERT(n <= ctx->stack_size);
 
     // SP points just above the topmost value
-    int32_t offset = (ctx->stack_size - 1) * sizeof(VALUE);
+    int32_t offset = (ctx->sp_offset - 1) * sizeof(VALUE);
     x86opnd_t top = mem_opnd(64, REG_SP, offset);
 
     // Clear the types of the popped values
@@ -77,6 +78,7 @@ ctx_stack_pop(ctx_t* ctx, size_t n)
     }
 
     ctx->stack_size -= n;
+    ctx->sp_offset -= n;
 
     return top;
 }
@@ -88,7 +90,7 @@ x86opnd_t
 ctx_stack_opnd(ctx_t* ctx, int32_t idx)
 {
     // SP points just above the topmost value
-    int32_t offset = (ctx->stack_size - 1 - idx) * sizeof(VALUE);
+    int32_t offset = (ctx->sp_offset - 1 - idx) * sizeof(VALUE);
     x86opnd_t opnd = mem_opnd(64, REG_SP, offset);
 
     return opnd;
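
The pattern across these hunks is the same: every REG_SP-relative address now derives from sp_offset, while stack_size keeps counting how many values are logically on the temporary stack, and the two can now diverge. For instance, just after a call returns, a block may have stack_size == 3 but sp_offset == 0, because the SP was written back to the frame before the call. In that state, assuming 8-byte VALUEs:

    ctx_stack_opnd(ctx, 0);  /* (0 - 1 - 0) * 8 => [REG_SP - 8], the return value */
    ctx_stack_opnd(ctx, 1);  /* (0 - 1 - 1) * 8 => [REG_SP - 16] */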
@@ -120,6 +122,9 @@ int ctx_diff(const ctx_t* src, const ctx_t* dst)
     if (dst->stack_size != src->stack_size)
         return INT_MAX;
 
+    if (dst->sp_offset != src->sp_offset)
+        return INT_MAX;
+
     if (dst->self_is_object != src->self_is_object)
         return INT_MAX;
@@ -345,6 +350,7 @@ uint8_t* branch_stub_hit(uint32_t branch_idx, uint32_t target_idx)
         // Limit the number of block versions
         ctx_t generic_ctx = DEFAULT_CTX;
         generic_ctx.stack_size = target_ctx->stack_size;
+        generic_ctx.sp_offset = target_ctx->sp_offset;
         if (count_block_versions(target) >= MAX_VERSIONS - 1)
         {
             fprintf(stderr, "version limit hit in branch_stub_hit\n");
@@ -383,7 +389,6 @@ uint8_t* branch_stub_hit(uint32_t branch_idx, uint32_t target_idx)
 }
 
 // Get a version or stub corresponding to a branch target
-// TODO: need incoming and target contexts
 uint8_t* get_branch_target(
     blockid_t target,
     const ctx_t* ctx,
@@ -440,13 +445,13 @@ void gen_branch(
 )
 {
     RUBY_ASSERT(target0.iseq != NULL);
-    RUBY_ASSERT(target1.iseq != NULL);
+    //RUBY_ASSERT(target1.iseq != NULL);
     RUBY_ASSERT(num_branches < MAX_BRANCHES);
     uint32_t branch_idx = num_branches++;
 
     // Get the branch targets or stubs
     uint8_t* dst_addr0 = get_branch_target(target0, ctx0, branch_idx, 0);
-    uint8_t* dst_addr1 = get_branch_target(target1, ctx1, branch_idx, 1);
+    uint8_t* dst_addr1 = ctx1? get_branch_target(target1, ctx1, branch_idx, 1):NULL;
 
     // Call the branch generation function
     uint32_t start_pos = cb->write_pos;
@@ -459,7 +464,7 @@ void gen_branch(
         end_pos,
         *src_ctx,
         { target0, target1 },
-        { *ctx0, *ctx1 },
+        { *ctx0, ctx1? *ctx1:DEFAULT_CTX },
         { dst_addr0, dst_addr1 },
         gen_fn,
         SHAPE_DEFAULT
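
These two guards make gen_branch tolerate a branch with only one materialized target: passing ctx1 == NULL skips stub generation for target1 and records DEFAULT_CTX as a placeholder in the branch entry. The commented-out assert on target1.iseq loosens the old invariant to match; given the WIP title, this reads as scaffolding for the new return branches rather than a finished API.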
@@ -508,6 +513,7 @@ void gen_direct_jump(
         // Limit the number of block versions
         ctx_t generic_ctx = DEFAULT_CTX;
         generic_ctx.stack_size = ctx->stack_size;
+        generic_ctx.sp_offset = ctx->sp_offset;
         if (count_block_versions(target0) >= MAX_VERSIONS - 1)
         {
             fprintf(stderr, "version limit hit in branch_stub_hit\n");


@@ -31,9 +31,12 @@ typedef struct CtxStruct
     // T_NONE==0 is the unknown type
     uint8_t temp_types[MAX_TEMP_TYPES];
 
-    // Number of values pushed on the temporary stack
+    // Number of values currently on the temporary stack
     uint16_t stack_size;
 
+    // Offset of the JIT SP relative to the interpreter SP
+    int16_t sp_offset;
+
     // Whether we know self is a heap object
     bool self_is_object : 1;

vm.c

@@ -202,7 +202,7 @@ VM_CAPTURED_BLOCK_TO_CFP(const struct rb_captured_block *captured)
 {
     rb_control_frame_t *cfp = ((rb_control_frame_t *)((VALUE *)(captured) - 3));
     VM_ASSERT(!VM_CFP_IN_HEAP_P(GET_EC(), cfp));
-    VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 7 + VM_DEBUG_BP_CHECK ? 1 : 0);
+    VM_ASSERT(sizeof(rb_control_frame_t)/sizeof(VALUE) == 8 + VM_DEBUG_BP_CHECK ? 1 : 0);
     return cfp;
 }
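
The slot count in this assertion goes from 7 to 8 because rb_control_frame_t (below) gains one pointer-sized member; on the 64-bit targets uJIT runs on, sizeof(void *) == sizeof(VALUE), so the frame grows by exactly one VALUE-sized slot. As it happens, the expression parses as (sizeof(rb_control_frame_t)/sizeof(VALUE) == 8 + VM_DEBUG_BP_CHECK) ? 1 : 0, which still asserts the intended comparison as long as VM_DEBUG_BP_CHECK is 0 or 1.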


@@ -790,6 +790,8 @@ typedef struct rb_control_frame_struct {
 #if VM_DEBUG_BP_CHECK
     VALUE *bp_check; /* cfp[7] */
 #endif
+    // Return address for uJIT code
+    void *jit_return;
 } rb_control_frame_t;
 
 extern const rb_data_type_t ruby_threadptr_data_type;


@@ -390,6 +390,7 @@ vm_push_frame(rb_execution_context_t *ec,
 #if VM_DEBUG_BP_CHECK
         .bp_check = sp,
 #endif
+        .jit_return = NULL
     };
 
     ec->cfp = cfp;
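
One detail: members omitted from a C99 designated initializer are zero-initialized anyway, so the explicit .jit_return = NULL is belt-and-braces documentation. What it spells out is the invariant the rest of the commit relies on: every freshly pushed frame starts with no JIT return address, which is precisely the NULL that gen_leave's fallback path tests before returning to the interpreter.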