Working towards having YJIT and ZJIT in the same build, we need to deduplicate some glue code that would otherwise cause name collisions. Add jit.c for this purpose and build it for both YJIT and ZJIT builds. Update bindgen to look at jit.c; this shuffles some functions in the generated output, but the set of functions shouldn't have changed.
// This part of YJIT helps interfacing with the rest of CRuby and with the OS.
// Sometimes our FFI binding generation tool gives undesirable outputs when it
// sees C features that Rust doesn't support well. We mitigate that by binding
// functions which have simple parameter types. The boilerplate C functions for
// that purpose are in this file.
// Similarly, we wrap OS facilities we need in simple functions to help with
// FFI and to avoid the need to use external crates.io Rust libraries.

#include "internal.h"
#include "internal/sanitizers.h"
#include "internal/string.h"
#include "internal/hash.h"
#include "internal/variable.h"
#include "internal/compile.h"
#include "internal/class.h"
#include "internal/fixnum.h"
#include "internal/numeric.h"
#include "internal/gc.h"
#include "vm_core.h"
#include "vm_callinfo.h"
#include "builtin.h"
#include "insns.inc"
#include "insns_info.inc"
#include "vm_sync.h"
#include "yjit.h"
#include "vm_insnhelper.h"
#include "probes.h"
#include "probes_helper.h"
#include "iseq.h"
#include "ruby/debug.h"
#include "internal/cont.h"

// For mmap(), sysconf()
#ifndef _WIN32
#include <unistd.h>
#include <sys/mman.h>
#endif

#include <errno.h>

// Field offsets for the RObject struct
enum robject_offsets {
    ROBJECT_OFFSET_AS_HEAP_IVPTR = offsetof(struct RObject, as.heap.ivptr),
    ROBJECT_OFFSET_AS_ARY = offsetof(struct RObject, as.ary),
};

// Field offsets for the RString struct
enum rstring_offsets {
    RUBY_OFFSET_RSTRING_LEN = offsetof(struct RString, len)
};

// We need size_t to have a known size to simplify code generation and FFI.
// TODO(alan): check this in configure.ac to fail fast on 32-bit platforms.
STATIC_ASSERT(64b_size_t, SIZE_MAX == UINT64_MAX);
// I don't know of any C implementation that has uint64_t and puts padding bits
// into size_t, but the standard seems to allow it.
STATIC_ASSERT(size_t_no_padding_bits, sizeof(size_t) == sizeof(uint64_t));

// This build config impacts the pointer tagging scheme and we only want to
// support one scheme for simplicity.
STATIC_ASSERT(pointer_tagging_scheme, USE_FLONUM);

// NOTE: We can trust that uint8_t has no "padding bits" since the C spec
// guarantees it. Wording about padding bits is more explicit in C11 compared
// to C99. See C11 7.20.1.1p2. All this is to say we have _some_ standards backing
// for using a Rust `*mut u8` to represent a C `uint8_t *`.
//
// If we don't want to trust that we can interpret the C standard correctly, we
// could outsource that work to the Rust standard library by sticking to fundamental
// types in C such as int, long, etc. and use `std::os::raw::c_long` and friends on
// the Rust side.
//
// What's up with the long prefix? Even though we build with `-fvisibility=hidden`
// we are sometimes a static library where the option doesn't prevent name collision.
// The "_yjit_" part is for trying to be informative. We might want different
// suffixes for symbols meant for Rust and symbols meant for broader CRuby.

bool
rb_yjit_mark_writable(void *mem_block, uint32_t mem_size)
{
    return mprotect(mem_block, mem_size, PROT_READ | PROT_WRITE) == 0;
}

void
rb_yjit_mark_executable(void *mem_block, uint32_t mem_size)
{
    // Do not call mprotect when mem_size is zero. Some platforms may return
    // an error for it. https://github.com/Shopify/ruby/issues/450
    if (mem_size == 0) {
        return;
    }
    if (mprotect(mem_block, mem_size, PROT_READ | PROT_EXEC)) {
        rb_bug("Couldn't make JIT page (%p, %lu bytes) executable, errno: %s",
            mem_block, (unsigned long)mem_size, strerror(errno));
    }
}

// Mark the specified memory block as unused so the OS can reclaim its pages.
bool
rb_yjit_mark_unused(void *mem_block, uint32_t mem_size)
{
    // On Linux, you need to use madvise MADV_DONTNEED to free memory.
    // We might not need to call this on macOS, but it's not really documented.
    // We generally prefer to do the same thing on both to ease testing too.
    madvise(mem_block, mem_size, MADV_DONTNEED);

    // On macOS, mprotect PROT_NONE seems to reduce RSS.
    // We also call this on Linux to avoid executing unused pages.
    return mprotect(mem_block, mem_size, PROT_NONE) == 0;
}

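// Expose rb_array_len(), a static inline function, to Rust.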
long
rb_yjit_array_len(VALUE a)
{
    return rb_array_len(a);
}

// `start` is inclusive and `end` is exclusive.
void
rb_yjit_icache_invalidate(void *start, void *end)
{
    // Clear/invalidate the instruction cache. Compiles to nothing on x86_64
    // but required on ARM before running freshly written code.
    // On Darwin it's the same as calling sys_icache_invalidate().
#ifdef __GNUC__
    __builtin___clear_cache(start, end);
#elif defined(__aarch64__)
#error No instruction cache clear available with this compiler on Aarch64!
#endif
}

# define PTR2NUM(x) (rb_int2inum((intptr_t)(void *)(x)))

// For a given raw_sample (frame), record the caller's name, file, and line
// number in `hash`, keyed by the frame.
static void
rb_yjit_add_frame(VALUE hash, VALUE frame)
{
    VALUE frame_id = PTR2NUM(frame);

    if (RTEST(rb_hash_aref(hash, frame_id))) {
        return;
    }
    else {
        VALUE frame_info = rb_hash_new();
        // Full label for the frame
        VALUE name = rb_profile_frame_full_label(frame);
        // Absolute path of the frame from rb_iseq_realpath
        VALUE file = rb_profile_frame_absolute_path(frame);
        // Line number of the frame
        VALUE line = rb_profile_frame_first_lineno(frame);

        // If absolute path isn't available use the rb_iseq_path
        if (NIL_P(file)) {
            file = rb_profile_frame_path(frame);
        }

        rb_hash_aset(frame_info, ID2SYM(rb_intern("name")), name);
        rb_hash_aset(frame_info, ID2SYM(rb_intern("file")), file);
        rb_hash_aset(frame_info, ID2SYM(rb_intern("samples")), INT2NUM(0));
        rb_hash_aset(frame_info, ID2SYM(rb_intern("total_samples")), INT2NUM(0));
        rb_hash_aset(frame_info, ID2SYM(rb_intern("edges")), rb_hash_new());
        rb_hash_aset(frame_info, ID2SYM(rb_intern("lines")), rb_hash_new());

        if (line != INT2FIX(0)) {
            rb_hash_aset(frame_info, ID2SYM(rb_intern("line")), line);
        }

        rb_hash_aset(hash, frame_id, frame_info);
    }
}

// Parses the YjitExitLocations raw_samples and line_samples collected by
// rb_yjit_record_exit_stack and turns them into 3 hashes (raw, lines, and frames) to
// be used by RubyVM::YJIT.exit_locations. yjit_raw_samples represents the raw frames information
// (without name, file, and line), and yjit_line_samples represents the line information
// of the iseq caller.
VALUE
rb_yjit_exit_locations_dict(VALUE *yjit_raw_samples, int *yjit_line_samples, int samples_len)
{
    VALUE result = rb_hash_new();
    VALUE raw_samples = rb_ary_new_capa(samples_len);
    VALUE line_samples = rb_ary_new_capa(samples_len);
    VALUE frames = rb_hash_new();
    int idx = 0;

    // While the index is less than samples_len, parse yjit_raw_samples and
    // yjit_line_samples, then add the cast values to the raw_samples and line_samples arrays.
    while (idx < samples_len) {
        int num = (int)yjit_raw_samples[idx];
        int line_num = (int)yjit_line_samples[idx];
        idx++;

        // + 1 as we append an additional sample for the insn
        rb_ary_push(raw_samples, SIZET2NUM(num + 1));
        rb_ary_push(line_samples, INT2NUM(line_num + 1));

        // Loop over the frames in this sample and add data to the frames hash.
        // Also push each value onto the raw_samples and line_samples arrays.
        for (int o = 0; o < num; o++) {
            rb_yjit_add_frame(frames, yjit_raw_samples[idx]);
            rb_ary_push(raw_samples, SIZET2NUM(yjit_raw_samples[idx]));
            rb_ary_push(line_samples, INT2NUM(yjit_line_samples[idx]));
            idx++;
        }

        rb_ary_push(raw_samples, SIZET2NUM(yjit_raw_samples[idx]));
        rb_ary_push(line_samples, INT2NUM(yjit_line_samples[idx]));
        idx++;

        rb_ary_push(raw_samples, SIZET2NUM(yjit_raw_samples[idx]));
        rb_ary_push(line_samples, INT2NUM(yjit_line_samples[idx]));
        idx++;
    }

    // Add the raw_samples, line_samples, and frames to the result hash.
    rb_hash_aset(result, ID2SYM(rb_intern("raw")), raw_samples);
    rb_hash_aset(result, ID2SYM(rb_intern("lines")), line_samples);
    rb_hash_aset(result, ID2SYM(rb_intern("frames")), frames);

    return result;
}

uint32_t
rb_yjit_get_page_size(void)
{
#if defined(_SC_PAGESIZE)
    long page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) rb_bug("yjit: failed to get page size");

    // 1 GiB limit. x86 CPUs with PDPE1GB can do this and anything larger is unexpected.
    // Though our design sort of assumes we have fine-grained control over memory
    // protection, which requires small page sizes.
    if (page_size > 0x40000000l) rb_bug("yjit page size too large");

    return (uint32_t)page_size;
#else
#error "YJIT supports POSIX only for now"
#endif
}

#if defined(MAP_FIXED_NOREPLACE) && defined(_SC_PAGESIZE)
// Align the current write position to a multiple of bytes
static uint8_t *
align_ptr(uint8_t *ptr, uint32_t multiple)
{
    // Compute the pointer modulo the given alignment boundary
    uint32_t rem = ((uint32_t)(uintptr_t)ptr) % multiple;

    // If the pointer is already aligned, stop
    if (rem == 0)
        return ptr;

    // Pad the pointer by the necessary amount to align it
    uint32_t pad = multiple - rem;

    return ptr + pad;
}
#endif

// Address space reservation. Memory pages are mapped on an as needed basis.
// See the Rust mm module for details.
uint8_t *
rb_yjit_reserve_addr_space(uint32_t mem_size)
{
#ifndef _WIN32
    uint8_t *mem_block;

    // On Linux
    #if defined(MAP_FIXED_NOREPLACE) && defined(_SC_PAGESIZE)
        uint32_t const page_size = (uint32_t)sysconf(_SC_PAGESIZE);
        uint8_t *const cfunc_sample_addr = (void *)(uintptr_t)&rb_yjit_reserve_addr_space;
        uint8_t *const probe_region_end = cfunc_sample_addr + INT32_MAX;
        // Align the requested address to page size
        uint8_t *req_addr = align_ptr(cfunc_sample_addr, page_size);

        // Probe for addresses close to this function using MAP_FIXED_NOREPLACE
        // to improve odds of being in range for 32-bit relative call instructions.
        do {
            mem_block = mmap(
                req_addr,
                mem_size,
                PROT_NONE,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE,
                -1,
                0
            );

            // If we succeeded, stop
            if (mem_block != MAP_FAILED) {
                ruby_annotate_mmap(mem_block, mem_size, "Ruby:rb_yjit_reserve_addr_space");
                break;
            }

            // -4MiB. Downwards to probe away from the heap. (On x86/A64 Linux
            // main_code_addr < heap_addr, and in case we are in a shared
            // library mapped higher than the heap, downwards is still better
            // since it's towards the end of the heap rather than the stack.)
            req_addr -= 4 * 1024 * 1024;
        } while (req_addr < probe_region_end);

    // On macOS and other platforms
    #else
        // Try to map a chunk of memory as executable
        mem_block = mmap(
            (void *)rb_yjit_reserve_addr_space,
            mem_size,
            PROT_NONE,
            MAP_PRIVATE | MAP_ANONYMOUS,
            -1,
            0
        );
    #endif

    // Fallback
    if (mem_block == MAP_FAILED) {
        // Try again without the address hint (e.g., valgrind)
        mem_block = mmap(
            NULL,
            mem_size,
            PROT_NONE,
            MAP_PRIVATE | MAP_ANONYMOUS,
            -1,
            0
        );

        if (mem_block != MAP_FAILED) {
            ruby_annotate_mmap(mem_block, mem_size, "Ruby:rb_yjit_reserve_addr_space:fallback");
        }
    }

    // Check that the memory mapping was successful
    if (mem_block == MAP_FAILED) {
        perror("ruby: yjit: mmap:");
        if (errno == ENOMEM) {
            // No crash report if it's only insufficient memory
            exit(EXIT_FAILURE);
        }
        rb_bug("mmap failed");
    }

    return mem_block;
#else
    // Windows not supported for now
    return NULL;
#endif
}

// Is anyone listening for :c_call and :c_return events currently?
bool
rb_c_method_tracing_currently_enabled(const rb_execution_context_t *ec)
{
    rb_event_flag_t tracing_events;
    if (rb_multi_ractor_p()) {
        tracing_events = ruby_vm_event_enabled_global_flags;
    }
    else {
        // At the time of writing, events are never removed from
        // ruby_vm_event_enabled_global_flags, so always checking it would
        // mean we don't compile even after tracing is disabled.
        tracing_events = rb_ec_ractor_hooks(ec)->events;
    }

    return tracing_events & (RUBY_EVENT_C_CALL | RUBY_EVENT_C_RETURN);
}

// The code we generate in gen_send_cfunc() doesn't fire the c_return TracePoint event
// like the interpreter. When tracing for c_return is enabled, we patch the code after
// the C method return to call into this to fire the event.
void
rb_full_cfunc_return(rb_execution_context_t *ec, VALUE return_value)
{
    rb_control_frame_t *cfp = ec->cfp;
    RUBY_ASSERT_ALWAYS(cfp == GET_EC()->cfp);
    const rb_callable_method_entry_t *me = rb_vm_frame_method_entry(cfp);

    RUBY_ASSERT_ALWAYS(RUBYVM_CFUNC_FRAME_P(cfp));
    RUBY_ASSERT_ALWAYS(me->def->type == VM_METHOD_TYPE_CFUNC);

    // CHECK_CFP_CONSISTENCY("full_cfunc_return"); TODO revive this

    // Pop the C func's frame and fire the c_return TracePoint event
    // Note that this is the same order as vm_call_cfunc_with_frame().
    rb_vm_pop_frame(ec);
    EXEC_EVENT_HOOK(ec, RUBY_EVENT_C_RETURN, cfp->self, me->def->original_id, me->called_id, me->owner, return_value);
    // Note, this deviates from the interpreter in that users need to enable
    // a c_return TracePoint for this DTrace hook to work. A reasonable change
    // since the Ruby return event works this way as well.
    RUBY_DTRACE_CMETHOD_RETURN_HOOK(ec, me->owner, me->def->original_id);

    // Push return value into the caller's stack. We know that it's a frame that
    // uses cfp->sp because we are patching a call done with gen_send_cfunc().
    ec->cfp->sp[0] = return_value;
    ec->cfp->sp++;
}

// TODO(alan): consider using an opaque pointer for the payload rather than a void pointer
void *
rb_iseq_get_yjit_payload(const rb_iseq_t *iseq)
{
    RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq, imemo_iseq));
    if (iseq->body) {
        return iseq->body->yjit_payload;
    }
    else {
        // Body is NULL when constructing the iseq.
        return NULL;
    }
}

void
rb_iseq_set_yjit_payload(const rb_iseq_t *iseq, void *payload)
{
    RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq, imemo_iseq));
    RUBY_ASSERT_ALWAYS(iseq->body);
    RUBY_ASSERT_ALWAYS(NULL == iseq->body->yjit_payload);
    iseq->body->yjit_payload = payload;
}

void
rb_iseq_reset_jit_func(const rb_iseq_t *iseq)
{
    RUBY_ASSERT_ALWAYS(IMEMO_TYPE_P(iseq, imemo_iseq));
    iseq->body->jit_entry = NULL;
    iseq->body->jit_exception = NULL;
    // Enable re-compiling this ISEQ. Even when it's invalidated for TracePoint,
    // we'd like to re-compile ISEQs that haven't been converted to trace_* insns.
    iseq->body->jit_entry_calls = 0;
    iseq->body->jit_exception_calls = 0;
}

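// Get the rb_proc_t behind a Proc object. GetProcPtr() is a macro, so it isn't
// directly callable from Rust.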
rb_proc_t *
rb_yjit_get_proc_ptr(VALUE procv)
{
    rb_proc_t *proc;
    GetProcPtr(procv, proc);
    return proc;
}

// This is defined only as a named struct inside rb_iseq_constant_body.
// By giving it a separate typedef, we make it nameable by rust-bindgen.
// Bindgen's temp/anon name isn't guaranteed stable.
typedef struct rb_iseq_param_keyword rb_seq_param_keyword_struct;

ID rb_get_symbol_id(VALUE namep);

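// Return the Proc backing a bmethod definition.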
VALUE
rb_get_def_bmethod_proc(rb_method_definition_t *def)
{
    RUBY_ASSERT(def->type == VM_METHOD_TYPE_BMETHOD);
    return def->body.bmethod.proc;
}

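// Accessor for iseq->body->parent_iseq.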
const rb_iseq_t *
rb_get_iseq_body_parent_iseq(const rb_iseq_t *iseq)
{
    return iseq->body->parent_iseq;
}

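// Invoke a Proc receiver with the given arguments and block handler.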
VALUE
rb_optimized_call(VALUE *recv, rb_execution_context_t *ec, int argc, VALUE *argv, int kw_splat, VALUE block_handler)
{
    rb_proc_t *proc;
    GetProcPtr(recv, proc);
    return rb_vm_invoke_proc(ec, proc, argc, argv, kw_splat, block_handler);
}

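// Accessor for iseq->body->builtin_attrs.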
unsigned int
rb_yjit_iseq_builtin_attrs(const rb_iseq_t *iseq)
{
    return iseq->body->builtin_attrs;
}

// If true, the iseq has only opt_invokebuiltin_delegate(_leave) and leave insns.
static bool
invokebuiltin_delegate_leave_p(const rb_iseq_t *iseq)
{
    int insn1 = rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[0]);
    if ((int)iseq->body->iseq_size != insn_len(insn1) + insn_len(BIN(leave))) {
        return false;
    }
    int insn2 = rb_vm_insn_addr2opcode((void *)iseq->body->iseq_encoded[insn_len(insn1)]);
    return (insn1 == BIN(opt_invokebuiltin_delegate) || insn1 == BIN(opt_invokebuiltin_delegate_leave)) &&
        insn2 == BIN(leave);
}

// Return an rb_builtin_function if the iseq contains only that builtin function.
const struct rb_builtin_function *
rb_yjit_builtin_function(const rb_iseq_t *iseq)
{
    if (invokebuiltin_delegate_leave_p(iseq)) {
        return (const struct rb_builtin_function *)iseq->body->iseq_encoded[1];
    }
    else {
        return NULL;
    }
}

VALUE
rb_yjit_str_simple_append(VALUE str1, VALUE str2)
{
    return rb_str_cat(str1, RSTRING_PTR(str2), RSTRING_LEN(str2));
}

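// Set the PC of a control frame.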
void
rb_set_cfp_pc(struct rb_control_frame_struct *cfp, const VALUE *pc)
{
    cfp->pc = pc;
}

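// Set the SP of a control frame.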
void
rb_set_cfp_sp(struct rb_control_frame_struct *cfp, VALUE *sp)
{
    cfp->sp = sp;
}

extern VALUE *rb_vm_base_ptr(struct rb_control_frame_struct *cfp);

// YJIT needs this function to never allocate and never raise
VALUE
rb_yarv_str_eql_internal(VALUE str1, VALUE str2)
{
    // We wrap this since it's static inline
    return rb_str_eql_internal(str1, str2);
}

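// Non-allocating string inequality check built on rb_str_eql_internal().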
VALUE
rb_str_neq_internal(VALUE str1, VALUE str2)
{
    return rb_str_eql_internal(str1, str2) == Qtrue ? Qfalse : Qtrue;
}

// YJIT needs this function to never allocate and never raise
VALUE
rb_yarv_ary_entry_internal(VALUE ary, long offset)
{
    return rb_ary_entry_internal(ary, offset);
}

extern VALUE rb_ary_unshift_m(int argc, VALUE *argv, VALUE ary);

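// Return the sub-array of `ary` from index `beg` to the end of the array.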
VALUE
rb_yjit_rb_ary_subseq_length(VALUE ary, long beg)
{
    long len = RARRAY_LEN(ary);
    return rb_ary_subseq(ary, beg, len);
}

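// Expose rb_fix_div_fix() (fixnum division) to Rust.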
VALUE
rb_yjit_fix_div_fix(VALUE recv, VALUE obj)
{
    return rb_fix_div_fix(recv, obj);
}

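// Expose rb_fix_mod_fix() (fixnum modulo) to Rust.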
VALUE
rb_yjit_fix_mod_fix(VALUE recv, VALUE obj)
{
    return rb_fix_mod_fix(recv, obj);
}

// Return non-zero when `obj` is an array and its last item is a
// `ruby2_keywords` hash. We don't support this kind of splat.
size_t
rb_yjit_ruby2_keywords_splat_p(VALUE obj)
{
    if (!RB_TYPE_P(obj, T_ARRAY)) return 0;
    long len = RARRAY_LEN(obj);
    if (len == 0) return 0;
    VALUE last = RARRAY_AREF(obj, len - 1);
    if (!RB_TYPE_P(last, T_HASH)) return 0;
    return FL_TEST_RAW(last, RHASH_PASS_AS_KEYWORDS);
}

// Checks to establish preconditions for rb_yjit_splat_varg_cfunc()
VALUE
rb_yjit_splat_varg_checks(VALUE *sp, VALUE splat_array, rb_control_frame_t *cfp)
{
    // We inserted a T_ARRAY guard before this call
    long len = RARRAY_LEN(splat_array);

    // Large splat arrays need a separate allocation
    if (len < 0 || len > VM_ARGC_STACK_MAX) return Qfalse;

    // Would we overflow if we put the contents of the array onto the stack?
    if (sp + len > (VALUE *)(cfp - 2)) return Qfalse;

    // Reject keywords hash since that requires duping it sometimes
    if (len > 0) {
        VALUE last_hash = RARRAY_AREF(splat_array, len - 1);
        if (RB_TYPE_P(last_hash, T_HASH) &&
            FL_TEST_RAW(last_hash, RHASH_PASS_AS_KEYWORDS)) {
            return Qfalse;
        }
    }

    return Qtrue;
}

// Push array elements to the stack for a C method that has a variable number
// of parameters. Returns the number of arguments the splat array contributes.
int
rb_yjit_splat_varg_cfunc(VALUE *stack_splat_array)
{
    VALUE splat_array = *stack_splat_array;
    int len;

    // We already checked that length fits in `int`
    RUBY_ASSERT(RB_TYPE_P(splat_array, T_ARRAY));
    len = (int)RARRAY_LEN(splat_array);

    // Push the contents of the array onto the stack
    MEMCPY(stack_splat_array, RARRAY_CONST_PTR(splat_array), VALUE, len);

    return len;
}

// Print the Ruby source location of some ISEQ for debugging purposes
void
rb_yjit_dump_iseq_loc(const rb_iseq_t *iseq, uint32_t insn_idx)
{
    char *ptr;
    long len;
    VALUE path = rb_iseq_path(iseq);
    RSTRING_GETMEM(path, ptr, len);
    fprintf(stderr, "%s %.*s:%u\n", __func__, (int)len, ptr, rb_iseq_line_no(iseq, insn_idx));
}

// Get the number of digits required to print an integer
static int
num_digits(int integer)
{
    int num = 1;
    while (integer /= 10) {
        num++;
    }
    return num;
}

// Allocate a C string that formats an ISEQ label like iseq_inspect()
char *
rb_yjit_iseq_inspect(const rb_iseq_t *iseq)
{
    const char *label = RSTRING_PTR(iseq->body->location.label);
    const char *path = RSTRING_PTR(rb_iseq_path(iseq));
    int lineno = iseq->body->location.code_location.beg_pos.lineno;

    char *buf = ZALLOC_N(char, strlen(label) + strlen(path) + num_digits(lineno) + 3);
    sprintf(buf, "%s@%s:%d", label, path, lineno);
    return buf;
}

// There are RSTRUCT_SETs in ruby/internal/core/rstruct.h and internal/struct.h
// with different types (int vs long) for k. Here we use the one from ruby/internal/core/rstruct.h,
// which takes an int.
void
rb_RSTRUCT_SET(VALUE st, int k, VALUE v)
{
    RSTRUCT_SET(st, k, v);
}

// Return the string encoding index
int
rb_ENCODING_GET(VALUE obj)
{
    return RB_ENCODING_GET(obj);
}

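// Expose rb_multi_ractor_p() to Rust.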
bool
rb_yjit_multi_ractor_p(void)
{
    return rb_multi_ractor_p();
}

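// Whether the inline constant cache entry holds a Ractor-shareable value.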
bool
rb_yjit_constcache_shareable(const struct iseq_inline_constant_cache_entry *ice)
{
    return (ice->flags & IMEMO_CONST_CACHE_SHAREABLE) != 0;
}

// Used for passing a callback and other data over rb_objspace_each_objects
struct iseq_callback_data {
    rb_iseq_callback callback;
    void *data;
};

// Heap-walking callback for rb_yjit_for_each_iseq().
static int
for_each_iseq_i(void *vstart, void *vend, size_t stride, void *data)
{
    const struct iseq_callback_data *callback_data = (struct iseq_callback_data *)data;
    VALUE v = (VALUE)vstart;
    for (; v != (VALUE)vend; v += stride) {
        void *ptr = rb_asan_poisoned_object_p(v);
        rb_asan_unpoison_object(v, false);

        if (rb_obj_is_iseq(v)) {
            rb_iseq_t *iseq = (rb_iseq_t *)v;
            callback_data->callback(iseq, callback_data->data);
        }

        asan_poison_object_if(ptr, v);
    }
    return 0;
}

// Iterate through the whole GC heap and invoke a callback for each iseq.
// Used for global code invalidation.
void
rb_yjit_for_each_iseq(rb_iseq_callback callback, void *data)
{
    struct iseq_callback_data callback_data = { .callback = callback, .data = data };
    rb_objspace_each_objects(for_each_iseq_i, (void *)&callback_data);
}

// For running write barriers from Rust. Required when we add a new edge in the
// object graph from `old` to `young`.
void
rb_yjit_obj_written(VALUE old, VALUE young, const char *file, int line)
{
    rb_obj_written(old, Qundef, young, file, line);
}

// Acquire the VM lock and then signal all other Ruby threads (ractors) to
// contend for the VM lock, putting them to sleep. YJIT uses this to evict
// threads running inside generated code so among other things, it can
// safely change memory protection of regions housing generated code.
void
rb_yjit_vm_lock_then_barrier(unsigned int *recursive_lock_level, const char *file, int line)
{
    rb_vm_lock_enter(recursive_lock_level, file, line);
    rb_vm_barrier();
}

// Release the VM lock. The lock level must point to the same integer used to
// acquire the lock.
void
rb_yjit_vm_unlock(unsigned int *recursive_lock_level, const char *file, int line)
{
    rb_vm_lock_leave(recursive_lock_level, file, line);
}

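// Compile an entry point for the ISEQ (or an exception entry point when
// jit_exception is true) while holding the VM lock and barrier.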
void
rb_yjit_compile_iseq(const rb_iseq_t *iseq, rb_execution_context_t *ec, bool jit_exception)
{
    RB_VM_LOCK_ENTER();
    rb_vm_barrier();

    // Compile a block version starting at the current instruction
    uint8_t *rb_yjit_iseq_gen_entry_point(const rb_iseq_t *iseq, rb_execution_context_t *ec, bool jit_exception); // defined in Rust
    uintptr_t code_ptr = (uintptr_t)rb_yjit_iseq_gen_entry_point(iseq, ec, jit_exception);

    if (jit_exception) {
        iseq->body->jit_exception = (rb_jit_func_t)code_ptr;
    }
    else {
        iseq->body->jit_entry = (rb_jit_func_t)code_ptr;
    }

    RB_VM_LOCK_LEAVE();
}

// GC root for interacting with the GC
struct yjit_root_struct {
    bool unused; // empty structs are not legal in C99
};

// For dealing with refinements
void
rb_yjit_invalidate_all_method_lookup_assumptions(void)
{
    // It looks like Module#using actually doesn't need to invalidate all the
    // method caches, so we do nothing here for now.
}

// Number of object shapes, which might be useful for investigating YJIT exit reasons.
VALUE
rb_object_shape_count(void)
{
    // next_shape_id starts from 0, so it's the same as the count
    return ULONG2NUM((unsigned long)GET_SHAPE_TREE()->next_shape_id);
}

// Assert that we have the VM lock. Relevant mostly for multi-ractor situations.
// The GC takes the lock before calling us, and this asserts that it indeed happens.
void
rb_yjit_assert_holding_vm_lock(void)
{
    ASSERT_vm_locking();
}

// The number of stack slots that vm_sendish() pops for send and invokesuper.
size_t
rb_yjit_sendish_sp_pops(const struct rb_callinfo *ci)
{
    return 1 - sp_inc_of_sendish(ci); // + 1 to ignore return value push
}

// The number of stack slots that vm_sendish() pops for invokeblock.
size_t
rb_yjit_invokeblock_sp_pops(const struct rb_callinfo *ci)
{
    return 1 - sp_inc_of_invokeblock(ci); // + 1 to ignore return value push
}

// Set up jit_return to avoid returning a non-Qundef value on a non-FINISH frame.
// See [jit_compile_exception] for details.
void
rb_yjit_set_exception_return(rb_control_frame_t *cfp, void *leave_exit, void *leave_exception)
{
    if (VM_FRAME_FINISHED_P(cfp)) {
        // If it's a FINISH frame, just normally exit with a non-Qundef value.
        cfp->jit_return = leave_exit;
    }
    else if (cfp->jit_return) {
        while (!VM_FRAME_FINISHED_P(cfp)) {
            if (cfp->jit_return == leave_exit) {
                // Unlike jit_exec(), leave_exit is not safe on a non-FINISH frame on
                // jit_exec_exception(). See [jit_exec] and [jit_exec_exception] for
                // details. Exit to the interpreter with Qundef to let it keep executing
                // other Ruby frames.
                cfp->jit_return = leave_exception;
                return;
            }
            cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
        }
    }
    else {
        // If the caller was not JIT code, exit to the interpreter with Qundef
        // to keep executing Ruby frames with the interpreter.
        cfp->jit_return = leave_exception;
    }
}

// Primitives used by yjit.rb
VALUE rb_yjit_stats_enabled_p(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_print_stats_p(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_log_enabled_p(rb_execution_context_t *c, VALUE self);
VALUE rb_yjit_print_log_p(rb_execution_context_t *c, VALUE self);
VALUE rb_yjit_trace_exit_locations_enabled_p(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_get_stats(rb_execution_context_t *ec, VALUE self, VALUE key);
VALUE rb_yjit_reset_stats_bang(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_get_log(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_disasm_iseq(rb_execution_context_t *ec, VALUE self, VALUE iseq);
VALUE rb_yjit_insns_compiled(rb_execution_context_t *ec, VALUE self, VALUE iseq);
VALUE rb_yjit_code_gc(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_simulate_oom_bang(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_get_exit_locations(rb_execution_context_t *ec, VALUE self);
VALUE rb_yjit_enable(rb_execution_context_t *ec, VALUE self, VALUE gen_stats, VALUE print_stats, VALUE gen_compilation_log, VALUE print_compilation_log, VALUE mem_size, VALUE call_threshold);
VALUE rb_yjit_c_builtin_p(rb_execution_context_t *ec, VALUE self);

// Allow YJIT_C_BUILTIN macro to force --yjit-c-builtin
#ifdef YJIT_C_BUILTIN
static VALUE yjit_c_builtin_p(rb_execution_context_t *ec, VALUE self) { return Qtrue; }
#else
#define yjit_c_builtin_p rb_yjit_c_builtin_p
#endif

// Preprocessed yjit.rb generated during build
#include "yjit.rbinc"