Only count VM instructions in YJIT stats builds

The instruction counter is slowing multi-Ractor applications.  I had
changed it to use a thread-local variable, but thread-local access is
slowing single-threaded applications.  This commit only enables the
instruction counter in YJIT stats builds until we can figure out a way
to gather the information with lower overhead.

Co-authored-by: Randy Stauner <randy.stauner@shopify.com>
This commit is contained in:
Aaron Patterson 2025-02-13 09:56:21 -08:00 committed by Aaron Patterson
parent c1ce3d719d
commit 8cafa5b8ce
Notes: git 2025-02-14 19:39:53 +00:00
9 changed files with 21 additions and 21 deletions

View File

@ -81,7 +81,7 @@ struct vm_ifunc *rb_current_ifunc(void);
#if USE_YJIT
/* vm_exec.c */
extern RB_THREAD_LOCAL_SPECIFIER uint64_t rb_vm_insns_count;
extern uint64_t rb_vm_insns_count;
#endif
extern bool rb_free_at_exit;

View File

@ -1637,7 +1637,7 @@ class TestYJIT < Test::Unit::TestCase
[
stats[:object_shape_count].is_a?(Integer),
stats[:ratio_in_yjit].is_a?(Float),
stats[:ratio_in_yjit].nil? || stats[:ratio_in_yjit].is_a?(Float),
].all?
RUBY
end
@ -1648,7 +1648,7 @@ class TestYJIT < Test::Unit::TestCase
3.times { test }
# Collect single stat.
stat = RubyVM::YJIT.runtime_stats(:ratio_in_yjit)
stat = RubyVM::YJIT.runtime_stats(:yjit_alloc_size)
# Ensure this invocation had stats.
return true unless RubyVM::YJIT.runtime_stats[:all_stats]

View File

@ -13,7 +13,7 @@
#if USE_YJIT
// The number of instructions executed on vm_exec_core. --yjit-stats uses this.
RB_THREAD_LOCAL_SPECIFIER uint64_t rb_vm_insns_count = 0;
uint64_t rb_vm_insns_count = 0;
#endif
#if VM_COLLECT_USAGE_DETAILS

View File

@ -16,7 +16,7 @@ RUBY_EXTERN rb_serial_t ruby_vm_constant_cache_invalidations;
RUBY_EXTERN rb_serial_t ruby_vm_constant_cache_misses;
RUBY_EXTERN rb_serial_t ruby_vm_global_cvar_state;
#if USE_YJIT // We want vm_insns_count on any JIT-enabled build.
#if USE_YJIT && YJIT_STATS // We want vm_insns_count only on stats builds.
// Increment vm_insns_count for --yjit-stats. We increment this even when
// --yjit or --yjit-stats is not used because branching to skip it is slower.
// We also don't use ATOMIC_INC for performance, allowing inaccuracy on Ractors.

6
yjit.c
View File

@ -16,7 +16,6 @@
#include "internal/fixnum.h"
#include "internal/numeric.h"
#include "internal/gc.h"
#include "internal/vm.h"
#include "vm_core.h"
#include "vm_callinfo.h"
#include "builtin.h"
@ -96,11 +95,6 @@ rb_yjit_mark_executable(void *mem_block, uint32_t mem_size)
}
}
uint64_t
rb_yjit_vm_insns_count(void) {
return rb_vm_insns_count;
}
// Free the specified memory block.
bool
rb_yjit_mark_unused(void *mem_block, uint32_t mem_size)

View File

@ -422,10 +422,10 @@ module RubyVM::YJIT
out.puts "object_shape_count: " + format_number(13, stats[:object_shape_count])
out.puts "side_exit_count: " + format_number(13, stats[:side_exit_count])
out.puts "total_exit_count: " + format_number(13, stats[:total_exit_count])
out.puts "total_insns_count: " + format_number(13, stats[:total_insns_count])
out.puts "vm_insns_count: " + format_number(13, stats[:vm_insns_count])
out.puts "total_insns_count: " + format_number(13, stats[:total_insns_count]) if stats[:total_insns_count]
out.puts "vm_insns_count: " + format_number(13, stats[:vm_insns_count]) if stats[:vm_insns_count]
out.puts "yjit_insns_count: " + format_number(13, stats[:yjit_insns_count])
out.puts "ratio_in_yjit: " + ("%12.1f" % stats[:ratio_in_yjit]) + "%"
out.puts "ratio_in_yjit: " + ("%12.1f" % stats[:ratio_in_yjit]) + "%" if stats[:ratio_in_yjit]
out.puts "avg_len_in_yjit: " + ("%13.1f" % stats[:avg_len_in_yjit])
print_sorted_exit_counts(stats, out: out, prefix: "exit_")

View File

@ -317,7 +317,6 @@ fn main() {
.allowlist_function("rb_yjit_get_page_size")
.allowlist_function("rb_yjit_iseq_builtin_attrs")
.allowlist_function("rb_yjit_iseq_inspect")
.allowlist_function("rb_yjit_vm_insns_count")
.allowlist_function("rb_yjit_builtin_function")
.allowlist_function("rb_set_cfp_(pc|sp)")
.allowlist_function("rb_yjit_multi_ractor_p")
@ -381,6 +380,9 @@ fn main() {
.allowlist_function("rb_ivar_get")
.allowlist_function("rb_mod_name")
// From internal/vm.h
.allowlist_var("rb_vm_insns_count")
// From include/ruby/internal/intern/vm.h
.allowlist_function("rb_get_alloc_func")

View File

@ -1060,6 +1060,7 @@ extern "C" {
elts: *const VALUE,
) -> VALUE;
pub fn rb_vm_top_self() -> VALUE;
pub static mut rb_vm_insns_count: u64;
pub fn rb_method_entry_at(obj: VALUE, id: ID) -> *const rb_method_entry_t;
pub fn rb_callable_method_entry(klass: VALUE, id: ID) -> *const rb_callable_method_entry_t;
pub fn rb_callable_method_entry_or_negative(
@ -1139,7 +1140,6 @@ extern "C" {
pub fn rb_jit_cont_each_iseq(callback: rb_iseq_callback, data: *mut ::std::os::raw::c_void);
pub fn rb_yjit_mark_writable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
pub fn rb_yjit_mark_executable(mem_block: *mut ::std::os::raw::c_void, mem_size: u32);
pub fn rb_yjit_vm_insns_count() -> u64;
pub fn rb_yjit_mark_unused(mem_block: *mut ::std::os::raw::c_void, mem_size: u32) -> bool;
pub fn rb_yjit_array_len(a: VALUE) -> ::std::os::raw::c_long;
pub fn rb_yjit_icache_invalidate(

View File

@ -789,7 +789,9 @@ fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE {
set_stat_usize!(hash, "context_cache_bytes", crate::core::CTX_ENCODE_CACHE_BYTES + crate::core::CTX_DECODE_CACHE_BYTES);
// VM instructions count
set_stat_usize!(hash, "vm_insns_count", rb_yjit_vm_insns_count() as usize);
if rb_vm_insns_count > 0 {
set_stat_usize!(hash, "vm_insns_count", rb_vm_insns_count as usize);
}
set_stat_usize!(hash, "live_iseq_count", rb_yjit_live_iseq_count as usize);
set_stat_usize!(hash, "iseq_alloc_count", rb_yjit_iseq_alloc_count as usize);
@ -859,11 +861,13 @@ fn rb_yjit_gen_stats_dict(key: VALUE) -> VALUE {
set_stat_double!(hash, "avg_len_in_yjit", avg_len_in_yjit);
// Proportion of instructions that retire in YJIT
let total_insns_count = retired_in_yjit + rb_yjit_vm_insns_count();
set_stat_usize!(hash, "total_insns_count", total_insns_count as usize);
if rb_vm_insns_count > 0 {
let total_insns_count = retired_in_yjit + rb_vm_insns_count;
set_stat_usize!(hash, "total_insns_count", total_insns_count as usize);
let ratio_in_yjit: f64 = 100.0 * retired_in_yjit as f64 / total_insns_count as f64;
set_stat_double!(hash, "ratio_in_yjit", ratio_in_yjit);
let ratio_in_yjit: f64 = 100.0 * retired_in_yjit as f64 / total_insns_count as f64;
set_stat_double!(hash, "ratio_in_yjit", ratio_in_yjit);
}
// Set method call counts in a Ruby dict
fn set_call_counts(