ruby/internal/set_table.h

#ifndef INTERNAL_SET_TABLE_H
#define INTERNAL_SET_TABLE_H

#include "include/ruby/st.h"

/* Entry record type.  It is left incomplete in this header, which only
   refers to it through pointers.  */
typedef struct set_table_entry set_table_entry;

/* Descriptor of a set table.  The entry type itself stays incomplete in
   this header; only a pointer to the entry array is kept here.  */
typedef struct set_table {
    /* Cached parameters of the table; st.c has the details.  */
    unsigned char entry_power;
    unsigned char bin_power;
    unsigned char size_ind;
    /* Count of rebuilds this table has gone through.  */
    unsigned int rebuilds_num;
    const struct st_hash_type *type;
    /* Current number of entries held by the table.  */
    st_index_t num_entries;
    /* Bin array used to reach entries by key.  */
    st_index_t *bins;
    /* First and bounding index into the entries array.  Both
       entries_start and entries_bound lie within
       [0,allocated_entries].  */
    st_index_t entries_start, entries_bound;
    /* Entry array holding 2^entry_power elements.  */
    set_table_entry *entries;
} set_table;

/* Callback function types accepted by the iteration functions declared
   below: rb_set_foreach takes a set_foreach_callback_func,
   rb_set_foreach_check takes a set_foreach_check_callback_func, and
   rb_set_foreach_with_replace takes a set_foreach_check_callback_func
   plus a set_update_callback_func.  All three return int.
   NOTE(review): the meaning of the unnamed parameters and of the int
   return value is not visible in this header -- confirm against the
   implementation before relying on it.  */
typedef int set_foreach_callback_func(st_data_t, st_data_t);
typedef int set_foreach_check_callback_func(st_data_t, st_data_t, int);
/* Receives the key by pointer, together with a caller argument and an
   `existing` flag.  */
typedef int set_update_callback_func(st_data_t *key, st_data_t arg, int existing);

/* set_table_size is a macro alias for the rb_-prefixed symbol declared
   on the next line.  The function takes a read-only table pointer and
   returns a size_t.
   NOTE(review): which size is reported is not visible in this header --
   confirm against the implementation.  */
#define set_table_size rb_set_table_size
size_t rb_set_table_size(const struct set_table *tbl);
/* Table initializers.  Each set_* name is a macro alias for the rb_set_*
   function declared directly beneath it, and each function returns a
   set_table pointer.  */

/* Takes a table pointer `tab`, a hash type, and an st_index_t
   (presumably the requested size, going by the function name -- TODO
   confirm).  */
#define set_init_table_with_size rb_set_init_table_with_size
set_table *rb_set_init_table_with_size(set_table *tab, const struct st_hash_type *, st_index_t);
/* Takes no arguments.  NOTE(review): presumably builds a table keyed by
   numbers, going by the name -- confirm against the implementation.  */
#define set_init_numtable rb_set_init_numtable
set_table *rb_set_init_numtable(void);
/* Same return type as rb_set_init_numtable, with an explicit `size`.  */
#define set_init_numtable_with_size rb_set_init_numtable_with_size
set_table *rb_set_init_numtable_with_size(st_index_t size);
/* Single-key operations.  Each set_* name is a macro alias for the
   rb_set_* function declared directly beneath it.  */

/* Takes the key by pointer, unlike insert and lookup which take it by
   value.  */
#define set_delete rb_set_delete
int rb_set_delete(set_table *, st_data_t *); /* returns 0:notfound 1:deleted */
/* NOTE(review): the meaning of the int result is not documented in this
   header -- confirm against the implementation.  */
#define set_insert rb_set_insert
int rb_set_insert(set_table *, st_data_t);
/* NOTE(review): the meaning of the int result is not documented in this
   header -- confirm against the implementation.  */
#define set_lookup rb_set_lookup
int rb_set_lookup(set_table *, st_data_t);
/* Iteration entry points.  Each set_* name is a macro alias for the
   rb_set_* function declared directly beneath it; all three return int.
   NOTE(review): iteration order, the callback return protocol and the
   meaning of the int result are not visible in this header -- confirm
   against the implementation.  */

/* Takes a check-style callback `func`, an update callback `replace`, and
   a caller argument `arg`.  */
#define set_foreach_with_replace rb_set_foreach_with_replace
int rb_set_foreach_with_replace(set_table *tab, set_foreach_check_callback_func *func, set_update_callback_func *replace, st_data_t arg);
/* Takes a plain foreach callback and one st_data_t value.  */
#define set_foreach rb_set_foreach
int rb_set_foreach(set_table *, set_foreach_callback_func *, st_data_t);
/* Takes a check-style callback and two st_data_t values.  */
#define set_foreach_check rb_set_foreach_check
int rb_set_foreach_check(set_table *, set_foreach_check_callback_func *, st_data_t, st_data_t);
/* set_keys is a macro alias for rb_set_keys.  The function takes a
   table, a pointer `keys` to st_data_t storage, and an st_index_t
   `size`, and returns an st_index_t.
   NOTE(review): presumably writes at most `size` keys into `keys` and
   returns how many were written -- confirm against the
   implementation.  */
#define set_keys rb_set_keys
st_index_t rb_set_keys(set_table *table, st_data_t *keys, st_index_t size);
/* Teardown/reset entry points.  Each set_* name is a macro alias for the
   rb_set_* function declared directly beneath it; both take a table
   pointer and return nothing.
   NOTE(review): what exactly each one releases (the entries, the bins,
   the table struct itself) is not visible in this header -- confirm
   against the implementation.  */
#define set_free_table rb_set_free_table
void rb_set_free_table(set_table *);
#define set_clear rb_set_clear
void rb_set_clear(set_table *);
/* set_copy is a macro alias for rb_set_copy.  The function takes a
   destination table `new_table` and a source table `old_table`, and
   returns a set_table pointer.
   NOTE(review): whether `new_table` may be NULL and which pointer is
   returned are not visible in this header -- confirm against the
   implementation.  */
#define set_copy rb_set_copy
set_table *rb_set_copy(set_table *new_table, set_table *old_table);
/* set_memsize is a macro alias for rb_set_memsize.  The function takes a
   read-only table pointer and returns a size_t.  The prototype is
   wrapped in PUREFUNC, a macro defined outside this header.
   NOTE(review): presumably PUREFUNC marks the function as free of side
   effects and the result is a byte count -- confirm.  */
#define set_memsize rb_set_memsize
PUREFUNC(size_t rb_set_memsize(const set_table *));
/* set_compact_table is a macro alias for rb_set_compact_table.  The
   function takes a table pointer `tab` and returns nothing.
   The prototype spells the rb_-prefixed symbol explicitly, like every
   other prototype in this header.  The macro above already rewrote the
   unprefixed spelling to this name, so the declared symbol is the same
   either way.
   NOTE(review): what compaction does to the table is not visible in
   this header -- confirm against the implementation.  */
#define set_compact_table rb_set_compact_table
void rb_set_compact_table(set_table *tab);

#endif
Implement Set as a core class Set has been an autoloaded standard library since Ruby 3.2. The standard library Set is less efficient than it could be, as it uses Hash for storage, which stores unnecessary values for each key. Implementation details: * Core Set uses a modified version of `st_table`, named `set_table`. Other than `s/st_/set_/`, the main difference is that the stored records do not have values, making them 1/3 smaller. `st_table_entry` stores `hash`, `key`, and `record` (value), while `set_table_entry` only stores `hash` and `key`. This results in large sets using ~33% less memory compared to stdlib Set. For small sets, core Set uses 12% more memory (160 byte object slot and 64 malloc bytes, while stdlib set uses 40 for Set and 160 for Hash). More memory is used because the set_table is embedded and 72 bytes in the object slot are currently wasted. Hopefully we can make this more efficient and have it stored in an 80 byte object slot in the future. * All methods are implemented as cfuncs, except the pretty_print methods, which were moved to `lib/pp.rb` (which is where the pretty_print methods for other core classes are defined). As is typical for core classes, internal calls call C functions and not Ruby methods. For example, to check if something is a Set, `rb_obj_is_kind_of` is used, instead of calling `is_a?(Set)` on the related object. * Almost all methods use the same algorithm that the pure-Ruby implementation used. The exception is when calling `Set#divide` with a block with 2-arity. The pure-Ruby method used tsort to implement this. I developed an algorithm that only allocates a single intermediate hash and does not need tsort. * The `flatten_merge` protected method is no longer necessary, so it is not implemented (it could be). * Similar to Hash/Array, subclasses of Set are no longer reflected in `inspect` output. * RDoc from stdlib Set was moved to core Set, with minor updates. 
This includes a comprehensive benchmark suite for all public Set methods. As you would expect, the native version is faster in the vast majority of cases, and multiple times faster in many cases. There are a few cases where it is significantly slower: * Set.new with no arguments (~1.6x) * Set#compare_by_identity for small sets (~1.3x) * Set#clone for small sets (~1.5x) * Set#dup for small sets (~1.7x) These are slower as Set does not currently use the AR table optimization that Hash does, so a new set_table is initialized for each call. I'm not sure it's worth the complexity to have an AR table-like optimization for small sets (for hashes it makes sense, as small hashes are used everywhere in Ruby). The rbs and repl_type_completor bundled gems will need updates to support core Set. The pull request marks them as allowed failures. This passes all set tests with no changes. The following specs needed modification: * Modifying frozen set error message (changed for the better) * `Set#divide` when passed a 2-arity block no longer yields the same object as both the first and second argument (this seems like an issue with the previous implementation). * Set-like objects that override `is_a?` such that `is_a?(Set)` return `true` are no longer treated as Set instances. * `Set.allocate.hash` is no longer the same as `nil.hash` * `Set#join` no longer calls `Set#to_a` (it calls the underlying C function). * `Set#flatten_merge` protected method is not implemented. Previously, `set.rb` added a `SortedSet` autoload, which loads `set/sorted_set.rb`. This replaces the `Set` autoload in `prelude.rb` with a `SortedSet` autoload, but I recommend removing it and `set/sorted_set.rb`. This moves `test/set/test_set.rb` to `test/ruby/test_set.rb`, reflecting that switch to a core class. This does not move the spec files, as I'm not sure how they should be handled. Internally, this uses the st_* types and functions as much as possible, and only adds set_* types and functions as needed. 
The underlying set_table implementation is stored in st.c, but there is no public C-API for it, nor is there one planned, in order to keep the ability to change the internals going forward. For internal uses of st_table with Qtrue values, those can probably be replaced with set_table. To do that, include internal/set_table.h. To handle symbol visibility (rb_ prefix), internal/set_table.h uses the same macro approach that include/ruby/st.h uses. The Set class (rb_cSet) and all methods are defined in set.c. There isn't currently a C-API for the Set class, though C-API functions can be added as needed going forward. Implements [Feature #21216] Co-authored-by: Jean Boussier <jean.boussier@gmail.com> Co-authored-by: Oliver Nutter <mrnoname1000@riseup.net> 2025-04-05 17:57:43 -07:00			`#ifndef INTERNAL_SET_TABLE_H`
			`#define INTERNAL_SET_TABLE_H`

			`#include "include/ruby/st.h"`

			`struct set_table_entry;`

			`typedef struct set_table_entry set_table_entry;`

			`struct set_table {`
			`/* Cached features of the table -- see st.c for more details. */`
			`unsigned char entry_power, bin_power, size_ind;`
			`/* How many times the table was rebuilt. */`
			`unsigned int rebuilds_num;`
			`const struct st_hash_type *type;`
			`/* Number of entries currently in the table. */`
			`st_index_t num_entries;`
			`/* Array of bins used for access by keys. */`
			`st_index_t *bins;`
			`/* Start and bound index of entries in array entries.`
			`entries_starts and entries_bound are in interval`
			`[0,allocated_entries]. */`
			`st_index_t entries_start, entries_bound;`
			`/* Array of size 2^entry_power. */`
			`set_table_entry *entries;`
			`};`

			`typedef struct set_table set_table;`

			`typedef int set_foreach_callback_func(st_data_t, st_data_t);`
			`typedef int set_foreach_check_callback_func(st_data_t, st_data_t, int);`
			`typedef int set_update_callback_func(st_data_t *key, st_data_t arg, int existing);`

			`#define set_table_size rb_set_table_size`
			`size_t rb_set_table_size(const struct set_table *tbl);`
			`#define set_init_table_with_size rb_set_init_table_with_size`
			`set_table *rb_set_init_table_with_size(set_table *tab, const struct st_hash_type *, st_index_t);`
Use `set_table` to track const caches Now that we have a `set_table` implementation, we can use it to track const caches and save some memory. We could even save some more memory if `numtable` didn't store a copy of the `hash` and instead recomputed it every time, but this is a quick win. 2025-04-26 07:56:54 +02:00			`#define set_init_numtable rb_set_init_numtable`
			`set_table *rb_set_init_numtable(void);`
Optimize callcache invalidation for refinements Fixes [Bug #21201] This change addresses a performance regression where defining methods inside `refine` blocks caused severe slowdowns. The issue was due to `rb_clear_all_refinement_method_cache()` triggering a full object space scan via `rb_objspace_each_objects` to find and invalidate affected callcaches, which is very inefficient. To fix this, I introduce `vm->cc_refinement_table` to track callcaches related to refinements. This allows us to invalidate only the necessary callcaches without scanning the entire heap, resulting in significant performance improvement. 2025-04-06 01:50:08 +09:00			`#define set_init_numtable_with_size rb_set_init_numtable_with_size`
			`set_table *rb_set_init_numtable_with_size(st_index_t size);`
Implement Set as a core class Set has been an autoloaded standard library since Ruby 3.2. The standard library Set is less efficient than it could be, as it uses Hash for storage, which stores unnecessary values for each key. Implementation details: * Core Set uses a modified version of `st_table`, named `set_table`. Other than `s/st_/set_/`, the main difference is that the stored records do not have values, making them 1/3 smaller. `st_table_entry` stores `hash`, `key`, and `record` (value), while `set_table_entry` only stores `hash` and `key`. This results in large sets using ~33% less memory compared to stdlib Set. For small sets, core Set uses 12% more memory (160 byte object slot and 64 malloc bytes, while stdlib set uses 40 for Set and 160 for Hash). More memory is used because the set_table is embedded and 72 bytes in the object slot are currently wasted. Hopefully we can make this more efficient and have it stored in an 80 byte object slot in the future. * All methods are implemented as cfuncs, except the pretty_print methods, which were moved to `lib/pp.rb` (which is where the pretty_print methods for other core classes are defined). As is typical for core classes, internal calls call C functions and not Ruby methods. For example, to check if something is a Set, `rb_obj_is_kind_of` is used, instead of calling `is_a?(Set)` on the related object. * Almost all methods use the same algorithm that the pure-Ruby implementation used. The exception is when calling `Set#divide` with a block with 2-arity. The pure-Ruby method used tsort to implement this. I developed an algorithm that only allocates a single intermediate hash and does not need tsort. * The `flatten_merge` protected method is no longer necessary, so it is not implemented (it could be). * Similar to Hash/Array, subclasses of Set are no longer reflected in `inspect` output. * RDoc from stdlib Set was moved to core Set, with minor updates. 
This includes a comprehensive benchmark suite for all public Set methods. As you would expect, the native version is faster in the vast majority of cases, and multiple times faster in many cases. There are a few cases where it is significantly slower: * Set.new with no arguments (~1.6x) * Set#compare_by_identity for small sets (~1.3x) * Set#clone for small sets (~1.5x) * Set#dup for small sets (~1.7x) These are slower as Set does not currently use the AR table optimization that Hash does, so a new set_table is initialized for each call. I'm not sure it's worth the complexity to have an AR table-like optimization for small sets (for hashes it makes sense, as small hashes are used everywhere in Ruby). The rbs and repl_type_completor bundled gems will need updates to support core Set. The pull request marks them as allowed failures. This passes all set tests with no changes. The following specs needed modification: * Modifying frozen set error message (changed for the better) * `Set#divide` when passed a 2-arity block no longer yields the same object as both the first and second argument (this seems like an issue with the previous implementation). * Set-like objects that override `is_a?` such that `is_a?(Set)` return `true` are no longer treated as Set instances. * `Set.allocate.hash` is no longer the same as `nil.hash` * `Set#join` no longer calls `Set#to_a` (it calls the underlying C function). * `Set#flatten_merge` protected method is not implemented. Previously, `set.rb` added a `SortedSet` autoload, which loads `set/sorted_set.rb`. This replaces the `Set` autoload in `prelude.rb` with a `SortedSet` autoload, but I recommend removing it and `set/sorted_set.rb`. This moves `test/set/test_set.rb` to `test/ruby/test_set.rb`, reflecting that switch to a core class. This does not move the spec files, as I'm not sure how they should be handled. Internally, this uses the st_* types and functions as much as possible, and only adds set_* types and functions as needed. 
The underlying set_table implementation is stored in st.c, but there is no public C-API for it, nor is there one planned, in order to keep the ability to change the internals going forward. For internal uses of st_table with Qtrue values, those can probably be replaced with set_table. To do that, include internal/set_table.h. To handle symbol visibility (rb_ prefix), internal/set_table.h uses the same macro approach that include/ruby/st.h uses. The Set class (rb_cSet) and all methods are defined in set.c. There isn't currently a C-API for the Set class, though C-API functions can be added as needed going forward. Implements [Feature #21216] Co-authored-by: Jean Boussier <jean.boussier@gmail.com> Co-authored-by: Oliver Nutter <mrnoname1000@riseup.net> 2025-04-05 17:57:43 -07:00			`#define set_delete rb_set_delete`
			`int rb_set_delete(set_table *, st_data_t *); /* returns 0:notfound 1:deleted */`
			`#define set_insert rb_set_insert`
			`int rb_set_insert(set_table *, st_data_t);`
			`#define set_lookup rb_set_lookup`
			`int rb_set_lookup(set_table *, st_data_t);`
			`#define set_foreach_with_replace rb_set_foreach_with_replace`
			`int rb_set_foreach_with_replace(set_table *tab, set_foreach_check_callback_func *func, set_update_callback_func *replace, st_data_t arg);`
			`#define set_foreach rb_set_foreach`
			`int rb_set_foreach(set_table *, set_foreach_callback_func *, st_data_t);`
			`#define set_foreach_check rb_set_foreach_check`
			`int rb_set_foreach_check(set_table *, set_foreach_check_callback_func *, st_data_t, st_data_t);`
			`#define set_keys rb_set_keys`
			`st_index_t rb_set_keys(set_table *table, st_data_t *keys, st_index_t size);`
			`#define set_free_table rb_set_free_table`
			`void rb_set_free_table(set_table *);`
			`#define set_clear rb_set_clear`
			`void rb_set_clear(set_table *);`
			`#define set_copy rb_set_copy`
			`set_table *rb_set_copy(set_table *new_table, set_table *old_table);`
			`#define set_memsize rb_set_memsize`
			`PUREFUNC(size_t rb_set_memsize(const set_table *));`
			`#define set_compact_table rb_set_compact_table`
			`void set_compact_table(set_table *tab);`

			`#endif`