Implement embedded TypedData objects

This commit adds a new flag RUBY_TYPED_EMBEDDABLE that allows the data
of a TypedData object to be embedded after the object itself. This will
improve cache locality and allow us to save the 8-byte data pointer.

Co-Authored-By: Jean Boussier <byroot@ruby-lang.org>
This commit is contained in:
Peter Zhu 2023-03-03 16:05:01 -05:00
parent bc07b0b9e1
commit 392238e3fd
3 changed files with 88 additions and 14 deletions

View File

@ -1322,7 +1322,7 @@ rb_check_typeddata(VALUE obj, const rb_data_type_t *data_type)
actual = rb_str_new_cstr(name); /* or rb_fstring_cstr? not sure... */ actual = rb_str_new_cstr(name); /* or rb_fstring_cstr? not sure... */
} }
else { else {
return DATA_PTR(obj); return RTYPEDDATA_GET_DATA(obj);
} }
const char *expected = data_type->wrap_struct_name; const char *expected = data_type->wrap_struct_name;

61
gc.c
View File

@ -3142,19 +3142,42 @@ rb_data_object_zalloc(VALUE klass, size_t size, RUBY_DATA_FUNC dmark, RUBY_DATA_
return obj; return obj;
} }
VALUE static VALUE
rb_data_typed_object_wrap(VALUE klass, void *datap, const rb_data_type_t *type) typed_data_alloc(VALUE klass, VALUE typed_flag, void *datap, const rb_data_type_t *type, size_t size)
{ {
RBIMPL_NONNULL_ARG(type); RBIMPL_NONNULL_ARG(type);
if (klass) rb_data_object_check(klass); if (klass) rb_data_object_check(klass);
bool wb_protected = (type->flags & RUBY_FL_WB_PROTECTED) || !type->function.dmark; bool wb_protected = (type->flags & RUBY_FL_WB_PROTECTED) || !type->function.dmark;
return newobj_of(GET_RACTOR(), klass, T_DATA, (VALUE)type, (VALUE)1, (VALUE)datap, wb_protected, sizeof(struct RTypedData)); return newobj_of(GET_RACTOR(), klass, T_DATA, (VALUE)type, 1 | typed_flag, (VALUE)datap, wb_protected, size);
}
/*
 * Creates a TypedData object of class klass that wraps the caller-supplied
 * pointer datap, described by type.
 *
 * Raises TypeError when type has RUBY_TYPED_EMBEDDABLE set. Otherwise the
 * object is created through typed_data_alloc() with no extra typed flag and
 * an allocation size of sizeof(struct RTypedData).
 */
VALUE
rb_data_typed_object_wrap(VALUE klass, void *datap, const rb_data_type_t *type)
{
/* NOTE(review): type->flags is read here before typed_data_alloc() gets to
 * apply RBIMPL_NONNULL_ARG(type) -- confirm a NULL type cannot reach this. */
if (UNLIKELY(type->flags & RUBY_TYPED_EMBEDDABLE)) {
rb_raise(rb_eTypeError, "Cannot wrap an embeddable TypedData");
}
/* typed_flag is 0, i.e. TYPED_DATA_EMBEDDED is not set for wrapped data. */
return typed_data_alloc(klass, 0, datap, type, sizeof(struct RTypedData));
} }
VALUE VALUE
rb_data_typed_object_zalloc(VALUE klass, size_t size, const rb_data_type_t *type) rb_data_typed_object_zalloc(VALUE klass, size_t size, const rb_data_type_t *type)
{ {
VALUE obj = rb_data_typed_object_wrap(klass, 0, type); if (type->flags & RUBY_TYPED_EMBEDDABLE) {
if (!(type->flags & RUBY_TYPED_FREE_IMMEDIATELY)) {
rb_raise(rb_eTypeError, "Embeddable TypedData must be freed immediately");
}
size_t embed_size = offsetof(struct RTypedData, data) + size;
if (rb_gc_size_allocatable_p(embed_size)) {
VALUE obj = typed_data_alloc(klass, TYPED_DATA_EMBEDDED, 0, type, embed_size);
memset((char *)obj + offsetof(struct RTypedData, data), 0, size);
return obj;
}
}
VALUE obj = typed_data_alloc(klass, 0, NULL, type, sizeof(struct RTypedData));
DATA_PTR(obj) = xcalloc(1, size); DATA_PTR(obj) = xcalloc(1, size);
return obj; return obj;
} }
@ -3162,14 +3185,23 @@ rb_data_typed_object_zalloc(VALUE klass, size_t size, const rb_data_type_t *type
size_t size_t
rb_objspace_data_type_memsize(VALUE obj) rb_objspace_data_type_memsize(VALUE obj)
{ {
size_t size = 0;
if (RTYPEDDATA_P(obj)) { if (RTYPEDDATA_P(obj)) {
const rb_data_type_t *type = RTYPEDDATA_TYPE(obj); const rb_data_type_t *type = RTYPEDDATA_TYPE(obj);
const void *ptr = RTYPEDDATA_DATA(obj); const void *ptr = RTYPEDDATA_DATA(obj);
if (RTYPEDDATA_TYPE(obj)->flags & RUBY_TYPED_EMBEDDABLE && !RTYPEDDATA_EMBEDDED_P(obj)) {
#ifdef HAVE_MALLOC_USABLE_SIZE
size += malloc_usable_size((void *)ptr);
#endif
}
if (ptr && type->function.dsize) { if (ptr && type->function.dsize) {
return type->function.dsize(ptr); size += type->function.dsize(ptr);
} }
} }
return 0;
return size;
} }
const char * const char *
@ -3454,17 +3486,23 @@ rb_data_free(rb_objspace_t *objspace, VALUE obj)
if (dfree) { if (dfree) {
if (dfree == RUBY_DEFAULT_FREE) { if (dfree == RUBY_DEFAULT_FREE) {
xfree(data); if (!RTYPEDDATA_EMBEDDED_P(obj)) {
RB_DEBUG_COUNTER_INC(obj_data_xfree); xfree(data);
RB_DEBUG_COUNTER_INC(obj_data_xfree);
}
} }
else if (free_immediately) { else if (free_immediately) {
(*dfree)(data); (*dfree)(data);
if (RTYPEDDATA_TYPE(obj)->flags & RUBY_TYPED_EMBEDDABLE && !RTYPEDDATA_EMBEDDED_P(obj)) {
xfree(data);
}
RB_DEBUG_COUNTER_INC(obj_data_imm_free); RB_DEBUG_COUNTER_INC(obj_data_imm_free);
} }
else { else {
RB_DEBUG_COUNTER_INC(obj_data_zombie);
make_zombie(objspace, obj, dfree, data); make_zombie(objspace, obj, dfree, data);
return false; RB_DEBUG_COUNTER_INC(obj_data_zombie);
return FALSE;
} }
} }
else { else {
@ -7313,7 +7351,8 @@ gc_mark_children(rb_objspace_t *objspace, VALUE obj)
case T_DATA: case T_DATA:
{ {
void *const ptr = DATA_PTR(obj); void *const ptr = RTYPEDDATA_P(obj) ? RTYPEDDATA_GET_DATA(obj) : DATA_PTR(obj);
if (ptr) { if (ptr) {
if (RTYPEDDATA_P(obj) && gc_declarative_marking_p(any->as.typeddata.type)) { if (RTYPEDDATA_P(obj) && gc_declarative_marking_p(any->as.typeddata.type)) {
gc_mark_from_offset(objspace, obj); gc_mark_from_offset(objspace, obj);

View File

@ -114,6 +114,8 @@
#define RUBY_TYPED_PROMOTED1 RUBY_TYPED_PROMOTED1 #define RUBY_TYPED_PROMOTED1 RUBY_TYPED_PROMOTED1
/** @endcond */ /** @endcond */
#define TYPED_DATA_EMBEDDED 2
/** /**
* @private * @private
* *
@ -137,6 +139,8 @@ rbimpl_typeddata_flags {
*/ */
RUBY_TYPED_FREE_IMMEDIATELY = 1, RUBY_TYPED_FREE_IMMEDIATELY = 1,
RUBY_TYPED_EMBEDDABLE = 2,
/** /**
* This flag has something to do with Ractor. Multiple Ractors run without * This flag has something to do with Ractor. Multiple Ractors run without
* protecting each other. Sharing an object among Ractors is basically * protecting each other. Sharing an object among Ractors is basically
@ -460,7 +464,7 @@ RBIMPL_SYMBOL_EXPORT_END()
*/ */
#define TypedData_Make_Struct0(result, klass, type, size, data_type, sval) \ #define TypedData_Make_Struct0(result, klass, type, size, data_type, sval) \
VALUE result = rb_data_typed_object_zalloc(klass, size, data_type); \ VALUE result = rb_data_typed_object_zalloc(klass, size, data_type); \
(sval) = RBIMPL_CAST((type *)RTYPEDDATA_DATA(result)); \ (sval) = RTYPEDDATA_GET_DATA(result); \
RBIMPL_CAST(/*suppress unused variable warnings*/(void)(sval)) RBIMPL_CAST(/*suppress unused variable warnings*/(void)(sval))
/** /**
@ -511,6 +515,36 @@ RBIMPL_SYMBOL_EXPORT_END()
#define TypedData_Get_Struct(obj,type,data_type,sval) \ #define TypedData_Get_Struct(obj,type,data_type,sval) \
((sval) = RBIMPL_CAST((type *)rb_check_typeddata((obj), (data_type)))) ((sval) = RBIMPL_CAST((type *)rb_check_typeddata((obj), (data_type))))
/**
 * Queries whether the TYPED_DATA_EMBEDDED bit is set in the typed_flag of a
 * TypedData object.
 *
 * @param[in]  obj    Object in question; its type is verified to be
 *                    ::RUBY_T_DATA only when RUBY_DEBUG is enabled.
 * @retval     true   The TYPED_DATA_EMBEDDED bit is set.
 * @retval     false  The TYPED_DATA_EMBEDDED bit is not set.
 */
static inline bool
RTYPEDDATA_EMBEDDED_P(VALUE obj)
{
#if RUBY_DEBUG
    if (RB_UNLIKELY(!RB_TYPE_P(obj, RUBY_T_DATA))) {
        Check_Type(obj, RUBY_T_DATA);
        RBIMPL_UNREACHABLE_RETURN(false);
    }
#endif

    /* Isolate the embedded bit first, then turn it into a boolean. */
    const VALUE embedded_bit = RTYPEDDATA(obj)->typed_flag & TYPED_DATA_EMBEDDED;
    return embedded_bit != 0;
}
/**
 * Obtains the address of the data carried by a TypedData object.
 *
 * When RTYPEDDATA_EMBEDDED_P() holds, the returned address lies inside the
 * object itself, at byte offset `sizeof(struct RTypedData) - sizeof(void *)`
 * from `obj`.  Otherwise the stored `data` pointer of the object is returned.
 *
 * @param[in]  obj  Object in question; its type is verified to be
 *                  ::RUBY_T_DATA only when RUBY_DEBUG is enabled.
 * @return     Pointer to the object's data.
 */
static inline void *
RTYPEDDATA_GET_DATA(VALUE obj)
{
#if RUBY_DEBUG
    if (RB_UNLIKELY(!RB_TYPE_P(obj, RUBY_T_DATA))) {
        Check_Type(obj, RUBY_T_DATA);
        /* The return type is a pointer, so the fallback value must be a null
         * pointer; `false` is not a null pointer constant in C++. */
        RBIMPL_UNREACHABLE_RETURN(NULL);
    }
#endif

    /* We reuse the data pointer in embedded TypedData. We can't use offsetof
     * since RTypedData is a non-POD type in C++. */
    const size_t embedded_typed_data_size = sizeof(struct RTypedData) - sizeof(void *);

    return RTYPEDDATA_EMBEDDED_P(obj) ? (char *)obj + embedded_typed_data_size : RTYPEDDATA(obj)->data;
}
RBIMPL_ATTR_PURE() RBIMPL_ATTR_PURE()
RBIMPL_ATTR_ARTIFICIAL() RBIMPL_ATTR_ARTIFICIAL()
/** /**
@ -527,7 +561,8 @@ RBIMPL_ATTR_ARTIFICIAL()
static inline bool static inline bool
rbimpl_rtypeddata_p(VALUE obj) rbimpl_rtypeddata_p(VALUE obj)
{ {
return RTYPEDDATA(obj)->typed_flag == 1; VALUE typed_flag = RTYPEDDATA(obj)->typed_flag;
return typed_flag != 0 && typed_flag <= 3;
} }
RBIMPL_ATTR_PURE_UNLESS_DEBUG() RBIMPL_ATTR_PURE_UNLESS_DEBUG()