[Bug #20009] Support marshaling non-ASCII name class/module
This commit is contained in:
parent
5e01c0e4e2
commit
097d742a1e
Notes:
git
2025-05-16 12:22:46 +00:00
89
marshal.c
89
marshal.c
@ -460,6 +460,31 @@ w_float(double d, struct dump_arg *arg)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static VALUE
|
||||
w_encivar(VALUE str, struct dump_arg *arg)
|
||||
{
|
||||
VALUE encname = encoding_name(str, arg);
|
||||
if (NIL_P(encname) ||
|
||||
is_ascii_string(str)) {
|
||||
return Qnil;
|
||||
}
|
||||
w_byte(TYPE_IVAR, arg);
|
||||
return encname;
|
||||
}
|
||||
|
||||
static void
|
||||
w_encname(VALUE encname, struct dump_arg *arg)
|
||||
{
|
||||
if (!NIL_P(encname)) {
|
||||
struct dump_call_arg c_arg;
|
||||
c_arg.limit = 1;
|
||||
c_arg.arg = arg;
|
||||
w_long(1L, arg);
|
||||
w_encoding(encname, &c_arg);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
w_symbol(VALUE sym, struct dump_arg *arg)
|
||||
{
|
||||
@ -476,24 +501,11 @@ w_symbol(VALUE sym, struct dump_arg *arg)
|
||||
if (!sym) {
|
||||
rb_raise(rb_eTypeError, "can't dump anonymous ID %"PRIdVALUE, sym);
|
||||
}
|
||||
encname = encoding_name(sym, arg);
|
||||
if (NIL_P(encname) ||
|
||||
is_ascii_string(sym)) {
|
||||
encname = Qnil;
|
||||
}
|
||||
else {
|
||||
w_byte(TYPE_IVAR, arg);
|
||||
}
|
||||
encname = w_encivar(sym, arg);
|
||||
w_byte(TYPE_SYMBOL, arg);
|
||||
w_bytes(RSTRING_PTR(sym), RSTRING_LEN(sym), arg);
|
||||
st_add_direct(arg->symbols, orig_sym, arg->symbols->num_entries);
|
||||
if (!NIL_P(encname)) {
|
||||
struct dump_call_arg c_arg;
|
||||
c_arg.limit = 1;
|
||||
c_arg.arg = arg;
|
||||
w_long(1L, arg);
|
||||
w_encoding(encname, &c_arg);
|
||||
}
|
||||
w_encname(encname, arg);
|
||||
}
|
||||
}
|
||||
|
||||
@ -953,19 +965,23 @@ w_object(VALUE obj, struct dump_arg *arg, int limit)
|
||||
if (FL_TEST(obj, FL_SINGLETON)) {
|
||||
rb_raise(rb_eTypeError, "singleton class can't be dumped");
|
||||
}
|
||||
w_byte(TYPE_CLASS, arg);
|
||||
{
|
||||
VALUE path = class2path(obj);
|
||||
VALUE encname = w_encivar(path, arg);
|
||||
w_byte(TYPE_CLASS, arg);
|
||||
w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
|
||||
w_encname(encname, arg);
|
||||
RB_GC_GUARD(path);
|
||||
}
|
||||
break;
|
||||
|
||||
case T_MODULE:
|
||||
w_byte(TYPE_MODULE, arg);
|
||||
{
|
||||
VALUE path = class2path(obj);
|
||||
VALUE encname = w_encivar(path, arg);
|
||||
w_byte(TYPE_MODULE, arg);
|
||||
w_bytes(RSTRING_PTR(path), RSTRING_LEN(path), arg);
|
||||
w_encname(encname, arg);
|
||||
RB_GC_GUARD(path);
|
||||
}
|
||||
break;
|
||||
@ -1707,6 +1723,34 @@ r_copy_ivar(VALUE v, VALUE data)
|
||||
"can't override instance variable of "type" '%"PRIsVALUE"'", \
|
||||
(str))
|
||||
|
||||
static int
|
||||
r_ivar_encoding(VALUE obj, struct load_arg *arg, VALUE sym, VALUE val)
|
||||
{
|
||||
int idx = sym2encidx(sym, val);
|
||||
if (idx >= 0) {
|
||||
if (rb_enc_capable(obj)) {
|
||||
rb_enc_associate_index(obj, idx);
|
||||
}
|
||||
else {
|
||||
rb_raise(rb_eArgError, "%"PRIsVALUE" is not enc_capable", obj);
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
static long
|
||||
r_encname(VALUE obj, struct load_arg *arg)
|
||||
{
|
||||
long len = r_long(arg);
|
||||
if (len > 0) {
|
||||
VALUE sym = r_symbol(arg);
|
||||
VALUE val = r_object(arg);
|
||||
len -= r_ivar_encoding(obj, arg, sym, val);
|
||||
}
|
||||
return len;
|
||||
}
|
||||
|
||||
static void
|
||||
r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
|
||||
{
|
||||
@ -1723,14 +1767,7 @@ r_ivar(VALUE obj, int *has_encoding, struct load_arg *arg)
|
||||
do {
|
||||
VALUE sym = r_symbol(arg);
|
||||
VALUE val = r_object(arg);
|
||||
int idx = sym2encidx(sym, val);
|
||||
if (idx >= 0) {
|
||||
if (rb_enc_capable(obj)) {
|
||||
rb_enc_associate_index(obj, idx);
|
||||
}
|
||||
else {
|
||||
rb_raise(rb_eArgError, "%"PRIsVALUE" is not enc_capable", obj);
|
||||
}
|
||||
if (r_ivar_encoding(obj, arg, sym, val)) {
|
||||
if (has_encoding) *has_encoding = TRUE;
|
||||
}
|
||||
else if (symname_equal_lit(sym, name_s_ruby2_keywords_flag)) {
|
||||
@ -2254,6 +2291,7 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
|
||||
{
|
||||
VALUE str = r_bytes(arg);
|
||||
|
||||
if (ivp && *ivp > 0) *ivp = r_encname(str, arg) > 0;
|
||||
v = path2class(str);
|
||||
prohibit_ivar("class", str);
|
||||
v = r_entry(v, arg);
|
||||
@ -2265,6 +2303,7 @@ r_object_for(struct load_arg *arg, bool partial, int *ivp, VALUE extmod, int typ
|
||||
{
|
||||
VALUE str = r_bytes(arg);
|
||||
|
||||
if (ivp && *ivp > 0) *ivp = r_encname(str, arg) > 0;
|
||||
v = path2module(str);
|
||||
prohibit_ivar("module", str);
|
||||
v = r_entry(v, arg);
|
||||
|
@ -231,9 +231,12 @@ describe "Marshal.dump" do
|
||||
Marshal.dump(MarshalSpec::ClassWithOverriddenName).should == "\x04\bc)MarshalSpec::ClassWithOverriddenName"
|
||||
end
|
||||
|
||||
it "dumps a class with multibyte characters in name" do
|
||||
source_object = eval("MarshalSpec::MultibyteぁあぃいClass".dup.force_encoding(Encoding::UTF_8))
|
||||
Marshal.dump(source_object).should == "\x04\bc,MarshalSpec::Multibyte\xE3\x81\x81\xE3\x81\x82\xE3\x81\x83\xE3\x81\x84Class"
|
||||
# From Ruby 3.5 on, a class whose name contains non-ASCII characters is dumped
# with a leading "I" and a trailing "\x06:\x06ET" record around the class name,
# as the expected string below shows.
ruby_version_is "3.5" do
  it "dumps a class with multibyte characters in name" do
    source_object = eval("MarshalSpec::MultibyteぁあぃいClass".dup.force_encoding(Encoding::UTF_8))
    Marshal.dump(source_object).should == "\x04\bIc,MarshalSpec::Multibyte\xE3\x81\x81\xE3\x81\x82\xE3\x81\x83\xE3\x81\x84Class\x06:\x06ET"
    # The round trip must be asserted with `should`; a bare `==` only computes
    # a boolean that is thrown away, so the check could never fail.
    Marshal.load(Marshal.dump(source_object)).should == source_object
  end
end
|
||||
|
||||
it "uses object links for objects repeatedly dumped" do
|
||||
@ -258,9 +261,12 @@ describe "Marshal.dump" do
|
||||
Marshal.dump(MarshalSpec::ModuleWithOverriddenName).should == "\x04\bc*MarshalSpec::ModuleWithOverriddenName"
|
||||
end
|
||||
|
||||
it "dumps a module with multibyte characters in name" do
|
||||
source_object = eval("MarshalSpec::MultibyteけげこごModule".dup.force_encoding(Encoding::UTF_8))
|
||||
Marshal.dump(source_object).should == "\x04\bm-MarshalSpec::Multibyte\xE3\x81\x91\xE3\x81\x92\xE3\x81\x93\xE3\x81\x94Module"
|
||||
# From Ruby 3.5 on, a module whose name contains non-ASCII characters is dumped
# with a leading "I" and a trailing "\x06:\x06ET" record around the module
# name, as the expected string below shows.
ruby_version_is "3.5" do
  it "dumps a module with multibyte characters in name" do
    source_object = eval("MarshalSpec::MultibyteけげこごModule".dup.force_encoding(Encoding::UTF_8))
    Marshal.dump(source_object).should == "\x04\bIm-MarshalSpec::Multibyte\xE3\x81\x91\xE3\x81\x92\xE3\x81\x93\xE3\x81\x94Module\x06:\x06ET"
    # The round trip must be asserted with `should`; a bare `==` only computes
    # a boolean that is thrown away, so the check could never fail.
    Marshal.load(Marshal.dump(source_object)).should == source_object
  end
end
|
||||
|
||||
it "uses object links for objects repeatedly dumped" do
|
||||
@ -874,9 +880,12 @@ describe "Marshal.dump" do
|
||||
Marshal.dump(obj).should include("MarshalSpec::TimeWithOverriddenName")
|
||||
end
|
||||
|
||||
it "dumps a Time subclass with multibyte characters in name" do
|
||||
source_object = eval("MarshalSpec::MultibyteぁあぃいTime".dup.force_encoding(Encoding::UTF_8))
|
||||
Marshal.dump(source_object).should == "\x04\bc+MarshalSpec::Multibyte\xE3\x81\x81\xE3\x81\x82\xE3\x81\x83\xE3\x81\x84Time"
|
||||
# From Ruby 3.5 on, the Time subclass with a non-ASCII name is dumped with a
# leading "I" and a trailing "\x06:\x06ET" record around the class name, as
# the expected string below shows.
ruby_version_is "3.5" do
  it "dumps a Time subclass with multibyte characters in name" do
    source_object = eval("MarshalSpec::MultibyteぁあぃいTime".dup.force_encoding(Encoding::UTF_8))
    Marshal.dump(source_object).should == "\x04\bIc+MarshalSpec::Multibyte\xE3\x81\x81\xE3\x81\x82\xE3\x81\x83\xE3\x81\x84Time\x06:\x06ET"
    # The round trip must be asserted with `should`; a bare `==` only computes
    # a boolean that is thrown away, so the check could never fail.
    Marshal.load(Marshal.dump(source_object)).should == source_object
  end
end
|
||||
|
||||
it "uses object links for objects repeatedly dumped" do
|
||||
|
@ -268,7 +268,11 @@ class TestMarshal < Test::Unit::TestCase
|
||||
classISO8859_1.name
|
||||
ClassISO8859_1 = classISO8859_1
|
||||
|
||||
def test_class_nonascii
|
||||
moduleUTF8 = const_set("C\u{30af 30e9 30b9}", Module.new)
|
||||
moduleUTF8.name
|
||||
ModuleUTF8 = moduleUTF8
|
||||
|
||||
def test_nonascii_class_instance
|
||||
a = ClassUTF8.new
|
||||
assert_instance_of(ClassUTF8, Marshal.load(Marshal.dump(a)), '[ruby-core:24790]')
|
||||
|
||||
@ -301,6 +305,12 @@ class TestMarshal < Test::Unit::TestCase
|
||||
end
|
||||
end
|
||||
|
||||
# Classes and modules with non-ASCII names must load back from a dump as the
# very same object (identity, not just equality).
def test_nonascii_class_module
  [ClassUTF8, ClassISO8859_1, ModuleUTF8].each do |named|
    assert_same(named, Marshal.load(Marshal.dump(named)))
  end
end
|
||||
|
||||
def test_regexp2
|
||||
assert_equal(/\\u/, Marshal.load("\004\b/\b\\\\u\000"))
|
||||
assert_equal(/u/, Marshal.load("\004\b/\a\\u\000"))
|
||||
|
Loading…
x
Reference in New Issue
Block a user