bpo-32583: Fix possible crashing in builtin Unicode decoders (#5325)
When a custom decode error handler is used, the built-in decoders can write out of bounds and then crash.
This commit is contained in:
parent
84521047e4
commit
2c7fd46e11
@ -1044,6 +1044,58 @@ class CodecCallbackTest(unittest.TestCase):
|
|||||||
for (encoding, data) in baddata:
|
for (encoding, data) in baddata:
|
||||||
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
|
self.assertEqual(data.decode(encoding, "test.mutating"), "\u4242")
|
||||||
|
|
||||||
|
# issue32583
|
||||||
|
def test_crashing_decode_handler(self):
|
||||||
|
# better generating one more character to fill the extra space slot
|
||||||
|
# so in debug build it can steadily fail
|
||||||
|
def forward_shorter_than_end(exc):
|
||||||
|
if isinstance(exc, UnicodeDecodeError):
|
||||||
|
# size one character, 0 < forward < exc.end
|
||||||
|
return ('\ufffd', exc.start+1)
|
||||||
|
else:
|
||||||
|
raise TypeError("don't know how to handle %r" % exc)
|
||||||
|
codecs.register_error(
|
||||||
|
"test.forward_shorter_than_end", forward_shorter_than_end)
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
b'\xd8\xd8\xd8\xd8\xd8\x00\x00\x00'.decode(
|
||||||
|
'utf-16-le', 'test.forward_shorter_than_end'),
|
||||||
|
'\ufffd\ufffd\ufffd\ufffd\xd8\x00'
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
b'\xd8\xd8\xd8\xd8\x00\xd8\x00\x00'.decode(
|
||||||
|
'utf-16-be', 'test.forward_shorter_than_end'),
|
||||||
|
'\ufffd\ufffd\ufffd\ufffd\xd8\x00'
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
b'\x11\x11\x11\x11\x11\x00\x00\x00\x00\x00\x00'.decode(
|
||||||
|
'utf-32-le', 'test.forward_shorter_than_end'),
|
||||||
|
'\ufffd\ufffd\ufffd\u1111\x00'
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
b'\x11\x11\x11\x00\x00\x11\x11\x00\x00\x00\x00'.decode(
|
||||||
|
'utf-32-be', 'test.forward_shorter_than_end'),
|
||||||
|
'\ufffd\ufffd\ufffd\u1111\x00'
|
||||||
|
)
|
||||||
|
|
||||||
|
def replace_with_long(exc):
|
||||||
|
if isinstance(exc, UnicodeDecodeError):
|
||||||
|
exc.object = b"\x00" * 8
|
||||||
|
return ('\ufffd', exc.start)
|
||||||
|
else:
|
||||||
|
raise TypeError("don't know how to handle %r" % exc)
|
||||||
|
codecs.register_error("test.replace_with_long", replace_with_long)
|
||||||
|
|
||||||
|
self.assertEqual(
|
||||||
|
b'\x00'.decode('utf-16', 'test.replace_with_long'),
|
||||||
|
'\ufffd\x00\x00\x00\x00'
|
||||||
|
)
|
||||||
|
self.assertEqual(
|
||||||
|
b'\x00'.decode('utf-32', 'test.replace_with_long'),
|
||||||
|
'\ufffd\x00\x00'
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def test_fake_error_class(self):
|
def test_fake_error_class(self):
|
||||||
handlers = [
|
handlers = [
|
||||||
codecs.strict_errors,
|
codecs.strict_errors,
|
||||||
|
@ -0,0 +1,2 @@
|
|||||||
|
Fix a possible crash in the built-in Unicode decoders caused by
|
||||||
|
out-of-bounds writes when using custom decode error handlers.
|
@ -4190,7 +4190,10 @@ unicode_decode_call_errorhandler_writer(
|
|||||||
Py_ssize_t insize;
|
Py_ssize_t insize;
|
||||||
Py_ssize_t newpos;
|
Py_ssize_t newpos;
|
||||||
Py_ssize_t replen;
|
Py_ssize_t replen;
|
||||||
|
Py_ssize_t remain;
|
||||||
PyObject *inputobj = NULL;
|
PyObject *inputobj = NULL;
|
||||||
|
int need_to_grow = 0;
|
||||||
|
const char *new_inptr;
|
||||||
|
|
||||||
if (*errorHandler == NULL) {
|
if (*errorHandler == NULL) {
|
||||||
*errorHandler = PyCodec_LookupError(errors);
|
*errorHandler = PyCodec_LookupError(errors);
|
||||||
@ -4221,6 +4224,7 @@ unicode_decode_call_errorhandler_writer(
|
|||||||
inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);
|
inputobj = PyUnicodeDecodeError_GetObject(*exceptionObject);
|
||||||
if (!inputobj)
|
if (!inputobj)
|
||||||
goto onError;
|
goto onError;
|
||||||
|
remain = *inend - *input - *endinpos;
|
||||||
*input = PyBytes_AS_STRING(inputobj);
|
*input = PyBytes_AS_STRING(inputobj);
|
||||||
insize = PyBytes_GET_SIZE(inputobj);
|
insize = PyBytes_GET_SIZE(inputobj);
|
||||||
*inend = *input + insize;
|
*inend = *input + insize;
|
||||||
@ -4238,6 +4242,19 @@ unicode_decode_call_errorhandler_writer(
|
|||||||
replen = PyUnicode_GET_LENGTH(repunicode);
|
replen = PyUnicode_GET_LENGTH(repunicode);
|
||||||
if (replen > 1) {
|
if (replen > 1) {
|
||||||
writer->min_length += replen - 1;
|
writer->min_length += replen - 1;
|
||||||
|
need_to_grow = 1;
|
||||||
|
}
|
||||||
|
new_inptr = *input + newpos;
|
||||||
|
if (*inend - new_inptr > remain) {
|
||||||
|
/* We don't know the decoding algorithm here so we make the worst
|
||||||
|
assumption that one byte decodes to one unicode character.
|
||||||
|
If unfortunately one byte could decode to more unicode characters,
|
||||||
|
the decoder may write out-of-bound then. Is it possible for the
|
||||||
|
algorithms using this function? */
|
||||||
|
writer->min_length += *inend - new_inptr - remain;
|
||||||
|
need_to_grow = 1;
|
||||||
|
}
|
||||||
|
if (need_to_grow) {
|
||||||
writer->overallocate = 1;
|
writer->overallocate = 1;
|
||||||
if (_PyUnicodeWriter_Prepare(writer, writer->min_length,
|
if (_PyUnicodeWriter_Prepare(writer, writer->min_length,
|
||||||
PyUnicode_MAX_CHAR_VALUE(repunicode)) == -1)
|
PyUnicode_MAX_CHAR_VALUE(repunicode)) == -1)
|
||||||
@ -4247,7 +4264,7 @@ unicode_decode_call_errorhandler_writer(
|
|||||||
goto onError;
|
goto onError;
|
||||||
|
|
||||||
*endinpos = newpos;
|
*endinpos = newpos;
|
||||||
*inptr = *input + newpos;
|
*inptr = new_inptr;
|
||||||
|
|
||||||
/* we made it! */
|
/* we made it! */
|
||||||
Py_DECREF(restuple);
|
Py_DECREF(restuple);
|
||||||
@ -5572,7 +5589,8 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
/* Note: size will always be longer than the resulting Unicode
|
/* Note: size will always be longer than the resulting Unicode
|
||||||
character count */
|
character count normally. Error handler will take care of
|
||||||
|
resizing when needed. */
|
||||||
_PyUnicodeWriter_Init(&writer);
|
_PyUnicodeWriter_Init(&writer);
|
||||||
writer.min_length = (e - q + 1) / 2;
|
writer.min_length = (e - q + 1) / 2;
|
||||||
if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)
|
if (_PyUnicodeWriter_Prepare(&writer, writer.min_length, 127) == -1)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user