Fix regression on 2-byte wchar_t systems (Windows)
This commit is contained in:
parent
28a08205c5
commit
0290c7a811
@ -6252,15 +6252,18 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
|
|||||||
end = s + size;
|
end = s + size;
|
||||||
|
|
||||||
while (s < end) {
|
while (s < end) {
|
||||||
|
Py_UNICODE uch;
|
||||||
Py_UCS4 ch;
|
Py_UCS4 ch;
|
||||||
/* We copy the raw representation one byte at a time because the
|
/* We copy the raw representation one byte at a time because the
|
||||||
pointer may be unaligned (see test_codeccallbacks). */
|
pointer may be unaligned (see test_codeccallbacks). */
|
||||||
((char *) &ch)[0] = s[0];
|
((char *) &uch)[0] = s[0];
|
||||||
((char *) &ch)[1] = s[1];
|
((char *) &uch)[1] = s[1];
|
||||||
#ifdef Py_UNICODE_WIDE
|
#ifdef Py_UNICODE_WIDE
|
||||||
((char *) &ch)[2] = s[2];
|
((char *) &uch)[2] = s[2];
|
||||||
((char *) &ch)[3] = s[3];
|
((char *) &uch)[3] = s[3];
|
||||||
#endif
|
#endif
|
||||||
|
ch = uch;
|
||||||
|
|
||||||
/* We have to sanity check the raw data, otherwise doom looms for
|
/* We have to sanity check the raw data, otherwise doom looms for
|
||||||
some malformed UCS-4 data. */
|
some malformed UCS-4 data. */
|
||||||
if (
|
if (
|
||||||
@ -6292,10 +6295,12 @@ _PyUnicode_DecodeUnicodeInternal(const char *s,
|
|||||||
#ifndef Py_UNICODE_WIDE
|
#ifndef Py_UNICODE_WIDE
|
||||||
if (ch >= 0xD800 && ch <= 0xDBFF && s < end)
|
if (ch >= 0xD800 && ch <= 0xDBFF && s < end)
|
||||||
{
|
{
|
||||||
Py_UCS4 ch2 = *(Py_UNICODE*)s;
|
Py_UNICODE uch2;
|
||||||
if (ch2 >= 0xDC00 && ch2 <= 0xDFFF)
|
((char *) &uch2)[0] = s[0];
|
||||||
|
((char *) &uch2)[1] = s[1];
|
||||||
|
if (uch2 >= 0xDC00 && uch2 <= 0xDFFF)
|
||||||
{
|
{
|
||||||
ch = (((ch & 0x3FF)<<10) | (ch2 & 0x3FF)) + 0x10000;
|
ch = (((uch & 0x3FF)<<10) | (uch2 & 0x3FF)) + 0x10000;
|
||||||
s += Py_UNICODE_SIZE;
|
s += Py_UNICODE_SIZE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user