_PyUnicode_CheckConsistency() now also checks the maximum value of maxchar,
not only its minimum value
This commit is contained in:
parent
d5c4022d2a
commit
77faf69ca1
@ -303,19 +303,22 @@ typedef struct {
|
|||||||
- PyUnicode_1BYTE_KIND (1):
|
- PyUnicode_1BYTE_KIND (1):
|
||||||
|
|
||||||
* character type = Py_UCS1 (8 bits, unsigned)
|
* character type = Py_UCS1 (8 bits, unsigned)
|
||||||
* if ascii is set, all characters must be in range
|
* all characters are in the range U+0000-U+00FF (latin1)
|
||||||
U+0000-U+007F, otherwise at least one character must be in range
|
* if ascii is set, all characters are in the range U+0000-U+007F
|
||||||
|
(ASCII), otherwise at least one character is in the range
|
||||||
U+0080-U+00FF
|
U+0080-U+00FF
|
||||||
|
|
||||||
- PyUnicode_2BYTE_KIND (2):
|
- PyUnicode_2BYTE_KIND (2):
|
||||||
|
|
||||||
* character type = Py_UCS2 (16 bits, unsigned)
|
* character type = Py_UCS2 (16 bits, unsigned)
|
||||||
* at least one character must be in range U+0100-U+FFFF
|
* all characters are in the range U+0000-U+FFFF (BMP)
|
||||||
|
* at least one character is in the range U+0100-U+FFFF
|
||||||
|
|
||||||
- PyUnicode_4BYTE_KIND (4):
|
- PyUnicode_4BYTE_KIND (4):
|
||||||
|
|
||||||
* character type = Py_UCS4 (32 bits, unsigned)
|
* character type = Py_UCS4 (32 bits, unsigned)
|
||||||
* at least one character must be in range U+10000-U+10FFFF
|
* all characters are in the range U+0000-U+10FFFF
|
||||||
|
* at least one character is in the range U+10000-U+10FFFF
|
||||||
*/
|
*/
|
||||||
unsigned int kind:3;
|
unsigned int kind:3;
|
||||||
/* Compact is with respect to the allocation scheme. Compact unicode
|
/* Compact is with respect to the allocation scheme. Compact unicode
|
||||||
@ -323,7 +326,7 @@ typedef struct {
|
|||||||
one block for the PyUnicodeObject struct and another for its data
|
one block for the PyUnicodeObject struct and another for its data
|
||||||
buffer. */
|
buffer. */
|
||||||
unsigned int compact:1;
|
unsigned int compact:1;
|
||||||
/* The string only contains characters in range U+0000-U+007F (ASCII)
|
/* The string only contains characters in the range U+0000-U+007F (ASCII)
|
||||||
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
|
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
|
||||||
set, use the PyASCIIObject structure. */
|
set, use the PyASCIIObject structure. */
|
||||||
unsigned int ascii:1;
|
unsigned int ascii:1;
|
||||||
|
@ -392,15 +392,21 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
|
|||||||
maxchar = ch;
|
maxchar = ch;
|
||||||
}
|
}
|
||||||
if (kind == PyUnicode_1BYTE_KIND) {
|
if (kind == PyUnicode_1BYTE_KIND) {
|
||||||
if (ascii->state.ascii == 0)
|
if (ascii->state.ascii == 0) {
|
||||||
assert(maxchar >= 128);
|
assert(maxchar >= 128);
|
||||||
|
assert(maxchar <= 255);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
assert(maxchar < 128);
|
assert(maxchar < 128);
|
||||||
}
|
}
|
||||||
else if (kind == PyUnicode_2BYTE_KIND)
|
else if (kind == PyUnicode_2BYTE_KIND) {
|
||||||
assert(maxchar >= 0x100);
|
assert(maxchar >= 0x100);
|
||||||
else
|
assert(maxchar <= 0xFFFF);
|
||||||
|
}
|
||||||
|
else {
|
||||||
assert(maxchar >= 0x10000);
|
assert(maxchar >= 0x10000);
|
||||||
|
assert(maxchar <= 0x10FFFF);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (check_content && !unicode_is_singleton(op))
|
if (check_content && !unicode_is_singleton(op))
|
||||||
assert(ascii->hash == -1);
|
assert(ascii->hash == -1);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user