_PyUnicode_CheckConsistency() now also checks the maximum value of maxchar, not only its minimum value.
This commit is contained in:
Victor Stinner 2011-11-20 18:56:05 +01:00
parent d5c4022d2a
commit 77faf69ca1
2 changed files with 17 additions and 8 deletions

View File

@ -303,19 +303,22 @@ typedef struct {
- PyUnicode_1BYTE_KIND (1): - PyUnicode_1BYTE_KIND (1):
* character type = Py_UCS1 (8 bits, unsigned) * character type = Py_UCS1 (8 bits, unsigned)
* if ascii is set, all characters must be in range * all characters are in the range U+0000-U+00FF (latin1)
U+0000-U+007F, otherwise at least one character must be in range * if ascii is set, all characters are in the range U+0000-U+007F
(ASCII), otherwise at least one character is in the range
U+0080-U+00FF U+0080-U+00FF
- PyUnicode_2BYTE_KIND (2): - PyUnicode_2BYTE_KIND (2):
* character type = Py_UCS2 (16 bits, unsigned) * character type = Py_UCS2 (16 bits, unsigned)
* at least one character must be in range U+0100-U+FFFF * all characters are in the range U+0000-U+FFFF (BMP)
* at least one character is in the range U+0100-U+FFFF
- PyUnicode_4BYTE_KIND (4): - PyUnicode_4BYTE_KIND (4):
* character type = Py_UCS4 (32 bits, unsigned) * character type = Py_UCS4 (32 bits, unsigned)
* at least one character must be in range U+10000-U+10FFFF * all characters are in the range U+0000-U+10FFFF
* at least one character is in the range U+10000-U+10FFFF
*/ */
unsigned int kind:3; unsigned int kind:3;
/* Compact is with respect to the allocation scheme. Compact unicode /* Compact is with respect to the allocation scheme. Compact unicode
@ -323,7 +326,7 @@ typedef struct {
one block for the PyUnicodeObject struct and another for its data one block for the PyUnicodeObject struct and another for its data
buffer. */ buffer. */
unsigned int compact:1; unsigned int compact:1;
/* The string only contains characters in range U+0000-U+007F (ASCII) /* The string only contains characters in the range U+0000-U+007F (ASCII)
and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is
set, use the PyASCIIObject structure. */ set, use the PyASCIIObject structure. */
unsigned int ascii:1; unsigned int ascii:1;

View File

@ -392,15 +392,21 @@ _PyUnicode_CheckConsistency(PyObject *op, int check_content)
maxchar = ch; maxchar = ch;
} }
if (kind == PyUnicode_1BYTE_KIND) { if (kind == PyUnicode_1BYTE_KIND) {
if (ascii->state.ascii == 0) if (ascii->state.ascii == 0) {
assert(maxchar >= 128); assert(maxchar >= 128);
assert(maxchar <= 255);
}
else else
assert(maxchar < 128); assert(maxchar < 128);
} }
else if (kind == PyUnicode_2BYTE_KIND) else if (kind == PyUnicode_2BYTE_KIND) {
assert(maxchar >= 0x100); assert(maxchar >= 0x100);
else assert(maxchar <= 0xFFFF);
}
else {
assert(maxchar >= 0x10000); assert(maxchar >= 0x10000);
assert(maxchar <= 0x10FFFF);
}
} }
if (check_content && !unicode_is_singleton(op)) if (check_content && !unicode_is_singleton(op))
assert(ascii->hash == -1); assert(ascii->hash == -1);