Speed up find_maxchar_surrogates() for 32-bit wchar_t
If the input contains at least one character in the range U+10000-U+10FFFF, we already know that the string must use PyUnicode_4BYTE_KIND, so the scan can return as soon as such a character is found.
This commit is contained in:
parent
b9275c104e
commit
ae86485517
@ -1060,19 +1060,17 @@ find_maxchar_surrogates(const wchar_t *begin, const wchar_t *end,
|
|||||||
const wchar_t *iter;
|
const wchar_t *iter;
|
||||||
|
|
||||||
assert(num_surrogates != NULL && maxchar != NULL);
|
assert(num_surrogates != NULL && maxchar != NULL);
|
||||||
if (num_surrogates == NULL || maxchar == NULL) {
|
|
||||||
PyErr_SetString(PyExc_SystemError,
|
|
||||||
"unexpected NULL arguments to "
|
|
||||||
"PyUnicode_FindMaxCharAndNumSurrogatePairs");
|
|
||||||
return -1;
|
|
||||||
}
|
|
||||||
|
|
||||||
*num_surrogates = 0;
|
*num_surrogates = 0;
|
||||||
*maxchar = 0;
|
*maxchar = 0;
|
||||||
|
|
||||||
for (iter = begin; iter < end; ) {
|
for (iter = begin; iter < end; ) {
|
||||||
if (*iter > *maxchar)
|
if (*iter > *maxchar) {
|
||||||
*maxchar = *iter;
|
*maxchar = *iter;
|
||||||
|
#if SIZEOF_WCHAR_T != 2
|
||||||
|
if (*maxchar >= 0x10000)
|
||||||
|
return 0;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
#if SIZEOF_WCHAR_T == 2
|
#if SIZEOF_WCHAR_T == 2
|
||||||
if (*iter >= 0xD800 && *iter <= 0xDBFF
|
if (*iter >= 0xD800 && *iter <= 0xDBFF
|
||||||
&& (iter+1) < end && iter[1] >= 0xDC00 && iter[1] <= 0xDFFF)
|
&& (iter+1) < end && iter[1] >= 0xDC00 && iter[1] <= 0xDFFF)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user