8032012: String.toLowerCase/toUpperCase performance improvement
Updated the implementation to improve performance. Reviewed-by: psandoz, forax
This commit is contained in:
parent
7be40556f6
commit
8cdace2575
@ -2549,87 +2549,88 @@ public final class String
|
|||||||
if (locale == null) {
|
if (locale == null) {
|
||||||
throw new NullPointerException();
|
throw new NullPointerException();
|
||||||
}
|
}
|
||||||
|
int first;
|
||||||
int firstUpper;
|
boolean hasSurr = false;
|
||||||
final int len = value.length;
|
final int len = value.length;
|
||||||
|
|
||||||
/* Now check if there are any characters that need to be changed. */
|
// Now check if there are any characters that need to be changed, or are surrogate
|
||||||
scan: {
|
for (first = 0 ; first < len; first++) {
|
||||||
for (firstUpper = 0 ; firstUpper < len; ) {
|
int cp = (int)value[first];
|
||||||
char c = value[firstUpper];
|
if (Character.isSurrogate((char)cp)) {
|
||||||
if ((c >= Character.MIN_HIGH_SURROGATE)
|
hasSurr = true;
|
||||||
&& (c <= Character.MAX_HIGH_SURROGATE)) {
|
break;
|
||||||
int supplChar = codePointAt(firstUpper);
|
}
|
||||||
if (supplChar != Character.toLowerCase(supplChar)) {
|
if (cp != Character.toLowerCase(cp)) { // no need to check Character.ERROR
|
||||||
break scan;
|
break;
|
||||||
}
|
|
||||||
firstUpper += Character.charCount(supplChar);
|
|
||||||
} else {
|
|
||||||
if (c != Character.toLowerCase(c)) {
|
|
||||||
break scan;
|
|
||||||
}
|
|
||||||
firstUpper++;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return this;
|
|
||||||
}
|
}
|
||||||
|
if (first == len)
|
||||||
|
return this;
|
||||||
char[] result = new char[len];
|
char[] result = new char[len];
|
||||||
int resultOffset = 0; /* result may grow, so i+resultOffset
|
System.arraycopy(value, 0, result, 0, first); // Just copy the first few
|
||||||
* is the write location in result */
|
// lowerCase characters.
|
||||||
|
|
||||||
/* Just copy the first few lowerCase characters. */
|
|
||||||
System.arraycopy(value, 0, result, 0, firstUpper);
|
|
||||||
|
|
||||||
String lang = locale.getLanguage();
|
String lang = locale.getLanguage();
|
||||||
boolean localeDependent =
|
if (lang == "tr" || lang == "az" || lang == "lt") {
|
||||||
(lang == "tr" || lang == "az" || lang == "lt");
|
return toLowerCaseEx(result, first, locale, true);
|
||||||
char[] lowerCharArray;
|
}
|
||||||
int lowerChar;
|
if (hasSurr) {
|
||||||
int srcChar;
|
return toLowerCaseEx(result, first, locale, false);
|
||||||
|
}
|
||||||
|
for (int i = first; i < len; i++) {
|
||||||
|
int cp = (int)value[i];
|
||||||
|
if (cp == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
|
||||||
|
return toLowerCaseEx(result, i, locale, false);
|
||||||
|
}
|
||||||
|
cp = Character.toLowerCase(cp);
|
||||||
|
if (!Character.isBmpCodePoint(cp)) {
|
||||||
|
return toLowerCaseEx(result, i, locale, false);
|
||||||
|
}
|
||||||
|
result[i] = (char)cp;
|
||||||
|
}
|
||||||
|
return new String(result, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String toLowerCaseEx(char[] result, int first, Locale locale, boolean localeDependent) {
|
||||||
|
int resultOffset = first;
|
||||||
int srcCount;
|
int srcCount;
|
||||||
for (int i = firstUpper; i < len; i += srcCount) {
|
for (int i = first; i < value.length; i += srcCount) {
|
||||||
srcChar = (int)value[i];
|
int srcChar = (int)value[i];
|
||||||
if ((char)srcChar >= Character.MIN_HIGH_SURROGATE
|
int lowerChar;
|
||||||
&& (char)srcChar <= Character.MAX_HIGH_SURROGATE) {
|
char[] lowerCharArray;
|
||||||
|
srcCount = 1;
|
||||||
|
if (Character.isSurrogate((char)srcChar)) {
|
||||||
srcChar = codePointAt(i);
|
srcChar = codePointAt(i);
|
||||||
srcCount = Character.charCount(srcChar);
|
srcCount = Character.charCount(srcChar);
|
||||||
} else {
|
|
||||||
srcCount = 1;
|
|
||||||
}
|
}
|
||||||
if (localeDependent || srcChar == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
|
if (localeDependent || srcChar == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
|
||||||
lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);
|
lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);
|
||||||
} else {
|
} else {
|
||||||
lowerChar = Character.toLowerCase(srcChar);
|
lowerChar = Character.toLowerCase(srcChar);
|
||||||
}
|
}
|
||||||
if ((lowerChar == Character.ERROR)
|
if (Character.isBmpCodePoint(lowerChar)) { // Character.ERROR is not a bmp
|
||||||
|| (lowerChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
|
result[resultOffset++] = (char)lowerChar;
|
||||||
|
} else {
|
||||||
if (lowerChar == Character.ERROR) {
|
if (lowerChar == Character.ERROR) {
|
||||||
lowerCharArray =
|
lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
|
||||||
ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
|
|
||||||
} else if (srcCount == 2) {
|
} else if (srcCount == 2) {
|
||||||
resultOffset += Character.toChars(lowerChar, result, i + resultOffset) - srcCount;
|
resultOffset += Character.toChars(lowerChar, result, resultOffset);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
lowerCharArray = Character.toChars(lowerChar);
|
lowerCharArray = Character.toChars(lowerChar);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Grow result if needed */
|
/* Grow result if needed */
|
||||||
int mapLen = lowerCharArray.length;
|
int mapLen = lowerCharArray.length;
|
||||||
if (mapLen > srcCount) {
|
if (mapLen > srcCount) {
|
||||||
char[] result2 = new char[result.length + mapLen - srcCount];
|
char[] result2 = new char[result.length + mapLen - srcCount];
|
||||||
System.arraycopy(result, 0, result2, 0, i + resultOffset);
|
System.arraycopy(result, 0, result2, 0, resultOffset);
|
||||||
result = result2;
|
result = result2;
|
||||||
}
|
}
|
||||||
for (int x = 0; x < mapLen; ++x) {
|
for (int x = 0; x < mapLen; ++x) {
|
||||||
result[i + resultOffset + x] = lowerCharArray[x];
|
result[resultOffset++] = lowerCharArray[x];
|
||||||
}
|
}
|
||||||
resultOffset += (mapLen - srcCount);
|
|
||||||
} else {
|
|
||||||
result[i + resultOffset] = (char)lowerChar;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return new String(result, 0, len + resultOffset);
|
return new String(result, 0, resultOffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -2707,92 +2708,91 @@ public final class String
|
|||||||
if (locale == null) {
|
if (locale == null) {
|
||||||
throw new NullPointerException();
|
throw new NullPointerException();
|
||||||
}
|
}
|
||||||
|
int first;
|
||||||
int firstLower;
|
boolean hasSurr = false;
|
||||||
final int len = value.length;
|
final int len = value.length;
|
||||||
|
|
||||||
/* Now check if there are any characters that need to be changed. */
|
// Now check if there are any characters that need to be changed, or are surrogate
|
||||||
scan: {
|
for (first = 0 ; first < len; first++ ) {
|
||||||
for (firstLower = 0 ; firstLower < len; ) {
|
int cp = (int)value[first];
|
||||||
int c = (int)value[firstLower];
|
if (Character.isSurrogate((char)cp)) {
|
||||||
int srcCount;
|
hasSurr = true;
|
||||||
if ((c >= Character.MIN_HIGH_SURROGATE)
|
break;
|
||||||
&& (c <= Character.MAX_HIGH_SURROGATE)) {
|
|
||||||
c = codePointAt(firstLower);
|
|
||||||
srcCount = Character.charCount(c);
|
|
||||||
} else {
|
|
||||||
srcCount = 1;
|
|
||||||
}
|
|
||||||
int upperCaseChar = Character.toUpperCaseEx(c);
|
|
||||||
if ((upperCaseChar == Character.ERROR)
|
|
||||||
|| (c != upperCaseChar)) {
|
|
||||||
break scan;
|
|
||||||
}
|
|
||||||
firstLower += srcCount;
|
|
||||||
}
|
}
|
||||||
|
if (cp != Character.toUpperCaseEx(cp)) { // no need to check Character.ERROR
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (first == len) {
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
char[] result = new char[len];
|
||||||
/* result may grow, so i+resultOffset is the write location in result */
|
System.arraycopy(value, 0, result, 0, first); // Just copy the first few
|
||||||
int resultOffset = 0;
|
// upperCase characters.
|
||||||
char[] result = new char[len]; /* may grow */
|
|
||||||
|
|
||||||
/* Just copy the first few upperCase characters. */
|
|
||||||
System.arraycopy(value, 0, result, 0, firstLower);
|
|
||||||
|
|
||||||
String lang = locale.getLanguage();
|
String lang = locale.getLanguage();
|
||||||
boolean localeDependent =
|
if (lang == "tr" || lang == "az" || lang == "lt") {
|
||||||
(lang == "tr" || lang == "az" || lang == "lt");
|
return toUpperCaseEx(result, first, locale, true);
|
||||||
char[] upperCharArray;
|
}
|
||||||
int upperChar;
|
if (hasSurr) {
|
||||||
int srcChar;
|
return toUpperCaseEx(result, first, locale, false);
|
||||||
|
}
|
||||||
|
for (int i = first; i < len; i++) {
|
||||||
|
int cp = Character.toUpperCaseEx((int)value[i]);
|
||||||
|
if (!Character.isBmpCodePoint(cp)) { // Character.ERROR is not bmp
|
||||||
|
return toUpperCaseEx(result, i, locale, false);
|
||||||
|
}
|
||||||
|
result[i] = (char)cp;
|
||||||
|
}
|
||||||
|
return new String(result, true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String toUpperCaseEx(char[] result, int first, Locale locale,
|
||||||
|
boolean localeDependent) {
|
||||||
|
int resultOffset = first;
|
||||||
int srcCount;
|
int srcCount;
|
||||||
for (int i = firstLower; i < len; i += srcCount) {
|
for (int i = first; i < value.length; i += srcCount) {
|
||||||
srcChar = (int)value[i];
|
int srcChar = (int)value[i];
|
||||||
if ((char)srcChar >= Character.MIN_HIGH_SURROGATE &&
|
int upperChar;
|
||||||
(char)srcChar <= Character.MAX_HIGH_SURROGATE) {
|
char[] upperCharArray;
|
||||||
|
srcCount = 1;
|
||||||
|
if (Character.isSurrogate((char)srcChar)) {
|
||||||
srcChar = codePointAt(i);
|
srcChar = codePointAt(i);
|
||||||
srcCount = Character.charCount(srcChar);
|
srcCount = Character.charCount(srcChar);
|
||||||
} else {
|
|
||||||
srcCount = 1;
|
|
||||||
}
|
}
|
||||||
if (localeDependent) {
|
if (localeDependent) {
|
||||||
upperChar = ConditionalSpecialCasing.toUpperCaseEx(this, i, locale);
|
upperChar = ConditionalSpecialCasing.toUpperCaseEx(this, i, locale);
|
||||||
} else {
|
} else {
|
||||||
upperChar = Character.toUpperCaseEx(srcChar);
|
upperChar = Character.toUpperCaseEx(srcChar);
|
||||||
}
|
}
|
||||||
if ((upperChar == Character.ERROR)
|
if (Character.isBmpCodePoint(upperChar)) {
|
||||||
|| (upperChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
|
result[resultOffset++] = (char)upperChar;
|
||||||
|
} else {
|
||||||
if (upperChar == Character.ERROR) {
|
if (upperChar == Character.ERROR) {
|
||||||
if (localeDependent) {
|
if (localeDependent) {
|
||||||
upperCharArray =
|
upperCharArray =
|
||||||
ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale);
|
ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale);
|
||||||
} else {
|
} else {
|
||||||
upperCharArray = Character.toUpperCaseCharArray(srcChar);
|
upperCharArray = Character.toUpperCaseCharArray(srcChar);
|
||||||
}
|
}
|
||||||
} else if (srcCount == 2) {
|
} else if (srcCount == 2) {
|
||||||
resultOffset += Character.toChars(upperChar, result, i + resultOffset) - srcCount;
|
resultOffset += Character.toChars(upperChar, result, resultOffset);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
upperCharArray = Character.toChars(upperChar);
|
upperCharArray = Character.toChars(upperChar);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Grow result if needed */
|
/* Grow result if needed */
|
||||||
int mapLen = upperCharArray.length;
|
int mapLen = upperCharArray.length;
|
||||||
if (mapLen > srcCount) {
|
if (mapLen > srcCount) {
|
||||||
char[] result2 = new char[result.length + mapLen - srcCount];
|
char[] result2 = new char[result.length + mapLen - srcCount];
|
||||||
System.arraycopy(result, 0, result2, 0, i + resultOffset);
|
System.arraycopy(result, 0, result2, 0, resultOffset);
|
||||||
result = result2;
|
result = result2;
|
||||||
}
|
}
|
||||||
for (int x = 0; x < mapLen; ++x) {
|
for (int x = 0; x < mapLen; ++x) {
|
||||||
result[i + resultOffset + x] = upperCharArray[x];
|
result[resultOffset++] = upperCharArray[x];
|
||||||
}
|
}
|
||||||
resultOffset += (mapLen - srcCount);
|
|
||||||
} else {
|
|
||||||
result[i + resultOffset] = (char)upperChar;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return new String(result, 0, len + resultOffset);
|
return new String(result, 0, resultOffset);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
@test
|
@test
|
||||||
@bug 4217441 4533872 4900935 8020037
|
@bug 4217441 4533872 4900935 8020037 8032012
|
||||||
@summary toLowerCase should lower-case Greek Sigma correctly depending
|
@summary toLowerCase should lower-case Greek Sigma correctly depending
|
||||||
on the context (final/non-final). Also it should handle
|
on the context (final/non-final). Also it should handle
|
||||||
Locale specific (lt, tr, and az) lowercasings and supplementary
|
Locale specific (lt, tr, and az) lowercasings and supplementary
|
||||||
@ -104,6 +104,22 @@ public class ToLowerCase {
|
|||||||
// invalid code point tests:
|
// invalid code point tests:
|
||||||
test("\uD800\uD800\uD801A\uDC00\uDC00\uDC00B", Locale.US, "\uD800\uD800\uD801a\uDC00\uDC00\uDC00b");
|
test("\uD800\uD800\uD801A\uDC00\uDC00\uDC00B", Locale.US, "\uD800\uD800\uD801a\uDC00\uDC00\uDC00b");
|
||||||
|
|
||||||
|
// test bmp + supp1
|
||||||
|
StringBuilder src = new StringBuilder(0x20000);
|
||||||
|
StringBuilder exp = new StringBuilder(0x20000);
|
||||||
|
for (int cp = 0; cp < 0x20000; cp++) {
|
||||||
|
if (cp >= Character.MIN_HIGH_SURROGATE && cp <= Character.MAX_HIGH_SURROGATE) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
int lowerCase = Character.toLowerCase(cp);
|
||||||
|
if (lowerCase == -1) { //Character.ERROR
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
src.appendCodePoint(cp);
|
||||||
|
exp.appendCodePoint(lowerCase);
|
||||||
|
}
|
||||||
|
test(src.toString(), Locale.US, exp.toString());
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void test(String in, Locale locale, String expected) {
|
static void test(String in, Locale locale, String expected) {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user