8032012: String.toLowerCase/toUpperCase performance improvement

Updated the implementation to improve performance.
Reviewed-by: psandoz, forax
2014-02-07 09:04:17 -08:00 · 2014-02-07 09:04:17 -08:00 · 8cdace2575
commit 8cdace2575
parent 7be40556f6
2 changed files with 120 additions and 104 deletions
--- a/jdk/src/share/classes/java/lang/String.java
+++ b/jdk/src/share/classes/java/lang/String.java
@ -2549,87 +2549,88 @@ public final class String
        if (locale == null) {
            throw new NullPointerException();
        }
-
+        int first;
-        int firstUpper;
+        boolean hasSurr = false;
        final int len = value.length;
-        /* Now check if there are any characters that need to be changed. */
+        // Now check if there are any characters that need to be changed, or are surrogate
-        scan: {
+        for (first = 0 ; first < len; first++) {
-            for (firstUpper = 0 ; firstUpper < len; ) {
+            int cp = (int)value[first];
-                char c = value[firstUpper];
+            if (Character.isSurrogate((char)cp)) {
-                if ((c >= Character.MIN_HIGH_SURROGATE)
+                hasSurr = true;
-                        && (c <= Character.MAX_HIGH_SURROGATE)) {
+                break;
-                    int supplChar = codePointAt(firstUpper);
+            }
-                    if (supplChar != Character.toLowerCase(supplChar)) {
+            if (cp != Character.toLowerCase(cp)) {  // no need to check Character.ERROR
-                        break scan;
+                break;
                    }
                    firstUpper += Character.charCount(supplChar);
                } else {
                    if (c != Character.toLowerCase(c)) {
                        break scan;
                    }
                    firstUpper++;
                }
            }
            return this;
        }
-
+        if (first == len)
            return this;
        char[] result = new char[len];
-        int resultOffset = 0;  /* result may grow, so i+resultOffset
+        System.arraycopy(value, 0, result, 0, first);  // Just copy the first few
-                                * is the write location in result */
+                                                       // lowerCase characters.
        /* Just copy the first few lowerCase characters. */
        System.arraycopy(value, 0, result, 0, firstUpper);
        String lang = locale.getLanguage();
-        boolean localeDependent =
+        if (lang == "tr" || lang == "az" || lang == "lt") {
-                (lang == "tr" || lang == "az" || lang == "lt");
+            return toLowerCaseEx(result, first, locale, true);
-        char[] lowerCharArray;
+        }
-        int lowerChar;
+        if (hasSurr) {
-        int srcChar;
+            return toLowerCaseEx(result, first, locale, false);
        }
        for (int i = first; i < len; i++) {
            int cp = (int)value[i];
            if (cp == '\u03A3') {                       // GREEK CAPITAL LETTER SIGMA
                return toLowerCaseEx(result, i, locale, false);
            }
            cp = Character.toLowerCase(cp);
            if (!Character.isBmpCodePoint(cp)) {
                return toLowerCaseEx(result, i, locale, false);
            }
            result[i] = (char)cp;
        }
        return new String(result, true);
    }
    private String toLowerCaseEx(char[] result, int first, Locale locale, boolean localeDependent) {
        int resultOffset = first;
        int srcCount;
-        for (int i = firstUpper; i < len; i += srcCount) {
+        for (int i = first; i < value.length; i += srcCount) {
-            srcChar = (int)value[i];
+            int srcChar = (int)value[i];
-            if ((char)srcChar >= Character.MIN_HIGH_SURROGATE
+            int lowerChar;
-                    && (char)srcChar <= Character.MAX_HIGH_SURROGATE) {
+            char[] lowerCharArray;
            srcCount = 1;
            if (Character.isSurrogate((char)srcChar)) {
                srcChar = codePointAt(i);
                srcCount = Character.charCount(srcChar);
            } else {
                srcCount = 1;
            }
            if (localeDependent || srcChar == '\u03A3') { // GREEK CAPITAL LETTER SIGMA
                lowerChar = ConditionalSpecialCasing.toLowerCaseEx(this, i, locale);
            } else {
                lowerChar = Character.toLowerCase(srcChar);
            }
-            if ((lowerChar == Character.ERROR)
+            if (Character.isBmpCodePoint(lowerChar)) {    // Character.ERROR is not a bmp
-                    || (lowerChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
+                result[resultOffset++] = (char)lowerChar;
            } else {
                if (lowerChar == Character.ERROR) {
-                    lowerCharArray =
+                    lowerCharArray = ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
                            ConditionalSpecialCasing.toLowerCaseCharArray(this, i, locale);
                } else if (srcCount == 2) {
-                    resultOffset += Character.toChars(lowerChar, result, i + resultOffset) - srcCount;
+                    resultOffset += Character.toChars(lowerChar, result, resultOffset);
                    continue;
                } else {
                    lowerCharArray = Character.toChars(lowerChar);
                }
                /* Grow result if needed */
                int mapLen = lowerCharArray.length;
                if (mapLen > srcCount) {
                    char[] result2 = new char[result.length + mapLen - srcCount];
-                    System.arraycopy(result, 0, result2, 0, i + resultOffset);
+                    System.arraycopy(result, 0, result2, 0, resultOffset);
                    result = result2;
                }
                for (int x = 0; x < mapLen; ++x) {
-                    result[i + resultOffset + x] = lowerCharArray[x];
+                    result[resultOffset++] = lowerCharArray[x];
                }
                resultOffset += (mapLen - srcCount);
            } else {
                result[i + resultOffset] = (char)lowerChar;
            }
        }
-        return new String(result, 0, len + resultOffset);
+        return new String(result, 0, resultOffset);
    }
    /**
@ -2707,92 +2708,91 @@ public final class String
        if (locale == null) {
            throw new NullPointerException();
        }
-
+        int first;
-        int firstLower;
+        boolean hasSurr = false;
        final int len = value.length;
-        /* Now check if there are any characters that need to be changed. */
+        // Now check if there are any characters that need to be changed, or are surrogate
-        scan: {
+        for (first = 0 ; first < len; first++ ) {
-            for (firstLower = 0 ; firstLower < len; ) {
+            int cp = (int)value[first];
-                int c = (int)value[firstLower];
+            if (Character.isSurrogate((char)cp)) {
-                int srcCount;
+                hasSurr = true;
-                if ((c >= Character.MIN_HIGH_SURROGATE)
+                break;
                        && (c <= Character.MAX_HIGH_SURROGATE)) {
                    c = codePointAt(firstLower);
                    srcCount = Character.charCount(c);
                } else {
                    srcCount = 1;
                }
                int upperCaseChar = Character.toUpperCaseEx(c);
                if ((upperCaseChar == Character.ERROR)
                        || (c != upperCaseChar)) {
                    break scan;
                }
                firstLower += srcCount;
            }
            if (cp != Character.toUpperCaseEx(cp)) {   // no need to check Character.ERROR
                break;
            }
        }
        if (first == len) {
            return this;
        }
-
+        char[] result = new char[len];
-        /* result may grow, so i+resultOffset is the write location in result */
+        System.arraycopy(value, 0, result, 0, first);  // Just copy the first few
-        int resultOffset = 0;
+                                                       // upperCase characters.
        char[] result = new char[len]; /* may grow */
        /* Just copy the first few upperCase characters. */
        System.arraycopy(value, 0, result, 0, firstLower);
        String lang = locale.getLanguage();
-        boolean localeDependent =
+        if (lang == "tr" || lang == "az" || lang == "lt") {
-                (lang == "tr" || lang == "az" || lang == "lt");
+            return toUpperCaseEx(result, first, locale, true);
-        char[] upperCharArray;
+        }
-        int upperChar;
+        if (hasSurr) {
-        int srcChar;
+            return toUpperCaseEx(result, first, locale, false);
        }
        for (int i = first; i < len; i++) {
            int cp = Character.toUpperCaseEx((int)value[i]);
            if (!Character.isBmpCodePoint(cp)) {    // Character.ERROR is not bmp
                return toUpperCaseEx(result, i, locale, false);
            }
            result[i] = (char)cp;
        }
        return new String(result, true);
    }
    private String toUpperCaseEx(char[] result, int first, Locale locale,
                                 boolean localeDependent) {
        int resultOffset = first;
        int srcCount;
-        for (int i = firstLower; i < len; i += srcCount) {
+        for (int i = first; i < value.length; i += srcCount) {
-            srcChar = (int)value[i];
+            int srcChar = (int)value[i];
-            if ((char)srcChar >= Character.MIN_HIGH_SURROGATE &&
+            int upperChar;
-                (char)srcChar <= Character.MAX_HIGH_SURROGATE) {
+            char[] upperCharArray;
            srcCount = 1;
            if (Character.isSurrogate((char)srcChar)) {
                srcChar = codePointAt(i);
                srcCount = Character.charCount(srcChar);
            } else {
                srcCount = 1;
            }
            if (localeDependent) {
                upperChar = ConditionalSpecialCasing.toUpperCaseEx(this, i, locale);
            } else {
                upperChar = Character.toUpperCaseEx(srcChar);
            }
-            if ((upperChar == Character.ERROR)
+            if (Character.isBmpCodePoint(upperChar)) {
-                    || (upperChar >= Character.MIN_SUPPLEMENTARY_CODE_POINT)) {
+                result[resultOffset++] = (char)upperChar;
            } else {
                if (upperChar == Character.ERROR) {
                    if (localeDependent) {
                        upperCharArray =
-                                ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale);
+                            ConditionalSpecialCasing.toUpperCaseCharArray(this, i, locale);
                    } else {
                        upperCharArray = Character.toUpperCaseCharArray(srcChar);
                    }
                } else if (srcCount == 2) {
-                    resultOffset += Character.toChars(upperChar, result, i + resultOffset) - srcCount;
+                    resultOffset += Character.toChars(upperChar, result, resultOffset);
                    continue;
                } else {
                    upperCharArray = Character.toChars(upperChar);
                }
                /* Grow result if needed */
                int mapLen = upperCharArray.length;
                if (mapLen > srcCount) {
                    char[] result2 = new char[result.length + mapLen - srcCount];
-                    System.arraycopy(result, 0, result2, 0, i + resultOffset);
+                    System.arraycopy(result, 0, result2, 0, resultOffset);
                    result = result2;
-                }
+                 }
-                for (int x = 0; x < mapLen; ++x) {
+                 for (int x = 0; x < mapLen; ++x) {
-                    result[i + resultOffset + x] = upperCharArray[x];
+                    result[resultOffset++] = upperCharArray[x];
-                }
+                 }
                resultOffset += (mapLen - srcCount);
            } else {
                result[i + resultOffset] = (char)upperChar;
            }
        }
-        return new String(result, 0, len + resultOffset);
+        return new String(result, 0, resultOffset);
    }
    /**
--- a/jdk/test/java/lang/String/ToLowerCase.java
+++ b/jdk/test/java/lang/String/ToLowerCase.java
@ -23,7 +23,7 @@
 /*
    @test
-    @bug 4217441 4533872 4900935 8020037
+    @bug 4217441 4533872 4900935 8020037 8032012
    @summary toLowerCase should lower-case Greek Sigma correctly depending
             on the context (final/non-final).  Also it should handle
             Locale specific (lt, tr, and az) lowercasings and supplementary
@ -104,6 +104,22 @@ public class ToLowerCase {
        // invalid code point tests:
        test("\uD800\uD800\uD801A\uDC00\uDC00\uDC00B", Locale.US, "\uD800\uD800\uD801a\uDC00\uDC00\uDC00b");
        // test bmp + supp1
        StringBuilder src = new StringBuilder(0x20000);
        StringBuilder exp = new StringBuilder(0x20000);
        for (int cp = 0; cp < 0x20000; cp++) {
            if (cp >= Character.MIN_HIGH_SURROGATE && cp <= Character.MAX_HIGH_SURROGATE) {
                continue;
            }
            int lowerCase = Character.toLowerCase(cp);
            if (lowerCase == -1) {    //Character.ERROR
                continue;
            }
            src.appendCodePoint(cp);
            exp.appendCodePoint(lowerCase);
        }
        test(src.toString(), Locale.US, exp.toString());
    }
    static void test(String in, Locale locale, String expected) {