7037261: j.l.Character.isLowerCase/isUpperCase need to match the Unicode Standard

Updated j.l.Character.isLowerCase/isUpperCase Reviewed-by: okutsu
2011-04-28 20:18:57 -07:00 · 2011-04-28 20:18:57 -07:00 · 4463efaf61
commit 4463efaf61
parent faa5e45f7a
14 changed files with 3147 additions and 63 deletions
--- a/jdk/make/java/java/FILES_java.gmk
+++ b/jdk/make/java/java/FILES_java.gmk
@ -356,6 +356,7 @@ JAVA_JAVA_java = \
    java/util/regex/Matcher.java \
    java/util/regex/MatchResult.java \
    java/util/regex/ASCII.java \
+    java/util/regex/UnicodeProp.java \
    java/util/regex/PatternSyntaxException.java \
    java/util/prefs/Preferences.java \
        java/util/prefs/AbstractPreferences.java \
--- a/jdk/make/java/java/Makefile
+++ b/jdk/make/java/java/Makefile
@ -345,30 +345,35 @@ $(GENSRCDIR)/java/lang/CharacterDataLatin1.java \
 		-template $(CHARACTERDATA)/CharacterDataLatin1.java.template \
 		-spec $(UNICODEDATA)/UnicodeData.txt \
 		-specialcasing $(UNICODEDATA)/SpecialCasing.txt \
+		-proplist $(UNICODEDATA)/PropList.txt \
 		-o $(GENSRCDIR)/java/lang/CharacterDataLatin1.java -string \
 		-usecharforbyte -latin1 8
 	$(BOOT_JAVA_CMD) -jar $(GENERATECHARACTER_JARFILE) -plane 0 \
 		-template $(CHARACTERDATA)/CharacterData00.java.template \
 		-spec $(UNICODEDATA)/UnicodeData.txt \
 		-specialcasing $(UNICODEDATA)/SpecialCasing.txt \
+		-proplist $(UNICODEDATA)/PropList.txt \
 		-o $(GENSRCDIR)/java/lang/CharacterData00.java -string \
 		-usecharforbyte 11 4 1
 	$(BOOT_JAVA_CMD) -jar $(GENERATECHARACTER_JARFILE) -plane 1 \
 		-template $(CHARACTERDATA)/CharacterData01.java.template \
 		-spec $(UNICODEDATA)/UnicodeData.txt \
 		-specialcasing $(UNICODEDATA)/SpecialCasing.txt \
+		-proplist $(UNICODEDATA)/PropList.txt \
 		-o $(GENSRCDIR)/java/lang/CharacterData01.java -string \
 		-usecharforbyte  11 4 1
 	$(BOOT_JAVA_CMD) -jar $(GENERATECHARACTER_JARFILE) -plane 2 \
 		-template $(CHARACTERDATA)/CharacterData02.java.template \
 		-spec $(UNICODEDATA)/UnicodeData.txt \
 		-specialcasing $(UNICODEDATA)/SpecialCasing.txt \
+		-proplist $(UNICODEDATA)/PropList.txt \
 		-o $(GENSRCDIR)/java/lang/CharacterData02.java -string \
 		-usecharforbyte 11 4 1
 	$(BOOT_JAVA_CMD) -jar $(GENERATECHARACTER_JARFILE) -plane 14 \
 		-template $(CHARACTERDATA)/CharacterData0E.java.template \
 		-spec $(UNICODEDATA)/UnicodeData.txt \
 		-specialcasing $(UNICODEDATA)/SpecialCasing.txt \
+		-proplist $(UNICODEDATA)/PropList.txt \
 		-o $(GENSRCDIR)/java/lang/CharacterData0E.java -string \
 		-usecharforbyte 11 4 1

--- a/jdk/make/tools/GenerateCharacter/CharacterData00.java.template
+++ b/jdk/make/tools/GenerateCharacter/CharacterData00.java.template
@ -73,11 +73,37 @@ class CharacterData00 extends CharacterData {
        return props;
    }

+    int getPropertiesEx(int ch) {
+        char offset = (char)ch;
+        int props = $$LookupEx(offset);
+        return props;
+    }
+
    int getType(int ch) {
        int props = getProperties(ch);
        return (props & $$maskType);
    }

+    boolean isOtherLowercase(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskOtherLowercase) != 0;
+    }
+
+    boolean isOtherUppercase(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskOtherUppercase) != 0;
+    }
+
+    boolean isOtherAlphabetic(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskOtherAlphabetic) != 0;
+    }
+
+    boolean isIdeographic(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskIdeographic) != 0;
+    }
+
    boolean isJavaIdentifierStart(int ch) {
        int props = getProperties(ch);
        return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
--- a/jdk/make/tools/GenerateCharacter/CharacterData01.java.template
+++ b/jdk/make/tools/GenerateCharacter/CharacterData01.java.template
@ -72,11 +72,37 @@ class CharacterData01 extends CharacterData {
        return props;
    }

+    int getPropertiesEx(int ch) {
+        char offset = (char)ch;
+        int props = $$LookupEx(offset);
+        return props;
+    }
+
    int getType(int ch) {
        int props = getProperties(ch);
        return (props & $$maskType);
    }

+    boolean isOtherLowercase(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskOtherLowercase) != 0;
+    }
+
+    boolean isOtherUppercase(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskOtherUppercase) != 0;
+    }
+ 
+    boolean isOtherAlphabetic(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskOtherAlphabetic) != 0;
+    }
+
+    boolean isIdeographic(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskIdeographic) != 0;
+    }
+
    boolean isJavaIdentifierStart(int ch) {
        int props = getProperties(ch);
        return ((props & $$maskIdentifierInfo) >= $$lowJavaStart);
--- a/jdk/make/tools/GenerateCharacter/CharacterData02.java.template
+++ b/jdk/make/tools/GenerateCharacter/CharacterData02.java.template
@ -66,11 +66,37 @@ class CharacterData02 extends CharacterData {
     */

    int getProperties(int ch) {
-		char offset = (char)ch;
+	char offset = (char)ch;
        int props = $$Lookup(offset);
        return props;
    }

+    int getPropertiesEx(int ch) {
+        char offset = (char)ch;
+        int props = $$LookupEx(offset);
+        return props;
+    }
+
+    boolean isOtherLowercase(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskOtherLowercase) != 0;
+    }
+
+    boolean isOtherUppercase(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskOtherUppercase) != 0;
+    }
+
+    boolean isOtherAlphabetic(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskOtherAlphabetic) != 0;
+    }
+
+    boolean isIdeographic(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskIdeographic) != 0;
+    }
+
    int getType(int ch) {
        int props = getProperties(ch);
        return (props & $$maskType);
--- a/jdk/make/tools/GenerateCharacter/CharacterData0E.java.template
+++ b/jdk/make/tools/GenerateCharacter/CharacterData0E.java.template
@ -66,11 +66,37 @@ class CharacterData0E extends CharacterData {
     */

    int getProperties(int ch) {
-		char offset = (char)ch;
+        char offset = (char)ch;
        int props = $$Lookup(offset);
        return props;
    }

+    int getPropertiesEx(int ch) {
+        char offset = (char)ch;
+        int props = $$LookupEx(offset);
+        return props;
+    }
+
+    boolean isOtherLowercase(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskOtherLowercase) != 0;
+    }
+
+    boolean isOtherUppercase(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskOtherUppercase) != 0;
+    }
+
+    boolean isOtherAlphabetic(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskOtherAlphabetic) != 0;
+    }
+
+    boolean isIdeographic(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskIdeographic) != 0;
+    }
+
    int getType(int ch) {
        int props = getProperties(ch);
        return (props & $$maskType);
--- a/jdk/make/tools/GenerateCharacter/CharacterDataLatin1.java.template
+++ b/jdk/make/tools/GenerateCharacter/CharacterDataLatin1.java.template
@ -67,11 +67,37 @@ class CharacterDataLatin1 extends CharacterData {
     */

    int getProperties(int ch) {
-		char offset = (char)ch;
+        char offset = (char)ch;
        int props = $$Lookup(offset);
        return props;
    }

+    int getPropertiesEx(int ch) {
+        char offset = (char)ch;
+        int props = $$LookupEx(offset);
+        return props;
+    }
+
+    boolean isOtherLowercase(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskOtherLowercase) != 0;
+    }
+
+    boolean isOtherUppercase(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskOtherUppercase) != 0;
+    }
+
+    boolean isOtherAlphabetic(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskOtherAlphabetic) != 0;
+    }
+
+    boolean isIdeographic(int ch) {
+        int props = getPropertiesEx(ch);
+        return (props & $$maskIdeographic) != 0;
+    }
+
    int getType(int ch) {
        int props = getProperties(ch);
        return (props & $$maskType);
--- a/jdk/make/tools/UnicodeData/PropList.txt
+++ b/jdk/make/tools/UnicodeData/PropList.txt
--- a/jdk/make/tools/src/build/tools/generatecharacter/GenerateCharacter.java
+++ b/jdk/make/tools/src/build/tools/generatecharacter/GenerateCharacter.java
@ -1,4 +1,3 @@
-
 /*
 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@ -34,6 +33,7 @@ import java.io.PrintWriter;
 import java.io.BufferedWriter;
 import java.io.FileWriter;
 import java.io.File;
+import java.util.List;

 import build.tools.generatecharacter.CharacterName;

@ -68,18 +68,17 @@ public class GenerateCharacter {

    final static boolean DEBUG = false;

-    final static int MAX_UNICODE_VALUE = 0xFFFF;
    final static String commandMarker = "$$";
    static String ROOT                        = "";
    static String DefaultUnicodeSpecFileName  = ROOT + "UnicodeData.txt";
    static String DefaultSpecialCasingFileName = ROOT + "SpecialCasing.txt";
+    static String DefaultPropListFileName     = ROOT + "PropList.txt";
    static String DefaultJavaTemplateFileName = ROOT + "Character.java.template";
    static String DefaultJavaOutputFileName   = ROOT + "Character.java";
    static String DefaultCTemplateFileName    = ROOT + "Character.c.template";
    static String DefaultCOutputFileName      = ROOT + "Character.c";

-    static String CharacterDataClassName      = "CharacterData";
-        static int plane = 0;
+    static int plane = 0;

    /* The overall idea is that, in the generated Character class source code,
    most character property data is stored in a special multi-level table whose
@ -105,7 +104,11 @@ public class GenerateCharacter {
    entries are short rather than byte).
    */

-    /* The character properties are currently encoded into 32 bits in the following manner:
+    /* The character properties are currently encoded in two parts:
+       A (32 bits) and B (16 bits).
+
+    A: the low 32 bits are defined in the following manner:
+
    1 bit Mirrored property.
    4 bits      Bidirectional category (see below) (unused if -nobidi switch specified)
    9 bits      A signed offset used for converting case .
@ -148,6 +151,14 @@ public class GenerateCharacter {
           will produce the desired numeric value.
    5 bits  The digit offset (see description of previous field)
    5 bits      Character type (see below)
+
+    B: the high 16 bits are defined as:
+    1 bit Other_Lowercase property
+    1 bit Other_Uppercase property
+    1 bit Other_Alphabetic property
+    1 bit Other_Math property
+    1 bit Ideographic property
+    1 bit Noncharacter codepoint property
    */


@ -173,9 +184,22 @@ public class GenerateCharacter {
                                        // case offset are 9 bits
                                        maskCase                =   0x01FF,
        shiftBidi           = 27,       maskBidi              = 0x78000000,
-        shiftMirrored       = 31,       maskMirrored          = 0x80000000,
+        shiftMirrored       = 31,       //maskMirrored          = 0x80000000,
        shiftPlane          = 16,       maskPlane = 0xFF0000;

+    // maskMirrored must be a long: as an int, 0x80000000 is negative and
+    // would sign-extend into the upper (B) bits when widened to long
+    private static final long maskMirrored          = 0x80000000L;
+
+    // bit masks identifying the 16-bit property field described above
+    // (the B table)
+    private static final long
+        maskOtherLowercase  = 0x100000000L,
+        maskOtherUppercase  = 0x200000000L,
+        maskOtherAlphabetic = 0x400000000L,
+        maskOtherMath       = 0x800000000L,
+        maskIdeographic     = 0x1000000000L,
+        maskNoncharacterCP  = 0x2000000000L;
+
    // Can compare masked values with these to determine
    // numeric or lexical types.
    public static int
@ -258,7 +282,7 @@ public class GenerateCharacter {
    * The specification file is assumed to contain its data in sorted order by
    * character code; as a result, the array passed as an argument to this method
    * has its components in the same sorted order, with one entry for each defined
-        * Unicode character or character range.  (A range is indicated by two consecutive
+    * Unicode character or character range.  (A range is indicated by two consecutive
    * entries, such that the name of the first entry begins with "<" and ends with
    * "First>" and the second entry begins with "<" and ends with "Last>".)  This is
    * therefore a sparse representation of the character property data.
@ -282,7 +306,8 @@ public class GenerateCharacter {
    * @see GenerateCharacter#buildOne
    */

-    static long[] buildMap(UnicodeSpec[] data, SpecialCaseMap[] specialMaps) {
+    static long[] buildMap(UnicodeSpec[] data, SpecialCaseMap[] specialMaps, PropList propList)
+    {
        long[] result;
        if (bLatin1 == true) {
            result = new long[256];
@ -290,13 +315,13 @@ public class GenerateCharacter {
            result = new long[1<<16];
        }
        int k=0;
-                int codePoint = plane<<16;
+        int codePoint = plane<<16;
        UnicodeSpec nonCharSpec = new UnicodeSpec();
        for (int j = 0; j < data.length && k < result.length; j++) {
            if (data[j].codePoint == codePoint) {
                result[k] = buildOne(codePoint, data[j], specialMaps);
                ++k;
-                                ++codePoint;
+                ++codePoint;
            }
            else if(data[j].codePoint > codePoint) {
                if (data[j].name.endsWith("Last>")) {
@ -304,7 +329,7 @@ public class GenerateCharacter {
                    while (codePoint < data[j].codePoint && k < result.length) {
                        result[k] = buildOne(codePoint, data[j], specialMaps);
                        ++k;
-                                                ++codePoint;
+                        ++codePoint;
                    }
                }
                else {
@ -312,15 +337,14 @@ public class GenerateCharacter {
                    while (codePoint < data[j].codePoint && k < result.length) {
                        result[k] = buildOne(codePoint, nonCharSpec, specialMaps);
                        ++k;
-                                                ++codePoint;
+                        ++codePoint;
                    }
                }
                k = data[j].codePoint & 0xFFFF;
-                                codePoint = data[j].codePoint;
+                codePoint = data[j].codePoint;
                result[k] = buildOne(codePoint, data[j], specialMaps);
                ++k;
-                                ++codePoint;
-
+                ++codePoint;
            }
            else {
                System.out.println("An error has occured during spec mapping.");
@ -333,8 +357,17 @@ public class GenerateCharacter {
        while (k < result.length) {
            result[k] = buildOne(codePoint, nonCharSpec, specialMaps);
            ++k;
-                        ++codePoint;
+            ++codePoint;
        }
+        // now add all extra supported properties from PropList to the
+        // upper 16 bits
+        addExProp(result, propList, "Other_Lowercase", maskOtherLowercase);
+        addExProp(result, propList, "Other_Uppercase", maskOtherUppercase);
+        addExProp(result, propList, "Other_Alphabetic", maskOtherAlphabetic);
+        addExProp(result, propList, "Ideographic", maskIdeographic);
+        //addExProp(result, propList, "Other_Math", maskOtherMath);
+        //addExProp(result, propList, "Noncharacter_CodePoint", maskNoncharacterCP);
+
        return result;
    }

@ -381,15 +414,15 @@ public class GenerateCharacter {
        // record the general category
        resultA |= us.generalCategory;

-    // record the numeric properties
-    NUMERIC: {
+        // record the numeric properties
+        NUMERIC: {
        STRANGE: {
            int val = 0;
-        // c is A-Z
+            // c is A-Z
            if ((c >= 0x0041) && (c <= 0x005A)) {
                val = c - 0x0041;
                resultA |= valueJavaSupradecimal;
-        // c is a-z
+            // c is a-z
            } else if ((c >= 0x0061) && (c <= 0x007A)) {
                val = c - 0x0061;
                resultA |= valueJavaSupradecimal;
@ -428,7 +461,7 @@ public class GenerateCharacter {
        resultA |= valueStrangeNumeric;
        } // end NUMERIC

-    // record case mapping
+        // record case mapping
        int offset = 0;
        // might have a 1:M mapping
        int specialMap = SpecialCaseMap.find(c, specialCaseMaps);
@ -458,12 +491,12 @@ public class GenerateCharacter {
            }
        }
        if ((us.hasTitleMap() && us.titleMap != us.upperMap) ||
-                (bHasUpper && us.hasLowerMap())) {
+            (bHasUpper && us.hasLowerMap())) {
            resultA |= maskTitleCase;
        }
        if (bHasUpper && !us.hasLowerMap() && !us.hasTitleMap() && verbose) {
-          System.out.println("Warning: Character " + hex4(c) + " has upper but " +
-                             "no title case; Java won't know this");
+            System.out.println("Warning: Character " + hex4(c) + " has upper but " +
+                               "no title case; Java won't know this");
        }
        if (offset < minOffsetSeen) minOffsetSeen = offset;
        if (offset > maxOffsetSeen) maxOffsetSeen = offset;
@ -475,8 +508,7 @@ public class GenerateCharacter {
        }
        resultA |= ((offset & maskCase) << shiftCaseOffset);

-
-    // record lexical info about this character
+        // record lexical info about this character
        if (us.generalCategory == UnicodeSpec.LOWERCASE_LETTER
                || us.generalCategory == UnicodeSpec.UPPERCASE_LETTER
                || us.generalCategory == UnicodeSpec.TITLECASE_LETTER
@ -539,6 +571,16 @@ public class GenerateCharacter {
        return resultA;
    }

+    /**
+     * ORs {@code mask} into the map entry of every code point that
+     * {@code propList} lists for the property named {@code prop}.
+     * Code points at or beyond {@code map.length} are ignored, and the
+     * map is left untouched when {@code propList} has no entry for
+     * {@code prop}.
+     */
+    static void addExProp(long[] map, PropList propList, String prop, long mask) {
+        List<Integer> members = propList.codepoints(prop);
+        if (members == null) {
+            return;
+        }
+        for (int index : members) {
+            if (index >= map.length) {
+                continue;
+            }
+            map[index] |= mask;
+        }
+    }
+
    /**
    * This is the heart of the table compression strategy.  The inputs are a map
    * and a number of bits (size).  The map is simply an array of long integer values;
@ -645,8 +687,8 @@ OUTER:  for (int i = 0; i < n; i += m) {
    */

    static void generateCharacterClass(String theTemplateFileName,
-                     String theOutputFileName)
-            throws FileNotFoundException, IOException {
+                                       String theOutputFileName)
+        throws FileNotFoundException, IOException {
        BufferedReader in = new BufferedReader(new FileReader(theTemplateFileName));
        PrintWriter out = new PrintWriter(new BufferedWriter(new FileWriter(theOutputFileName)));
        out.println(commentStart +
@ -719,6 +761,9 @@ OUTER:  for (int i = 0; i < n; i += m) {
        if (x.length() >= 9 && x.substring(0, 7).equals("Lookup(") &&
                x.substring(x.length()-1).equals(")") )
            return genAccess("A", x.substring(7, x.length()-1), (identifiers ? 2 : 32));
+        if (x.length() >= 11 && x.substring(0, 9).equals("LookupEx(") &&
+                x.substring(x.length()-1).equals(")") )
+            return genAccess("B", x.substring(9, x.length()-1), 16);
        if (x.equals("shiftType")) return Long.toString(shiftType);
        if (x.equals("shiftIdentifierInfo")) return Long.toString(shiftIdentifierInfo);
        if (x.equals("maskIdentifierInfo")) return "0x" + hex8(maskIdentifierInfo);
@ -731,6 +776,10 @@ OUTER:  for (int i = 0; i < n; i += m) {
        if (x.equals("maskLowerCase")) return "0x" + hex8(maskLowerCase);
        if (x.equals("maskUpperCase")) return "0x" + hex8(maskUpperCase);
        if (x.equals("maskTitleCase")) return "0x" + hex8(maskTitleCase);
+        if (x.equals("maskOtherLowercase")) return "0x" + hex4(maskOtherLowercase >> 32);
+        if (x.equals("maskOtherUppercase")) return "0x" + hex4(maskOtherUppercase >> 32);
+        if (x.equals("maskOtherAlphabetic")) return "0x" + hex4(maskOtherAlphabetic >> 32);
+        if (x.equals("maskIdeographic")) return "0x" + hex4(maskIdeographic >> 32);
        if (x.equals("valueIgnorable")) return "0x" + hex8(valueIgnorable);
        if (x.equals("valueJavaUnicodeStart")) return "0x" + hex8(valueJavaUnicodeStart);
        if (x.equals("valueJavaOnlyStart")) return "0x" + hex8(valueJavaOnlyStart);
@ -899,7 +948,7 @@ OUTER:  for (int i = 0; i < n; i += m) {

        // If we ever need more than 32 bits to represent the character properties,
        // then a table "B" may be needed as well.
-        //  genTable(result, "B", tables[n - 1], 32, 16, sizes[n - 1], false, 0, true, true, false);
+        genTable(result, "B", tables[n - 1], 32, 16, sizes[n - 1], false, 0, true, true, false);

        totalBytes += ((((tables[n - 1].length * (identifiers ? 2 : 32)) + 31) >> 5) << 2);
        result.append(commentStart);
@ -1080,9 +1129,9 @@ OUTER:  for (int i = 0; i < n; i += m) {
    */

    static void genTable(StringBuffer result, String name,
-             long[] table, int extract, int bits, int size,
-             boolean preshifted, int shift, boolean hexFormat,
-             boolean properties, boolean hexComment) {
+                         long[] table, int extract, int bits, int size,
+                         boolean preshifted, int shift, boolean hexFormat,
+                         boolean properties, boolean hexComment) {

        String atype = bits == 1 ? (Csyntax ? "unsigned long" : "int") :
            bits == 2 ? (Csyntax ? "unsigned long" : "int") :
@ -1137,7 +1186,12 @@ OUTER:  for (int i = 0; i < n; i += m) {
            char ch = '\u0000';
            int charsPerEntry = -entriesPerChar;
            for (int j=0; j<table.length; ++j) {
-                long entry = table[j] >> extract;
+                //long entry = table[j] >> extract;
+                long entry;
+                if ("A".equals(name))
+                    entry = (table[j] & 0xffffffffL) >> extract;
+                else
+                    entry = (table[j] >> extract);
                if (shiftEntries) entry <<= shift;
                if (entry >= (1L << bits)) {
                    FAIL("Entry too big");
@ -1549,6 +1603,7 @@ OUTER:  for (int i = 0; i < n; i += m) {
    static String OutputFileName = null;
    static String UnicodeSpecFileName = null; // liu
    static String SpecialCasingFileName = null;
+    static String PropListFileName = null;
    static boolean useCharForByte = false;
    static int[] sizes;
    static int bins = 0; // liu; if > 0, then perform search
@ -1668,20 +1723,28 @@ OUTER:  for (int i = 0; i < n; i += m) {
                    SpecialCasingFileName = args[++j];
                }
            }
-                        else if (args[j].equals("-plane")) {
-                                if (j == args.length -1) {
-                                        FAIL("Plane number missing after -plane");
-                                }
-                                else {
-                                        plane = Integer.parseInt(args[++j]);
-                                }
-                                if (plane > 0) {
-                                        bLatin1 = false;
-                                }
-                        }
-                        else if ("-usecharforbyte".equals(args[j])) {
-                                useCharForByte = true;
-                        }
+            else if (args[j].equals("-proplist")) {
+                if (j == args.length -1) {
+                    FAIL("File name missing after -proplist");
+                }
+                else {
+                    PropListFileName = args[++j];
+                }
+            }
+            else if (args[j].equals("-plane")) {
+                if (j == args.length -1) {
+                    FAIL("Plane number missing after -plane");
+                }
+                else {
+                    plane = Integer.parseInt(args[++j]);
+                }
+                if (plane > 0) {
+                    bLatin1 = false;
+                }
+            }
+            else if ("-usecharforbyte".equals(args[j])) {
+                useCharForByte = true;
+            }
            else if (args[j].equals("-latin1")) {
                bLatin1 = true;
                plane = 0;
@ -1728,6 +1791,10 @@ OUTER:  for (int i = 0; i < n; i += m) {
            SpecialCasingFileName = DefaultSpecialCasingFileName;
            desc.append(" [-specialcasing " + SpecialCasingFileName + ']');
        }
+        if (PropListFileName == null) {
+            PropListFileName = DefaultPropListFileName;
+            desc.append(" [-proplist " + PropListFileName + ']');
+        }
        if (TemplateFileName == null) {
            TemplateFileName = (Csyntax ? DefaultCTemplateFileName
                  : DefaultJavaTemplateFileName);
@ -1877,12 +1944,13 @@ OUTER:  for (int i = 0; i < n; i += m) {
        try {

            UnicodeSpec[] data = UnicodeSpec.readSpecFile(new File(UnicodeSpecFileName), plane);
-
            specialCaseMaps = SpecialCaseMap.readSpecFile(new File(SpecialCasingFileName), plane);
+            PropList propList = PropList.readSpecFile(new File(PropListFileName), plane);
+
            if (verbose) {
                System.out.println(data.length + " items read from Unicode spec file " + UnicodeSpecFileName); // liu
            }
-            long[] map = buildMap(data, specialCaseMaps);
+            long[] map = buildMap(data, specialCaseMaps, propList);
            if (verbose) {
                System.err.println("Completed building of initial map");
            }
--- a/jdk/make/tools/src/build/tools/generatecharacter/PropList.java
+++ b/jdk/make/tools/src/build/tools/generatecharacter/PropList.java
@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package build.tools.generatecharacter;
+
+import java.util.regex.*;
+import java.util.*;
+import java.io.*;
+
+/**
+ * A PropList object contains the lists of code points that have
+ * the same Unicode property defined in PropList.txt, restricted to
+ * the single plane it was read for.
+ *
+ * <p>Code points are stored as 16-bit offsets within that plane (the
+ * plane number is masked off), in the order in which they appear in
+ * the file.
+ *
+ * @author Xueming Shen
+ */
+public class PropList {
+
+    /**
+     * Matches one data line of PropList.txt:
+     * {@code <start>[..<end>] ; <Property_Name> # <comment>}.
+     * Group 1 is the start code point, group 2 the optional end of the
+     * range, group 3 the property name.
+     */
+    private static final Pattern ENTRY = Pattern.compile(
+        "(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)\\s+#.*");
+
+    /** Property name -> plane-relative code points, in file order. */
+    private final Map<String, ArrayList<Integer>> propMap =
+        new LinkedHashMap<String, ArrayList<Integer>>();
+
+    /**
+     * Reads a PropList.txt style file and collects the entries whose
+     * start code point lies in the given plane.
+     *
+     * @param file  the property list file to read
+     * @param plane the Unicode plane number to keep entries for
+     * @return the property lists for that plane
+     * @throws IOException if the file cannot be opened or read
+     */
+    public static PropList readSpecFile(File file, int plane)
+        throws IOException
+    {
+        return new PropList(file, plane);
+    }
+
+    /**
+     * Returns the plane-relative code points recorded for a property.
+     *
+     * @param name the property name as spelled in the file
+     * @return the code points, or {@code null} if no entry of that
+     *         name was read for this plane
+     */
+    public List<Integer> codepoints(String name) {
+        return propMap.get(name);
+    }
+
+    /**
+     * Returns the names of all properties that have at least one entry
+     * in this plane, in the order they were first seen.
+     */
+    public Set<String> names() {
+        return propMap.keySet();
+    }
+
+    private PropList(File file, int plane) throws IOException {
+        BufferedReader sbfr = new BufferedReader(new FileReader(file));
+        // close the reader even if reading or number parsing fails
+        try {
+            Matcher m = ENTRY.matcher("");
+            String line = null;
+            int lineNo = 0;
+            while ((line = sbfr.readLine()) != null) {
+                lineNo++;
+                // skip (near-)empty lines and comment lines
+                if (line.length() <= 1 || line.charAt(0) == '#') {
+                    continue;
+                }
+                m.reset(line);
+                if (m.matches()) {
+                    int start = Integer.parseInt(m.group(1), 16);
+                    // entries starting in another plane are not ours
+                    if ((start >> 16) != plane)
+                        continue;
+                    int end = (m.group(2)==null)?start
+                              :Integer.parseInt(m.group(2), 16);
+                    String name = m.group(3);
+
+                    // keep only the offset within the plane
+                    start &= 0xffff;
+                    end &= 0xffff;
+
+                    ArrayList<Integer> list = propMap.get(name);
+                    if (list == null) {
+                        list = new ArrayList<Integer>();
+                        propMap.put(name, list);
+                    }
+                    while (start <= end)
+                        list.add(start++);
+                } else {
+                    System.out.printf("Warning: Unrecognized line %d <%s>%n", lineNo, line);
+                }
+            }
+        } finally {
+            sbfr.close();
+        }
+    }
+
+    /**
+     * Command-line check: parses the file named by {@code args[0]} for
+     * the plane given in {@code args[1]}.
+     */
+    public static void main(String[] args) throws IOException {
+        readSpecFile(new File(args[0]), Integer.decode(args[1]));
+    }
+}
--- a/jdk/src/share/classes/java/lang/Character.java
+++ b/jdk/src/share/classes/java/lang/Character.java
@ -59,14 +59,14 @@ import java.util.Locale;
 * <p>The {@code char} data type (and therefore the value that a
 * {@code Character} object encapsulates) are based on the
 * original Unicode specification, which defined characters as
- * fixed-width 16-bit entities. The Unicode standard has since been
+ * fixed-width 16-bit entities. The Unicode Standard has since been
 * changed to allow for characters whose representation requires more
 * than 16 bits.  The range of legal <em>code point</em>s is now
 * U+0000 to U+10FFFF, known as <em>Unicode scalar value</em>.
 * (Refer to the <a
 * href="http://www.unicode.org/reports/tr27/#notation"><i>
 * definition</i></a> of the U+<i>n</i> notation in the Unicode
- * standard.)
+ * Standard.)
 *
 * <p><a name="BMP">The set of characters from U+0000 to U+FFFF is
 * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
@ -5200,7 +5200,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
     * <p>
     * A character is lowercase if its general category type, provided
     * by {@code Character.getType(ch)}, is
-     * {@code LOWERCASE_LETTER}.
+     * {@code LOWERCASE_LETTER}, or it has contributory property
+     * Other_Lowercase as defined by the Unicode Standard.
     * <p>
     * The following are examples of lowercase characters:
     * <p><blockquote><pre>
@ -5235,7 +5236,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
     * <p>
     * A character is lowercase if its general category type, provided
     * by {@link Character#getType getType(codePoint)}, is
-     * {@code LOWERCASE_LETTER}.
+     * {@code LOWERCASE_LETTER}, or it has contributory property
+     * Other_Lowercase as defined by the Unicode Standard.
     * <p>
     * The following are examples of lowercase characters:
     * <p><blockquote><pre>
@ -5257,7 +5259,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
     * @since   1.5
     */
    public static boolean isLowerCase(int codePoint) {
-        return getType(codePoint) == Character.LOWERCASE_LETTER;
+        return getType(codePoint) == Character.LOWERCASE_LETTER ||
+               CharacterData.of(codePoint).isOtherLowercase(codePoint);
    }

    /**
@ -5265,6 +5268,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
     * <p>
     * A character is uppercase if its general category type, provided by
     * {@code Character.getType(ch)}, is {@code UPPERCASE_LETTER}.
+     * It is also uppercase if it has contributory property Other_Uppercase as defined by the Unicode Standard.
     * <p>
     * The following are examples of uppercase characters:
     * <p><blockquote><pre>
@ -5298,7 +5302,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
     * Determines if the specified character (Unicode code point) is an uppercase character.
     * <p>
     * A character is uppercase if its general category type, provided by
-     * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER}.
+     * {@link Character#getType(int) getType(codePoint)}, is {@code UPPERCASE_LETTER},
+     * or it has contributory property Other_Uppercase as defined by the Unicode Standard.
     * <p>
     * The following are examples of uppercase characters:
     * <p><blockquote><pre>
@ -5320,7 +5325,8 @@ class Character implements java.io.Serializable, Comparable<Character> {
     * @since   1.5
     */
    public static boolean isUpperCase(int codePoint) {
-        return getType(codePoint) == Character.UPPERCASE_LETTER;
+        return getType(codePoint) == Character.UPPERCASE_LETTER ||
+               CharacterData.of(codePoint).isOtherUppercase(codePoint);
    }

    /**
@ -5724,6 +5730,52 @@ class Character implements java.io.Serializable, Comparable<Character> {
        return isJavaIdentifierPart(ch);
    }

+    /**
+     * Determines if the specified character (Unicode code point) is alphabetic.
+     * <p>
+     * A character is considered to be alphabetic if its general category type,
+     * provided by {@link Character#getType(int) getType(codePoint)}, is any of
+     * the following:
+     * <ul>
+     * <li> <code>UPPERCASE_LETTER</code>
+     * <li> <code>LOWERCASE_LETTER</code>
+     * <li> <code>TITLECASE_LETTER</code>
+     * <li> <code>MODIFIER_LETTER</code>
+     * <li> <code>OTHER_LETTER</code>
+     * <li> <code>LETTER_NUMBER</code>
+     * </ul>
+     * or it has contributory property Other_Alphabetic as defined by the
+     * Unicode Standard.
+     *
+     * @param   codePoint the character (Unicode code point) to be tested.
+     * @return  <code>true</code> if the character is a Unicode alphabetic
+     *          character, <code>false</code> otherwise.
+     * @since   1.7
+     */
+    public static boolean isAlphabetic(int codePoint) {
+        return (((((1 << Character.UPPERCASE_LETTER) | // bit n set: type n is listed above
+            (1 << Character.LOWERCASE_LETTER) |
+            (1 << Character.TITLECASE_LETTER) |
+            (1 << Character.MODIFIER_LETTER) |
+            (1 << Character.OTHER_LETTER) |
+            (1 << Character.LETTER_NUMBER)) >> getType(codePoint)) & 1) != 0) || // test the bit for this type
+            CharacterData.of(codePoint).isOtherAlphabetic(codePoint);
+    }
+
+    /**
+     * Determines if the specified character (Unicode code point) is a CJKV
+     * (Chinese, Japanese, Korean and Vietnamese) ideograph, as defined by
+     * the Unicode Standard.
+     *
+     * @param   codePoint the character (Unicode code point) to be tested.
+     * @return  <code>true</code> if the character is a Unicode ideograph
+     *          character, <code>false</code> otherwise.
+     * @since   1.7
+     */
+    public static boolean isIdeographic(int codePoint) {
+        return CharacterData.of(codePoint).isIdeographic(codePoint); // answered by the CharacterData for this code point
+    }
+
    /**
     * Determines if the specified character is
     * permissible as the first character in a Java identifier.
@ -6430,7 +6482,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
    /**
     * Determines if the specified character is a Unicode space character.
     * A character is considered to be a space character if and only if
-     * it is specified to be a space character by the Unicode standard. This
+     * it is specified to be a space character by the Unicode Standard. This
     * method returns true if the character's general category type is any of
     * the following:
     * <ul>
@ -6458,7 +6510,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
     * Determines if the specified character (Unicode code point) is a
     * Unicode space character.  A character is considered to be a
     * space character if and only if it is specified to be a space
-     * character by the Unicode standard. This method returns true if
+     * character by the Unicode Standard. This method returns true if
     * the character's general category type is any of the following:
     *
     * <ul>
@ -6908,7 +6960,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
     * @since 1.4
     */
    static char[] toUpperCaseCharArray(int codePoint) {
-        // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
+        // As of Unicode 6.0, 1:M uppercasings only happen in the BMP.
        assert isBmpCodePoint(codePoint);
        return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
    }
@ -6941,7 +6993,7 @@ class Character implements java.io.Serializable, Comparable<Character> {
     * Note: if the specified character is not assigned a name by
     * the <i>UnicodeData</i> file (part of the Unicode Character
     * Database maintained by the Unicode Consortium), the returned
-     * name is the same as the result of expression
+     * name is the same as the result of the expression:
     *
     * <blockquote>{@code
     *     Character.UnicodeBlock.of(codePoint).toString().replace('_', ' ')
--- a/jdk/src/share/classes/java/lang/CharacterData.java
+++ b/jdk/src/share/classes/java/lang/CharacterData.java
@ -46,10 +46,27 @@ abstract class CharacterData {
    int toUpperCaseEx(int ch) {
        return toUpperCase(ch);
    }
+
    char[] toUpperCaseCharArray(int ch) {
        return null;
    }

+    boolean isOtherLowercase(int ch) { // base-class default: reports no code point as Other_Lowercase
+        return false;
+    }
+
+    boolean isOtherUppercase(int ch) { // base-class default: reports no code point as Other_Uppercase
+        return false;
+    }
+
+    boolean isOtherAlphabetic(int ch) { // base-class default: reports no code point as Other_Alphabetic
+        return false;
+    }
+
+    boolean isIdeographic(int ch) { // base-class default: reports no code point as ideographic
+        return false;
+    }
+
    // Character <= 0xff (basic latin) is handled by internal fast-path
    // to avoid initializing large tables.
    // Note: performance of this "fast-path" code may be sub-optimal
--- a/jdk/test/java/lang/Character/CheckProp.java
+++ b/jdk/test/java/lang/Character/CheckProp.java
@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+
+/**
+ * @test
+ * @bug 7037261
+ * @summary  Check j.l.Character.isLowerCase/isUpperCase/isAlphabetic/isIdeographic
+ */
+
+import java.util.regex.*;
+import java.util.*;
+import java.io.*;
+import static java.lang.Character.*;
+
+public class CheckProp {
+    /**
+     * Checks isLowerCase/isUpperCase/isAlphabetic/isIdeographic for every code
+     * point against the properties listed in PropList.txt (read from test.src).
+     */
+    public static void main(String[] args) throws IOException {
+        File fPropList = new File(System.getProperty("test.src", "."), "PropList.txt");
+        Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)\\s+#.*").matcher("");
+        Map<String, ArrayList<Integer>> propMap = new LinkedHashMap<>();
+
+        // try-with-resources: the reader is closed even if parsing throws.
+        try (BufferedReader sbfr = new BufferedReader(new FileReader(fPropList))) {
+            String line = null;
+            int lineNo = 0;
+            while ((line = sbfr.readLine()) != null) {
+                lineNo++;
+                if (line.length() <= 1 || line.charAt(0) == '#') {
+                    continue;
+                }
+                m.reset(line);
+                if (m.matches()) {
+                    int start = Integer.parseInt(m.group(1), 16);
+                    int end = (m.group(2)==null)?start
+                              :Integer.parseInt(m.group(2), 16);
+                    String name = m.group(3);
+                    ArrayList<Integer> list = propMap.get(name);
+                    if (list == null) {
+                        list = new ArrayList<Integer>();
+                        propMap.put(name, list);
+                    }
+                    while (start <= end)
+                        list.add(start++);
+                } else {
+                    System.out.printf("Warning: Unrecognized line %d <%s>%n", lineNo, line);
+                }
+            }
+        }
+
+        // NOTE: binarySearch below assumes each property's ranges are listed in ascending order.
+        Integer[] otherLowercase = propMap.get("Other_Lowercase").toArray(new Integer[0]);
+        Integer[] otherUppercase = propMap.get("Other_Uppercase").toArray(new Integer[0]);
+        Integer[] otherAlphabetic = propMap.get("Other_Alphabetic").toArray(new Integer[0]);
+        Integer[] ideographic = propMap.get("Ideographic").toArray(new Integer[0]);
+
+        int fails = 0;
+        // MAX_CODE_POINT (U+10FFFF) is itself a legal code point, so the bound is inclusive.
+        for (int cp = MIN_CODE_POINT; cp <= MAX_CODE_POINT; cp++) {
+            int type = getType(cp);
+            if (isLowerCase(cp) !=
+                (type == LOWERCASE_LETTER ||
+                 Arrays.binarySearch(otherLowercase, cp) >= 0))
+            {
+                fails++;
+                System.err.printf("Wrong isLowerCase(U+%04x)\n", cp);
+            }
+            if (isUpperCase(cp) !=
+                (type == UPPERCASE_LETTER ||
+                 Arrays.binarySearch(otherUppercase, cp) >= 0))
+            {
+                fails++;
+                System.err.printf("Wrong isUpperCase(U+%04x)\n", cp);
+            }
+            if (isAlphabetic(cp) !=
+                (type == UPPERCASE_LETTER || type == LOWERCASE_LETTER ||
+                 type == TITLECASE_LETTER || type == MODIFIER_LETTER  ||
+                 type == OTHER_LETTER     || type == LETTER_NUMBER    ||
+                 Arrays.binarySearch(otherAlphabetic, cp) >= 0))
+            {
+                fails++;
+                System.err.printf("Wrong isAlphabetic(U+%04x)\n", cp);
+            }
+            if (isIdeographic(cp) !=
+                (Arrays.binarySearch(ideographic, cp) >= 0))
+            {
+                fails++;
+                System.err.printf("Wrong isIdeographic(U+%04x)\n", cp);
+            }
+        }
+        if (fails != 0)
+            throw new RuntimeException("CheckProp failed=" + fails);
+    }
+}
--- a/jdk/test/java/lang/Character/PropList.txt
+++ b/jdk/test/java/lang/Character/PropList.txt