6934265: Add public method Character.isBmpCodePoint

Move isBmpCodePoint from sun.nio.cs.Surrogate to Character

Reviewed-by: sherman
This commit is contained in:
Ulf Zibis 2010-06-30 16:11:32 -07:00 committed by Martin Buchholz
parent 30d5c660bc
commit a0f3e72c24
15 changed files with 124 additions and 174 deletions

View File

@ -721,19 +721,18 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
* {@code codePoint} isn't a valid Unicode code point * {@code codePoint} isn't a valid Unicode code point
*/ */
public AbstractStringBuilder appendCodePoint(int codePoint) { public AbstractStringBuilder appendCodePoint(int codePoint) {
if (!Character.isValidCodePoint(codePoint)) { final int count = this.count;
throw new IllegalArgumentException();
} if (Character.isBmpCodePoint(codePoint)) {
int n = 1; ensureCapacityInternal(count + 1);
if (codePoint >= Character.MIN_SUPPLEMENTARY_CODE_POINT) { value[count] = (char) codePoint;
n++; this.count = count + 1;
} } else if (Character.isValidCodePoint(codePoint)) {
ensureCapacityInternal(count + n); ensureCapacityInternal(count + 2);
if (n == 1) {
value[count++] = (char) codePoint;
} else {
Character.toSurrogates(codePoint, value, count); Character.toSurrogates(codePoint, value, count);
count += n; this.count = count + 2;
} else {
throw new IllegalArgumentException();
} }
return this; return this;
} }

View File

@ -67,17 +67,16 @@ import java.util.Locale;
* definition</i></a> of the U+<i>n</i> notation in the Unicode * definition</i></a> of the U+<i>n</i> notation in the Unicode
* standard.) * standard.)
* *
* <p>The set of characters from U+0000 to U+FFFF is sometimes * <p><a name="BMP">The set of characters from U+0000 to U+FFFF is
* referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a * sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
* name="supplementary">Characters</a> whose code points are greater * <a name="supplementary">Characters</a> whose code points are greater
* than U+FFFF are called <em>supplementary character</em>s. The Java * than U+FFFF are called <em>supplementary character</em>s. The Java
* 2 platform uses the UTF-16 representation in <code>char</code> * platform uses the UTF-16 representation in <code>char</code> arrays and
* arrays and in the <code>String</code> and <code>StringBuffer</code> * in the <code>String</code> and <code>StringBuffer</code> classes. In
* classes. In this representation, supplementary characters are * this representation, supplementary characters are represented as a pair
* represented as a pair of <code>char</code> values, the first from * of <code>char</code> values, the first from the <em>high-surrogates</em>
* the <em>high-surrogates</em> range, (&#92;uD800-&#92;uDBFF), the * range, (&#92;uD800-&#92;uDBFF), the second from the
* second from the <em>low-surrogates</em> range * <em>low-surrogates</em> range (&#92;uDC00-&#92;uDFFF).
* (&#92;uDC00-&#92;uDFFF).
* *
* <p>A <code>char</code> value, therefore, represents Basic * <p>A <code>char</code> value, therefore, represents Basic
* Multilingual Plane (BMP) code points, including the surrogate * Multilingual Plane (BMP) code points, including the surrogate
@ -3922,6 +3921,25 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
return plane < ((MAX_CODE_POINT + 1) >>> 16); return plane < ((MAX_CODE_POINT + 1) >>> 16);
} }
/**
 * Tells whether the given character (Unicode code point) lies in the
 * <a href="#BMP">Basic Multilingual Plane (BMP)</a>, that is, whether a
 * single {@code char} suffices to represent it.
 *
 * @param  codePoint the character (Unicode code point) to be tested
 * @return {@code true} if the specified code point is between
 *         {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
 *         {@code false} otherwise.
 * @since  1.7
 */
public static boolean isBmpCodePoint(int codePoint) {
    // Optimized form of:
    //     codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
    // Any bit set above the low 16 (including the sign bit of a negative
    // argument) survives the shift and makes the result non-zero.
    // We consistently use logical shift (>>>) to facilitate
    // additional runtime optimizations.
    final int upperBits = codePoint >>> 16;
    return upperBits == 0;
}
/** /**
* Determines whether the specified character (Unicode code point) * Determines whether the specified character (Unicode code point)
* is in the <a href="#supplementary">supplementary character</a> range. * is in the <a href="#supplementary">supplementary character</a> range.
@ -4319,15 +4337,15 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
* @since 1.5 * @since 1.5
*/ */
public static int toChars(int codePoint, char[] dst, int dstIndex) { public static int toChars(int codePoint, char[] dst, int dstIndex) {
if (codePoint < 0 || codePoint > MAX_CODE_POINT) { if (isBmpCodePoint(codePoint)) {
throw new IllegalArgumentException();
}
if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
dst[dstIndex] = (char) codePoint; dst[dstIndex] = (char) codePoint;
return 1; return 1;
} else if (isValidCodePoint(codePoint)) {
toSurrogates(codePoint, dst, dstIndex);
return 2;
} else {
throw new IllegalArgumentException();
} }
toSurrogates(codePoint, dst, dstIndex);
return 2;
} }
/** /**
@ -4347,15 +4365,15 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
* @since 1.5 * @since 1.5
*/ */
public static char[] toChars(int codePoint) { public static char[] toChars(int codePoint) {
if (codePoint < 0 || codePoint > MAX_CODE_POINT) { if (isBmpCodePoint(codePoint)) {
return new char[] { (char) codePoint };
} else if (isValidCodePoint(codePoint)) {
char[] result = new char[2];
toSurrogates(codePoint, result, 0);
return result;
} else {
throw new IllegalArgumentException(); throw new IllegalArgumentException();
} }
if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
return new char[] { (char) codePoint };
}
char[] result = new char[2];
toSurrogates(codePoint, result, 0);
return result;
} }
static void toSurrogates(int codePoint, char[] dst, int index) { static void toSurrogates(int codePoint, char[] dst, int index) {
@ -6259,8 +6277,7 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
*/ */
static char[] toUpperCaseCharArray(int codePoint) { static char[] toUpperCaseCharArray(int codePoint) {
// As of Unicode 4.0, 1:M uppercasings only happen in the BMP. // As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
assert isValidCodePoint(codePoint) && assert isBmpCodePoint(codePoint);
!isSupplementaryCodePoint(codePoint);
return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint); return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
} }

View File

@ -99,6 +99,8 @@ import java.util.regex.PatternSyntaxException;
* *
* @author Lee Boynton * @author Lee Boynton
* @author Arthur van Hoff * @author Arthur van Hoff
* @author Martin Buchholz
* @author Ulf Zibis
* @see java.lang.Object#toString() * @see java.lang.Object#toString()
* @see java.lang.StringBuffer * @see java.lang.StringBuffer
* @see java.lang.StringBuilder * @see java.lang.StringBuilder
@ -273,32 +275,32 @@ public final class String
throw new StringIndexOutOfBoundsException(offset + count); throw new StringIndexOutOfBoundsException(offset + count);
} }
final int end = offset + count;
// Pass 1: Compute precise size of char[] // Pass 1: Compute precise size of char[]
int n = 0; int n = count;
for (int i = offset; i < offset + count; i++) { for (int i = offset; i < end; i++) {
int c = codePoints[i]; int c = codePoints[i];
if (c >= Character.MIN_CODE_POINT && if (Character.isBmpCodePoint(c))
c < Character.MIN_SUPPLEMENTARY_CODE_POINT) continue;
n += 1; else if (Character.isValidCodePoint(c))
else if (Character.isSupplementaryCodePoint(c)) n++;
n += 2;
else throw new IllegalArgumentException(Integer.toString(c)); else throw new IllegalArgumentException(Integer.toString(c));
} }
// Pass 2: Allocate and fill in char[] // Pass 2: Allocate and fill in char[]
char[] v = new char[n]; final char[] v = new char[n];
for (int i = offset, j = 0; i < offset + count; i++) {
for (int i = offset, j = 0; i < end; i++, j++) {
int c = codePoints[i]; int c = codePoints[i];
if (c < Character.MIN_SUPPLEMENTARY_CODE_POINT) { if (Character.isBmpCodePoint(c))
v[j++] = (char) c; v[j] = (char) c;
} else { else
Character.toSurrogates(c, v, j); Character.toSurrogates(c, v, j++);
j += 2;
}
} }
this.value = v; this.value = v;
this.count = v.length; this.count = n;
this.offset = 0; this.offset = 0;
} }

View File

@ -24,7 +24,6 @@
*/ */
package sun.io; package sun.io;
import sun.nio.cs.Surrogate;
import sun.nio.cs.ext.DoubleByte; import sun.nio.cs.ext.DoubleByte;
import static sun.nio.cs.CharsetMapping.*; import static sun.nio.cs.CharsetMapping.*;

View File

@ -24,7 +24,6 @@
*/ */
package sun.io; package sun.io;
import sun.nio.cs.Surrogate;
import sun.nio.cs.ext.DoubleByte; import sun.nio.cs.ext.DoubleByte;
import static sun.nio.cs.CharsetMapping.*; import static sun.nio.cs.CharsetMapping.*;

View File

@ -1,5 +1,5 @@
/* /*
* Copyright (c) 2000, 2001, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
* *
* This code is free software; you can redistribute it and/or modify it * This code is free software; you can redistribute it and/or modify it
@ -34,8 +34,9 @@ import java.nio.charset.UnmappableCharacterException;
* Utility class for dealing with surrogates. * Utility class for dealing with surrogates.
* *
* @author Mark Reinhold * @author Mark Reinhold
* @author Martin Buchholz
* @author Ulf Zibis
*/ */
public class Surrogate { public class Surrogate {
private Surrogate() { } private Surrogate() { }
@ -74,17 +75,10 @@ public class Surrogate {
return (MIN <= c) && (c <= MAX); return (MIN <= c) && (c <= MAX);
} }
/**
* Tells whether or not the given UCS-4 character is in the Basic
* Multilingual Plane, and can be represented using a single char.
*/
public static boolean isBMPCodePoint(int uc) {
return uc >> 16 == 0;
}
/** /**
* Tells whether or not the given UCS-4 character must be represented as a * Tells whether or not the given UCS-4 character must be represented as a
* surrogate pair in UTF-16. * surrogate pair in UTF-16.
* Use of {@link Character#isSupplementaryCodePoint} is generally preferred.
*/ */
public static boolean neededFor(int uc) { public static boolean neededFor(int uc) {
return Character.isSupplementaryCodePoint(uc); return Character.isSupplementaryCodePoint(uc);
@ -110,6 +104,7 @@ public class Surrogate {
/** /**
* Converts the given surrogate pair into a 32-bit UCS-4 character. * Converts the given surrogate pair into a 32-bit UCS-4 character.
* Use of {@link Character#toCodePoint} is generally preferred.
*/ */
public static int toUCS4(char c, char d) { public static int toUCS4(char c, char d) {
assert Character.isHighSurrogate(c) && Character.isLowSurrogate(d); assert Character.isHighSurrogate(c) && Character.isLowSurrogate(d);
@ -290,8 +285,9 @@ public class Surrogate {
* error() will return a descriptive result object * error() will return a descriptive result object
*/ */
public int generate(int uc, int len, CharBuffer dst) { public int generate(int uc, int len, CharBuffer dst) {
if (Surrogate.isBMPCodePoint(uc)) { if (Character.isBmpCodePoint(uc)) {
if (Surrogate.is(uc)) { char c = (char) uc;
if (Character.isSurrogate(c)) {
error = CoderResult.malformedForLength(len); error = CoderResult.malformedForLength(len);
return -1; return -1;
} }
@ -299,10 +295,10 @@ public class Surrogate {
error = CoderResult.OVERFLOW; error = CoderResult.OVERFLOW;
return -1; return -1;
} }
dst.put((char)uc); dst.put(c);
error = null; error = null;
return 1; return 1;
} else if (Character.isSupplementaryCodePoint(uc)) { } else if (Character.isValidCodePoint(uc)) {
if (dst.remaining() < 2) { if (dst.remaining() < 2) {
error = CoderResult.OVERFLOW; error = CoderResult.OVERFLOW;
return -1; return -1;
@ -334,8 +330,9 @@ public class Surrogate {
* error() will return a descriptive result object * error() will return a descriptive result object
*/ */
public int generate(int uc, int len, char[] da, int dp, int dl) { public int generate(int uc, int len, char[] da, int dp, int dl) {
if (Surrogate.isBMPCodePoint(uc)) { if (Character.isBmpCodePoint(uc)) {
if (Surrogate.is(uc)) { char c = (char) uc;
if (Character.isSurrogate(c)) {
error = CoderResult.malformedForLength(len); error = CoderResult.malformedForLength(len);
return -1; return -1;
} }
@ -343,10 +340,10 @@ public class Surrogate {
error = CoderResult.OVERFLOW; error = CoderResult.OVERFLOW;
return -1; return -1;
} }
da[dp] = (char)uc; da[dp] = c;
error = null; error = null;
return 1; return 1;
} else if (Character.isSupplementaryCodePoint(uc)) { } else if (Character.isValidCodePoint(uc)) {
if (dl - dp < 2) { if (dl - dp < 2) {
error = CoderResult.OVERFLOW; error = CoderResult.OVERFLOW;
return -1; return -1;

View File

@ -86,22 +86,21 @@ class UTF_32Coder {
src.position(mark); src.position(mark);
} }
} }
while (src.remaining() > 3) { while (src.remaining() >= 4) {
cp = getCP(src); cp = getCP(src);
if (cp < 0 || cp > Surrogate.UCS4_MAX) { if (Character.isBmpCodePoint(cp)) {
return CoderResult.malformedForLength(4);
}
if (cp < Surrogate.UCS4_MIN) {
if (!dst.hasRemaining()) if (!dst.hasRemaining())
return CoderResult.OVERFLOW; return CoderResult.OVERFLOW;
mark += 4; mark += 4;
dst.put((char)cp); dst.put((char) cp);
} else { } else if (Character.isValidCodePoint(cp)) {
if (dst.remaining() < 2) if (dst.remaining() < 2)
return CoderResult.OVERFLOW; return CoderResult.OVERFLOW;
mark += 4; mark += 4;
dst.put(Surrogate.high(cp)); dst.put(Surrogate.high(cp));
dst.put(Surrogate.low(cp)); dst.put(Surrogate.low(cp));
} else {
return CoderResult.malformedForLength(4);
} }
} }
return CoderResult.UNDERFLOW; return CoderResult.UNDERFLOW;
@ -154,7 +153,12 @@ class UTF_32Coder {
try { try {
while (src.hasRemaining()) { while (src.hasRemaining()) {
char c = src.get(); char c = src.get();
if (Character.isHighSurrogate(c)) { if (!Character.isSurrogate(c)) {
if (dst.remaining() < 4)
return CoderResult.OVERFLOW;
mark++;
put(c, dst);
} else if (Character.isHighSurrogate(c)) {
if (!src.hasRemaining()) if (!src.hasRemaining())
return CoderResult.UNDERFLOW; return CoderResult.UNDERFLOW;
char low = src.get(); char low = src.get();
@ -162,17 +166,13 @@ class UTF_32Coder {
if (dst.remaining() < 4) if (dst.remaining() < 4)
return CoderResult.OVERFLOW; return CoderResult.OVERFLOW;
mark += 2; mark += 2;
put(Surrogate.toUCS4(c, low), dst); put(Character.toCodePoint(c, low), dst);
} else { } else {
return CoderResult.malformedForLength(1); return CoderResult.malformedForLength(1);
} }
} else if (Character.isLowSurrogate(c)) {
return CoderResult.malformedForLength(1);
} else { } else {
if (dst.remaining() < 4) // assert Character.isLowSurrogate(c);
return CoderResult.OVERFLOW; return CoderResult.malformedForLength(1);
mark++;
put(c, dst);
} }
} }
return CoderResult.UNDERFLOW; return CoderResult.UNDERFLOW;

View File

@ -102,7 +102,7 @@ class UTF_8 extends Unicode
// [F1..F3] [80..BF] [80..BF] [80..BF] // [F1..F3] [80..BF] [80..BF] [80..BF]
// [F4] [80..8F] [80..BF] [80..BF] // [F4] [80..8F] [80..BF] [80..BF]
// only check 80-bf range here, the [0xf0,0x80...] and [0xf4,0x90-...] // only check 80-bf range here, the [0xf0,0x80...] and [0xf4,0x90-...]
// will be checked by Surrogate.neededFor(uc) // will be checked by Character.isSupplementaryCodePoint(uc)
private static boolean isMalformed4(int b2, int b3, int b4) { private static boolean isMalformed4(int b2, int b3, int b4) {
return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 || return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
(b4 & 0xc0) != 0x80; (b4 & 0xc0) != 0x80;
@ -248,7 +248,8 @@ class UTF_8 extends Unicode
((b3 & 0x3f) << 06) | ((b3 & 0x3f) << 06) |
(b4 & 0x3f); (b4 & 0x3f);
if (isMalformed4(b2, b3, b4) || if (isMalformed4(b2, b3, b4) ||
!Surrogate.neededFor(uc)) { // shortest form check
!Character.isSupplementaryCodePoint(uc)) {
return malformed(src, sp, dst, dp, 4); return malformed(src, sp, dst, dp, 4);
} }
da[dp++] = Surrogate.high(uc); da[dp++] = Surrogate.high(uc);
@ -304,7 +305,8 @@ class UTF_8 extends Unicode
((b3 & 0x3f) << 06) | ((b3 & 0x3f) << 06) |
(b4 & 0x3f); (b4 & 0x3f);
if (isMalformed4(b2, b3, b4) || if (isMalformed4(b2, b3, b4) ||
!Surrogate.neededFor(uc)) { // shortest form check // shortest form check
!Character.isSupplementaryCodePoint(uc)) {
return malformed(src, mark, 4); return malformed(src, mark, 4);
} }
dst.put(Surrogate.high(uc)); dst.put(Surrogate.high(uc));

View File

@ -441,7 +441,7 @@ public class EUC_TW extends Charset implements HistoricallyNamedCharset
} }
static int encode(char hi, char low, byte[] bb) { static int encode(char hi, char low, byte[] bb) {
int c = Surrogate.toUCS4(hi, low); int c = Character.toCodePoint(hi, low);
if ((c & 0xf0000) != 0x20000) if ((c & 0xf0000) != 0x20000)
return -1; return -1;
c -= 0x20000; c -= 0x20000;

View File

@ -12628,7 +12628,7 @@ public class GB18030
if (Character.isSurrogate(c)) { if (Character.isSurrogate(c)) {
if ((condensedKey=sgp.parse(c, sa, sp, sl)) < 0) if ((condensedKey=sgp.parse(c, sa, sp, sl)) < 0)
return sgp.error(); return sgp.error();
// Surogate.toUCS4 looks like // Character.toCodePoint looks like
// (((high & 0x3ff) << 10) | (low & 0x3ff)) + 0x10000; // (((high & 0x3ff) << 10) | (low & 0x3ff)) + 0x10000;
// so we add (0x2e248 - 0x10000) to get the "key". // so we add (0x2e248 - 0x10000) to get the "key".
condensedKey += 0x1E248; condensedKey += 0x1E248;

View File

@ -36,7 +36,6 @@ import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder; import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult; import java.nio.charset.CoderResult;
import sun.nio.cs.HistoricallyNamedCharset; import sun.nio.cs.HistoricallyNamedCharset;
import sun.nio.cs.Surrogate;
public class IBM33722 public class IBM33722
extends Charset extends Charset

View File

@ -36,7 +36,6 @@ import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder; import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult; import java.nio.charset.CoderResult;
import sun.nio.cs.HistoricallyNamedCharset; import sun.nio.cs.HistoricallyNamedCharset;
import sun.nio.cs.Surrogate;
public class IBM964 public class IBM964
extends Charset extends Charset

View File

@ -46,7 +46,7 @@ public class BashStreams {
CharacterGenerator(long seed, String csn, int limit) { CharacterGenerator(long seed, String csn, int limit) {
rand = new Random(seed); rand = new Random(seed);
this.max = Surrogate.UCS4_MAX + 1; this.max = Character.MAX_CODE_POINT + 1;
this.limit = limit; this.limit = limit;
} }
@ -77,17 +77,20 @@ public class BashStreams {
int c; int c;
for (;;) { for (;;) {
c = rand.nextInt(max); c = rand.nextInt(max);
if (Surrogate.is(c) || (c == 0xfffe) || (c == 0xffff)) if ((Character.isBmpCodePoint(c)
&& (Character.isSurrogate((char) c)
|| (c == 0xfffe) || (c == 0xffff))))
continue; continue;
if (Surrogate.neededFor(c) && (count == limit - 1)) if (Character.isSupplementaryCodePoint(c)
&& (count == limit - 1))
continue; continue;
break; break;
} }
count++; count++;
if (Surrogate.neededFor(c)) { if (Character.isSupplementaryCodePoint(c)) {
count++; count++;
push(Surrogate.low(c)); push(sun.nio.cs.Surrogate.low(c));
return Surrogate.high(c); return sun.nio.cs.Surrogate.high(c);
} }
return (char)c; return (char)c;
} }
@ -137,7 +140,7 @@ public class BashStreams {
char d = cg.next(); char d = cg.next();
if (c != d) { if (c != d) {
if (c == '?') { if (c == '?') {
if (Surrogate.isHigh(d)) if (Character.isHighSurrogate(d))
cg.next(); cg.next();
continue; continue;
} }
@ -187,7 +190,7 @@ public class BashStreams {
w.write(ca, 0, n); w.write(ca, 0, n);
count += n; count += n;
} }
if (Surrogate.isHigh(ca[n - 1])) if (Character.isHighSurrogate(ca[n - 1]))
w.write(cg.next()); w.write(cg.next());
w.close(); w.close();
} }
@ -253,7 +256,8 @@ public class BashStreams {
if (!cg.hasNext()) if (!cg.hasNext())
break; break;
char c = cg.next(); char c = cg.next();
if (Surrogate.isHigh(c) && (cb.remaining() == 1)) { if (Character.isHighSurrogate(c)
&& cb.remaining() == 1) {
cg.push(c); cg.push(c);
break; break;
} }
@ -311,7 +315,7 @@ public class BashStreams {
mismatchedEOF(csn, count + i, cg.count()); mismatchedEOF(csn, count + i, cg.count());
char d = cg.next(); char d = cg.next();
if (c == '?') { if (c == '?') {
if (Surrogate.isHigh(d)) { if (Character.isHighSurrogate(d)) {
cg.next(); cg.next();
continue; continue;
} }

View File

@ -1,66 +0,0 @@
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
/**
 * Static helpers for UTF-16 surrogate arithmetic: range predicates for
 * surrogate code units and conversions between a UCS-4 value and its
 * high/low surrogate pair.
 */
public class Surrogate {

    /** Smallest UCS-4 value that is encoded as a surrogate pair. */
    public static final int UCS4_SURROGATE_MIN = 0x10000;

    /** Largest UCS-4 value: 2^20 values starting at {@link #UCS4_SURROGATE_MIN}. */
    public static final int UCS4_MAX = UCS4_SURROGATE_MIN + (1 << 20) - 1;

    // UTF-16 surrogate-character ranges
    //
    /** First high-surrogate code unit. */
    public static final char MIN_HIGH = '\uD800';
    /** Last high-surrogate code unit. */
    public static final char MAX_HIGH = '\uDBFF';
    /** First low-surrogate code unit. */
    public static final char MIN_LOW = '\uDC00';
    /** Last low-surrogate code unit. */
    public static final char MAX_LOW = '\uDFFF';
    /** First surrogate code unit of either kind. */
    public static final char MIN = MIN_HIGH;
    /** Last surrogate code unit of either kind. */
    public static final char MAX = MAX_LOW;

    /**
     * Tells whether {@code uc} lies in
     * [{@link #UCS4_SURROGATE_MIN}, {@link #UCS4_MAX}].
     */
    public static boolean neededFor(int uc) {
        return !(uc < UCS4_SURROGATE_MIN || uc > UCS4_MAX);
    }

    /** Tells whether {@code c} lies in [{@link #MIN_HIGH}, {@link #MAX_HIGH}]. */
    public static boolean isHigh(int c) {
        return c >= MIN_HIGH && c <= MAX_HIGH;
    }

    /**
     * Returns the high surrogate for {@code uc}: 0xD800 combined with bits
     * 10..19 of the offset from {@link #UCS4_SURROGATE_MIN}. The argument
     * is not range-checked.
     */
    static char high(int uc) {
        final int offset = uc - UCS4_SURROGATE_MIN;
        final int upperTenBits = (offset >> 10) & 0x3ff;
        return (char) (0xd800 | upperTenBits);
    }

    /** Tells whether {@code c} lies in [{@link #MIN_LOW}, {@link #MAX_LOW}]. */
    public static boolean isLow(int c) {
        return c >= MIN_LOW && c <= MAX_LOW;
    }

    /**
     * Returns the low surrogate for {@code uc}: 0xDC00 combined with the
     * low ten bits of the offset from {@link #UCS4_SURROGATE_MIN}. The
     * argument is not range-checked.
     */
    static char low(int uc) {
        final int offset = uc - UCS4_SURROGATE_MIN;
        final int lowerTenBits = offset & 0x3ff;
        return (char) (0xdc00 | lowerTenBits);
    }

    /** Tells whether {@code c} lies in [{@link #MIN}, {@link #MAX}]. */
    public static boolean is(int c) {
        return c >= MIN && c <= MAX;
    }

    /**
     * Combines the low ten bits of {@code c} (as the upper half) and of
     * {@code d} (as the lower half) and adds 0x10000. The arguments are
     * not checked for being surrogates.
     */
    static int toUCS4(char c, char d) {
        final int upperHalf = (c & 0x3ff) << 10;
        final int lowerHalf = d & 0x3ff;
        return (upperHalf | lowerHalf) + 0x10000;
    }
}

View File

@ -42,9 +42,8 @@ public class Surrogates {
static void initData() throws IOException { static void initData() throws IOException {
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
for (int i = 0; i < LEN; i++) { for (int i = 0; i < LEN; i++) {
int c = Surrogate.UCS4_SURROGATE_MIN + 1; int c = Character.MIN_SUPPLEMENTARY_CODE_POINT + 1;
sb.append(Surrogate.high(c)); sb.append(Character.toChars(c));
sb.append(Surrogate.low(c));
} }
input = sb.toString().toCharArray(); input = sb.toString().toCharArray();
ByteArrayOutputStream bos = new ByteArrayOutputStream(); ByteArrayOutputStream bos = new ByteArrayOutputStream();