6934265: Add public method Character.isBmpCodePoint
Move isBmpCodePoint from sun.nio.cs.Surrogate to Character.
Reviewed-by: sherman
This commit is contained in:
parent
30d5c660bc
commit
a0f3e72c24
@ -721,19 +721,18 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
|
||||
* {@code codePoint} isn't a valid Unicode code point
|
||||
*/
|
||||
public AbstractStringBuilder appendCodePoint(int codePoint) {
|
||||
if (!Character.isValidCodePoint(codePoint)) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
int n = 1;
|
||||
if (codePoint >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||
n++;
|
||||
}
|
||||
ensureCapacityInternal(count + n);
|
||||
if (n == 1) {
|
||||
value[count++] = (char) codePoint;
|
||||
} else {
|
||||
final int count = this.count;
|
||||
|
||||
if (Character.isBmpCodePoint(codePoint)) {
|
||||
ensureCapacityInternal(count + 1);
|
||||
value[count] = (char) codePoint;
|
||||
this.count = count + 1;
|
||||
} else if (Character.isValidCodePoint(codePoint)) {
|
||||
ensureCapacityInternal(count + 2);
|
||||
Character.toSurrogates(codePoint, value, count);
|
||||
count += n;
|
||||
this.count = count + 2;
|
||||
} else {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
return this;
|
||||
}
|
||||
|
@ -67,17 +67,16 @@ import java.util.Locale;
|
||||
* definition</i></a> of the U+<i>n</i> notation in the Unicode
|
||||
* standard.)
|
||||
*
|
||||
* <p>The set of characters from U+0000 to U+FFFF is sometimes
|
||||
* referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
|
||||
* name="supplementary">Characters</a> whose code points are greater
|
||||
* <p><a name="BMP">The set of characters from U+0000 to U+FFFF is
|
||||
* sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
|
||||
* <a name="supplementary">Characters</a> whose code points are greater
|
||||
* than U+FFFF are called <em>supplementary character</em>s. The Java
|
||||
* 2 platform uses the UTF-16 representation in <code>char</code>
|
||||
* arrays and in the <code>String</code> and <code>StringBuffer</code>
|
||||
* classes. In this representation, supplementary characters are
|
||||
* represented as a pair of <code>char</code> values, the first from
|
||||
* the <em>high-surrogates</em> range, (\uD800-\uDBFF), the
|
||||
* second from the <em>low-surrogates</em> range
|
||||
* (\uDC00-\uDFFF).
|
||||
* platform uses the UTF-16 representation in <code>char</code> arrays and
|
||||
* in the <code>String</code> and <code>StringBuffer</code> classes. In
|
||||
* this representation, supplementary characters are represented as a pair
|
||||
* of <code>char</code> values, the first from the <em>high-surrogates</em>
|
||||
* range, (\uD800-\uDBFF), the second from the
|
||||
* <em>low-surrogates</em> range (\uDC00-\uDFFF).
|
||||
*
|
||||
* <p>A <code>char</code> value, therefore, represents Basic
|
||||
* Multilingual Plane (BMP) code points, including the surrogate
|
||||
@ -3922,6 +3921,25 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
|
||||
return plane < ((MAX_CODE_POINT + 1) >>> 16);
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the specified character (Unicode code point)
|
||||
* is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
|
||||
* Such code points can be represented using a single {@code char}.
|
||||
*
|
||||
* @param codePoint the character (Unicode code point) to be tested
|
||||
* @return {@code true} if the specified code point is between
|
||||
* {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
|
||||
* {@code false} otherwise.
|
||||
* @since 1.7
|
||||
*/
|
||||
public static boolean isBmpCodePoint(int codePoint) {
|
||||
return codePoint >>> 16 == 0;
|
||||
// Optimized form of:
|
||||
// codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
|
||||
// We consistently use logical shift (>>>) to facilitate
|
||||
// additional runtime optimizations.
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether the specified character (Unicode code point)
|
||||
* is in the <a href="#supplementary">supplementary character</a> range.
|
||||
@ -4319,15 +4337,15 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
|
||||
* @since 1.5
|
||||
*/
|
||||
public static int toChars(int codePoint, char[] dst, int dstIndex) {
|
||||
if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||
if (isBmpCodePoint(codePoint)) {
|
||||
dst[dstIndex] = (char) codePoint;
|
||||
return 1;
|
||||
} else if (isValidCodePoint(codePoint)) {
|
||||
toSurrogates(codePoint, dst, dstIndex);
|
||||
return 2;
|
||||
} else {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
toSurrogates(codePoint, dst, dstIndex);
|
||||
return 2;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -4347,15 +4365,15 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
|
||||
* @since 1.5
|
||||
*/
|
||||
public static char[] toChars(int codePoint) {
|
||||
if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
|
||||
if (isBmpCodePoint(codePoint)) {
|
||||
return new char[] { (char) codePoint };
|
||||
} else if (isValidCodePoint(codePoint)) {
|
||||
char[] result = new char[2];
|
||||
toSurrogates(codePoint, result, 0);
|
||||
return result;
|
||||
} else {
|
||||
throw new IllegalArgumentException();
|
||||
}
|
||||
if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||
return new char[] { (char) codePoint };
|
||||
}
|
||||
char[] result = new char[2];
|
||||
toSurrogates(codePoint, result, 0);
|
||||
return result;
|
||||
}
|
||||
|
||||
static void toSurrogates(int codePoint, char[] dst, int index) {
|
||||
@ -6259,8 +6277,7 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
|
||||
*/
|
||||
static char[] toUpperCaseCharArray(int codePoint) {
|
||||
// As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
|
||||
assert isValidCodePoint(codePoint) &&
|
||||
!isSupplementaryCodePoint(codePoint);
|
||||
assert isBmpCodePoint(codePoint);
|
||||
return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
|
||||
}
|
||||
|
||||
|
@ -99,6 +99,8 @@ import java.util.regex.PatternSyntaxException;
|
||||
*
|
||||
* @author Lee Boynton
|
||||
* @author Arthur van Hoff
|
||||
* @author Martin Buchholz
|
||||
* @author Ulf Zibis
|
||||
* @see java.lang.Object#toString()
|
||||
* @see java.lang.StringBuffer
|
||||
* @see java.lang.StringBuilder
|
||||
@ -273,32 +275,32 @@ public final class String
|
||||
throw new StringIndexOutOfBoundsException(offset + count);
|
||||
}
|
||||
|
||||
final int end = offset + count;
|
||||
|
||||
// Pass 1: Compute precise size of char[]
|
||||
int n = 0;
|
||||
for (int i = offset; i < offset + count; i++) {
|
||||
int n = count;
|
||||
for (int i = offset; i < end; i++) {
|
||||
int c = codePoints[i];
|
||||
if (c >= Character.MIN_CODE_POINT &&
|
||||
c < Character.MIN_SUPPLEMENTARY_CODE_POINT)
|
||||
n += 1;
|
||||
else if (Character.isSupplementaryCodePoint(c))
|
||||
n += 2;
|
||||
if (Character.isBmpCodePoint(c))
|
||||
continue;
|
||||
else if (Character.isValidCodePoint(c))
|
||||
n++;
|
||||
else throw new IllegalArgumentException(Integer.toString(c));
|
||||
}
|
||||
|
||||
// Pass 2: Allocate and fill in char[]
|
||||
char[] v = new char[n];
|
||||
for (int i = offset, j = 0; i < offset + count; i++) {
|
||||
final char[] v = new char[n];
|
||||
|
||||
for (int i = offset, j = 0; i < end; i++, j++) {
|
||||
int c = codePoints[i];
|
||||
if (c < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
|
||||
v[j++] = (char) c;
|
||||
} else {
|
||||
Character.toSurrogates(c, v, j);
|
||||
j += 2;
|
||||
}
|
||||
if (Character.isBmpCodePoint(c))
|
||||
v[j] = (char) c;
|
||||
else
|
||||
Character.toSurrogates(c, v, j++);
|
||||
}
|
||||
|
||||
this.value = v;
|
||||
this.count = v.length;
|
||||
this.count = n;
|
||||
this.offset = 0;
|
||||
}
|
||||
|
||||
|
@ -24,7 +24,6 @@
|
||||
*/
|
||||
package sun.io;
|
||||
|
||||
import sun.nio.cs.Surrogate;
|
||||
import sun.nio.cs.ext.DoubleByte;
|
||||
import static sun.nio.cs.CharsetMapping.*;
|
||||
|
||||
|
@ -24,7 +24,6 @@
|
||||
*/
|
||||
package sun.io;
|
||||
|
||||
import sun.nio.cs.Surrogate;
|
||||
import sun.nio.cs.ext.DoubleByte;
|
||||
import static sun.nio.cs.CharsetMapping.*;
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2000, 2001, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
@ -34,8 +34,9 @@ import java.nio.charset.UnmappableCharacterException;
|
||||
* Utility class for dealing with surrogates.
|
||||
*
|
||||
* @author Mark Reinhold
|
||||
* @author Martin Buchholz
|
||||
* @author Ulf Zibis
|
||||
*/
|
||||
|
||||
public class Surrogate {
|
||||
|
||||
private Surrogate() { }
|
||||
@ -74,17 +75,10 @@ public class Surrogate {
|
||||
return (MIN <= c) && (c <= MAX);
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells whether or not the given UCS-4 character is in the Basic
|
||||
* Multilingual Plane, and can be represented using a single char.
|
||||
*/
|
||||
public static boolean isBMPCodePoint(int uc) {
|
||||
return uc >> 16 == 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Tells whether or not the given UCS-4 character must be represented as a
|
||||
* surrogate pair in UTF-16.
|
||||
* Use of {@link Character#isSupplementaryCodePoint} is generally preferred.
|
||||
*/
|
||||
public static boolean neededFor(int uc) {
|
||||
return Character.isSupplementaryCodePoint(uc);
|
||||
@ -110,6 +104,7 @@ public class Surrogate {
|
||||
|
||||
/**
|
||||
* Converts the given surrogate pair into a 32-bit UCS-4 character.
|
||||
* Use of {@link Character#toCodePoint} is generally preferred.
|
||||
*/
|
||||
public static int toUCS4(char c, char d) {
|
||||
assert Character.isHighSurrogate(c) && Character.isLowSurrogate(d);
|
||||
@ -290,8 +285,9 @@ public class Surrogate {
|
||||
* error() will return a descriptive result object
|
||||
*/
|
||||
public int generate(int uc, int len, CharBuffer dst) {
|
||||
if (Surrogate.isBMPCodePoint(uc)) {
|
||||
if (Surrogate.is(uc)) {
|
||||
if (Character.isBmpCodePoint(uc)) {
|
||||
char c = (char) uc;
|
||||
if (Character.isSurrogate(c)) {
|
||||
error = CoderResult.malformedForLength(len);
|
||||
return -1;
|
||||
}
|
||||
@ -299,10 +295,10 @@ public class Surrogate {
|
||||
error = CoderResult.OVERFLOW;
|
||||
return -1;
|
||||
}
|
||||
dst.put((char)uc);
|
||||
dst.put(c);
|
||||
error = null;
|
||||
return 1;
|
||||
} else if (Character.isSupplementaryCodePoint(uc)) {
|
||||
} else if (Character.isValidCodePoint(uc)) {
|
||||
if (dst.remaining() < 2) {
|
||||
error = CoderResult.OVERFLOW;
|
||||
return -1;
|
||||
@ -334,8 +330,9 @@ public class Surrogate {
|
||||
* error() will return a descriptive result object
|
||||
*/
|
||||
public int generate(int uc, int len, char[] da, int dp, int dl) {
|
||||
if (Surrogate.isBMPCodePoint(uc)) {
|
||||
if (Surrogate.is(uc)) {
|
||||
if (Character.isBmpCodePoint(uc)) {
|
||||
char c = (char) uc;
|
||||
if (Character.isSurrogate(c)) {
|
||||
error = CoderResult.malformedForLength(len);
|
||||
return -1;
|
||||
}
|
||||
@ -343,10 +340,10 @@ public class Surrogate {
|
||||
error = CoderResult.OVERFLOW;
|
||||
return -1;
|
||||
}
|
||||
da[dp] = (char)uc;
|
||||
da[dp] = c;
|
||||
error = null;
|
||||
return 1;
|
||||
} else if (Character.isSupplementaryCodePoint(uc)) {
|
||||
} else if (Character.isValidCodePoint(uc)) {
|
||||
if (dl - dp < 2) {
|
||||
error = CoderResult.OVERFLOW;
|
||||
return -1;
|
||||
|
@ -86,22 +86,21 @@ class UTF_32Coder {
|
||||
src.position(mark);
|
||||
}
|
||||
}
|
||||
while (src.remaining() > 3) {
|
||||
while (src.remaining() >= 4) {
|
||||
cp = getCP(src);
|
||||
if (cp < 0 || cp > Surrogate.UCS4_MAX) {
|
||||
return CoderResult.malformedForLength(4);
|
||||
}
|
||||
if (cp < Surrogate.UCS4_MIN) {
|
||||
if (Character.isBmpCodePoint(cp)) {
|
||||
if (!dst.hasRemaining())
|
||||
return CoderResult.OVERFLOW;
|
||||
mark += 4;
|
||||
dst.put((char)cp);
|
||||
} else {
|
||||
dst.put((char) cp);
|
||||
} else if (Character.isValidCodePoint(cp)) {
|
||||
if (dst.remaining() < 2)
|
||||
return CoderResult.OVERFLOW;
|
||||
mark += 4;
|
||||
dst.put(Surrogate.high(cp));
|
||||
dst.put(Surrogate.low(cp));
|
||||
} else {
|
||||
return CoderResult.malformedForLength(4);
|
||||
}
|
||||
}
|
||||
return CoderResult.UNDERFLOW;
|
||||
@ -154,7 +153,12 @@ class UTF_32Coder {
|
||||
try {
|
||||
while (src.hasRemaining()) {
|
||||
char c = src.get();
|
||||
if (Character.isHighSurrogate(c)) {
|
||||
if (!Character.isSurrogate(c)) {
|
||||
if (dst.remaining() < 4)
|
||||
return CoderResult.OVERFLOW;
|
||||
mark++;
|
||||
put(c, dst);
|
||||
} else if (Character.isHighSurrogate(c)) {
|
||||
if (!src.hasRemaining())
|
||||
return CoderResult.UNDERFLOW;
|
||||
char low = src.get();
|
||||
@ -162,17 +166,13 @@ class UTF_32Coder {
|
||||
if (dst.remaining() < 4)
|
||||
return CoderResult.OVERFLOW;
|
||||
mark += 2;
|
||||
put(Surrogate.toUCS4(c, low), dst);
|
||||
put(Character.toCodePoint(c, low), dst);
|
||||
} else {
|
||||
return CoderResult.malformedForLength(1);
|
||||
}
|
||||
} else if (Character.isLowSurrogate(c)) {
|
||||
return CoderResult.malformedForLength(1);
|
||||
} else {
|
||||
if (dst.remaining() < 4)
|
||||
return CoderResult.OVERFLOW;
|
||||
mark++;
|
||||
put(c, dst);
|
||||
// assert Character.isLowSurrogate(c);
|
||||
return CoderResult.malformedForLength(1);
|
||||
}
|
||||
}
|
||||
return CoderResult.UNDERFLOW;
|
||||
|
@ -102,7 +102,7 @@ class UTF_8 extends Unicode
|
||||
// [F1..F3] [80..BF] [80..BF] [80..BF]
|
||||
// [F4] [80..8F] [80..BF] [80..BF]
|
||||
// only check 80-bf range here, the [0xf0,0x80...] and [0xf4,0x90-...]
|
||||
// will be checked by Surrogate.neededFor(uc)
|
||||
// will be checked by Character.isSupplementaryCodePoint(uc)
|
||||
private static boolean isMalformed4(int b2, int b3, int b4) {
|
||||
return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
|
||||
(b4 & 0xc0) != 0x80;
|
||||
@ -248,7 +248,8 @@ class UTF_8 extends Unicode
|
||||
((b3 & 0x3f) << 06) |
|
||||
(b4 & 0x3f);
|
||||
if (isMalformed4(b2, b3, b4) ||
|
||||
!Surrogate.neededFor(uc)) {
|
||||
// shortest form check
|
||||
!Character.isSupplementaryCodePoint(uc)) {
|
||||
return malformed(src, sp, dst, dp, 4);
|
||||
}
|
||||
da[dp++] = Surrogate.high(uc);
|
||||
@ -304,7 +305,8 @@ class UTF_8 extends Unicode
|
||||
((b3 & 0x3f) << 06) |
|
||||
(b4 & 0x3f);
|
||||
if (isMalformed4(b2, b3, b4) ||
|
||||
!Surrogate.neededFor(uc)) { // shortest form check
|
||||
// shortest form check
|
||||
!Character.isSupplementaryCodePoint(uc)) {
|
||||
return malformed(src, mark, 4);
|
||||
}
|
||||
dst.put(Surrogate.high(uc));
|
||||
|
@ -441,7 +441,7 @@ public class EUC_TW extends Charset implements HistoricallyNamedCharset
|
||||
}
|
||||
|
||||
static int encode(char hi, char low, byte[] bb) {
|
||||
int c = Surrogate.toUCS4(hi, low);
|
||||
int c = Character.toCodePoint(hi, low);
|
||||
if ((c & 0xf0000) != 0x20000)
|
||||
return -1;
|
||||
c -= 0x20000;
|
||||
|
@ -12628,7 +12628,7 @@ public class GB18030
|
||||
if (Character.isSurrogate(c)) {
|
||||
if ((condensedKey=sgp.parse(c, sa, sp, sl)) < 0)
|
||||
return sgp.error();
|
||||
// Surogate.toUCS4 looks like
|
||||
// Character.toCodePoint looks like
|
||||
// (((high & 0x3ff) << 10) | (low & 0x3ff)) + 0x10000;
|
||||
// so we add (0x2e248 - 0x10000) to get the "key".
|
||||
condensedKey += 0x1E248;
|
||||
|
@ -36,7 +36,6 @@ import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import sun.nio.cs.HistoricallyNamedCharset;
|
||||
import sun.nio.cs.Surrogate;
|
||||
|
||||
public class IBM33722
|
||||
extends Charset
|
||||
|
@ -36,7 +36,6 @@ import java.nio.charset.CharsetDecoder;
|
||||
import java.nio.charset.CharsetEncoder;
|
||||
import java.nio.charset.CoderResult;
|
||||
import sun.nio.cs.HistoricallyNamedCharset;
|
||||
import sun.nio.cs.Surrogate;
|
||||
|
||||
public class IBM964
|
||||
extends Charset
|
||||
|
@ -46,7 +46,7 @@ public class BashStreams {
|
||||
|
||||
CharacterGenerator(long seed, String csn, int limit) {
|
||||
rand = new Random(seed);
|
||||
this.max = Surrogate.UCS4_MAX + 1;
|
||||
this.max = Character.MAX_CODE_POINT + 1;
|
||||
this.limit = limit;
|
||||
}
|
||||
|
||||
@ -77,17 +77,20 @@ public class BashStreams {
|
||||
int c;
|
||||
for (;;) {
|
||||
c = rand.nextInt(max);
|
||||
if (Surrogate.is(c) || (c == 0xfffe) || (c == 0xffff))
|
||||
if ((Character.isBmpCodePoint(c)
|
||||
&& (Character.isSurrogate((char) c)
|
||||
|| (c == 0xfffe) || (c == 0xffff))))
|
||||
continue;
|
||||
if (Surrogate.neededFor(c) && (count == limit - 1))
|
||||
if (Character.isSupplementaryCodePoint(c)
|
||||
&& (count == limit - 1))
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
count++;
|
||||
if (Surrogate.neededFor(c)) {
|
||||
if (Character.isSupplementaryCodePoint(c)) {
|
||||
count++;
|
||||
push(Surrogate.low(c));
|
||||
return Surrogate.high(c);
|
||||
push(sun.nio.cs.Surrogate.low(c));
|
||||
return sun.nio.cs.Surrogate.high(c);
|
||||
}
|
||||
return (char)c;
|
||||
}
|
||||
@ -137,7 +140,7 @@ public class BashStreams {
|
||||
char d = cg.next();
|
||||
if (c != d) {
|
||||
if (c == '?') {
|
||||
if (Surrogate.isHigh(d))
|
||||
if (Character.isHighSurrogate(d))
|
||||
cg.next();
|
||||
continue;
|
||||
}
|
||||
@ -187,7 +190,7 @@ public class BashStreams {
|
||||
w.write(ca, 0, n);
|
||||
count += n;
|
||||
}
|
||||
if (Surrogate.isHigh(ca[n - 1]))
|
||||
if (Character.isHighSurrogate(ca[n - 1]))
|
||||
w.write(cg.next());
|
||||
w.close();
|
||||
}
|
||||
@ -253,7 +256,8 @@ public class BashStreams {
|
||||
if (!cg.hasNext())
|
||||
break;
|
||||
char c = cg.next();
|
||||
if (Surrogate.isHigh(c) && (cb.remaining() == 1)) {
|
||||
if (Character.isHighSurrogate(c)
|
||||
&& cb.remaining() == 1) {
|
||||
cg.push(c);
|
||||
break;
|
||||
}
|
||||
@ -311,7 +315,7 @@ public class BashStreams {
|
||||
mismatchedEOF(csn, count + i, cg.count());
|
||||
char d = cg.next();
|
||||
if (c == '?') {
|
||||
if (Surrogate.isHigh(d)) {
|
||||
if (Character.isHighSurrogate(d)) {
|
||||
cg.next();
|
||||
continue;
|
||||
}
|
||||
|
@ -1,66 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
|
||||
public class Surrogate {
|
||||
|
||||
public static final int UCS4_SURROGATE_MIN = 0x10000;
|
||||
public static final int UCS4_MAX = (1 << 20) + UCS4_SURROGATE_MIN - 1;
|
||||
|
||||
// UTF-16 surrogate-character ranges
|
||||
//
|
||||
public static final char MIN_HIGH = '\uD800';
|
||||
public static final char MAX_HIGH = '\uDBFF';
|
||||
public static final char MIN_LOW = '\uDC00';
|
||||
public static final char MAX_LOW = '\uDFFF';
|
||||
public static final char MIN = MIN_HIGH;
|
||||
public static final char MAX = MAX_LOW;
|
||||
|
||||
public static boolean neededFor(int uc) {
|
||||
return (uc >= UCS4_SURROGATE_MIN) && (uc <= UCS4_MAX);
|
||||
}
|
||||
|
||||
public static boolean isHigh(int c) {
|
||||
return (MIN_HIGH <= c) && (c <= MAX_HIGH);
|
||||
}
|
||||
|
||||
static char high(int uc) {
|
||||
return (char)(0xd800 | (((uc - UCS4_SURROGATE_MIN) >> 10) & 0x3ff));
|
||||
}
|
||||
|
||||
public static boolean isLow(int c) {
|
||||
return (MIN_LOW <= c) && (c <= MAX_LOW);
|
||||
}
|
||||
|
||||
static char low(int uc) {
|
||||
return (char)(0xdc00 | ((uc - UCS4_SURROGATE_MIN) & 0x3ff));
|
||||
}
|
||||
|
||||
public static boolean is(int c) {
|
||||
return (MIN <= c) && (c <= MAX);
|
||||
}
|
||||
|
||||
static int toUCS4(char c, char d) {
|
||||
return (((c & 0x3ff) << 10) | (d & 0x3ff)) + 0x10000;
|
||||
}
|
||||
|
||||
}
|
@ -42,9 +42,8 @@ public class Surrogates {
|
||||
static void initData() throws IOException {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
for (int i = 0; i < LEN; i++) {
|
||||
int c = Surrogate.UCS4_SURROGATE_MIN + 1;
|
||||
sb.append(Surrogate.high(c));
|
||||
sb.append(Surrogate.low(c));
|
||||
int c = Character.MIN_SUPPLEMENTARY_CODE_POINT + 1;
|
||||
sb.append(Character.toChars(c));
|
||||
}
|
||||
input = sb.toString().toCharArray();
|
||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||
|
Loading…
x
Reference in New Issue
Block a user