6934265: Add public method Character.isBmpCodePoint
Move isBmpCodePoint from sun.nio.cs.Surrogate to Character.
Reviewed-by: sherman
This commit is contained in:
parent
30d5c660bc
commit
a0f3e72c24
@ -721,19 +721,18 @@ abstract class AbstractStringBuilder implements Appendable, CharSequence {
|
|||||||
* {@code codePoint} isn't a valid Unicode code point
|
* {@code codePoint} isn't a valid Unicode code point
|
||||||
*/
|
*/
|
||||||
public AbstractStringBuilder appendCodePoint(int codePoint) {
|
public AbstractStringBuilder appendCodePoint(int codePoint) {
|
||||||
if (!Character.isValidCodePoint(codePoint)) {
|
final int count = this.count;
|
||||||
throw new IllegalArgumentException();
|
|
||||||
}
|
if (Character.isBmpCodePoint(codePoint)) {
|
||||||
int n = 1;
|
ensureCapacityInternal(count + 1);
|
||||||
if (codePoint >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
|
value[count] = (char) codePoint;
|
||||||
n++;
|
this.count = count + 1;
|
||||||
}
|
} else if (Character.isValidCodePoint(codePoint)) {
|
||||||
ensureCapacityInternal(count + n);
|
ensureCapacityInternal(count + 2);
|
||||||
if (n == 1) {
|
|
||||||
value[count++] = (char) codePoint;
|
|
||||||
} else {
|
|
||||||
Character.toSurrogates(codePoint, value, count);
|
Character.toSurrogates(codePoint, value, count);
|
||||||
count += n;
|
this.count = count + 2;
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException();
|
||||||
}
|
}
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
@ -67,17 +67,16 @@ import java.util.Locale;
|
|||||||
* definition</i></a> of the U+<i>n</i> notation in the Unicode
|
* definition</i></a> of the U+<i>n</i> notation in the Unicode
|
||||||
* standard.)
|
* standard.)
|
||||||
*
|
*
|
||||||
* <p>The set of characters from U+0000 to U+FFFF is sometimes
|
* <p><a name="BMP">The set of characters from U+0000 to U+FFFF</a> is
|
||||||
* referred to as the <em>Basic Multilingual Plane (BMP)</em>. <a
|
* sometimes referred to as the <em>Basic Multilingual Plane (BMP)</em>.
|
||||||
* name="supplementary">Characters</a> whose code points are greater
|
* <a name="supplementary">Characters</a> whose code points are greater
|
||||||
* than U+FFFF are called <em>supplementary character</em>s. The Java
|
* than U+FFFF are called <em>supplementary character</em>s. The Java
|
||||||
* 2 platform uses the UTF-16 representation in <code>char</code>
|
* platform uses the UTF-16 representation in <code>char</code> arrays and
|
||||||
* arrays and in the <code>String</code> and <code>StringBuffer</code>
|
* in the <code>String</code> and <code>StringBuffer</code> classes. In
|
||||||
* classes. In this representation, supplementary characters are
|
* this representation, supplementary characters are represented as a pair
|
||||||
* represented as a pair of <code>char</code> values, the first from
|
* of <code>char</code> values, the first from the <em>high-surrogates</em>
|
||||||
* the <em>high-surrogates</em> range, (\uD800-\uDBFF), the
|
* range, (\uD800-\uDBFF), the second from the
|
||||||
* second from the <em>low-surrogates</em> range
|
* <em>low-surrogates</em> range (\uDC00-\uDFFF).
|
||||||
* (\uDC00-\uDFFF).
|
|
||||||
*
|
*
|
||||||
* <p>A <code>char</code> value, therefore, represents Basic
|
* <p>A <code>char</code> value, therefore, represents Basic
|
||||||
* Multilingual Plane (BMP) code points, including the surrogate
|
* Multilingual Plane (BMP) code points, including the surrogate
|
||||||
@ -3922,6 +3921,25 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
|
|||||||
return plane < ((MAX_CODE_POINT + 1) >>> 16);
|
return plane < ((MAX_CODE_POINT + 1) >>> 16);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Determines whether the specified character (Unicode code point)
|
||||||
|
* is in the <a href="#BMP">Basic Multilingual Plane (BMP)</a>.
|
||||||
|
* Such code points can be represented using a single {@code char}.
|
||||||
|
*
|
||||||
|
* @param codePoint the character (Unicode code point) to be tested
|
||||||
|
* @return {@code true} if the specified code point is between
|
||||||
|
* {@link #MIN_VALUE} and {@link #MAX_VALUE} inclusive;
|
||||||
|
* {@code false} otherwise.
|
||||||
|
* @since 1.7
|
||||||
|
*/
|
||||||
|
public static boolean isBmpCodePoint(int codePoint) {
|
||||||
|
return codePoint >>> 16 == 0;
|
||||||
|
// Optimized form of:
|
||||||
|
// codePoint >= MIN_VALUE && codePoint <= MAX_VALUE
|
||||||
|
// We consistently use logical shift (>>>) to facilitate
|
||||||
|
// additional runtime optimizations.
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Determines whether the specified character (Unicode code point)
|
* Determines whether the specified character (Unicode code point)
|
||||||
* is in the <a href="#supplementary">supplementary character</a> range.
|
* is in the <a href="#supplementary">supplementary character</a> range.
|
||||||
@ -4319,15 +4337,15 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
|
|||||||
* @since 1.5
|
* @since 1.5
|
||||||
*/
|
*/
|
||||||
public static int toChars(int codePoint, char[] dst, int dstIndex) {
|
public static int toChars(int codePoint, char[] dst, int dstIndex) {
|
||||||
if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
|
if (isBmpCodePoint(codePoint)) {
|
||||||
throw new IllegalArgumentException();
|
|
||||||
}
|
|
||||||
if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
|
|
||||||
dst[dstIndex] = (char) codePoint;
|
dst[dstIndex] = (char) codePoint;
|
||||||
return 1;
|
return 1;
|
||||||
|
} else if (isValidCodePoint(codePoint)) {
|
||||||
|
toSurrogates(codePoint, dst, dstIndex);
|
||||||
|
return 2;
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException();
|
||||||
}
|
}
|
||||||
toSurrogates(codePoint, dst, dstIndex);
|
|
||||||
return 2;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -4347,15 +4365,15 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
|
|||||||
* @since 1.5
|
* @since 1.5
|
||||||
*/
|
*/
|
||||||
public static char[] toChars(int codePoint) {
|
public static char[] toChars(int codePoint) {
|
||||||
if (codePoint < 0 || codePoint > MAX_CODE_POINT) {
|
if (isBmpCodePoint(codePoint)) {
|
||||||
|
return new char[] { (char) codePoint };
|
||||||
|
} else if (isValidCodePoint(codePoint)) {
|
||||||
|
char[] result = new char[2];
|
||||||
|
toSurrogates(codePoint, result, 0);
|
||||||
|
return result;
|
||||||
|
} else {
|
||||||
throw new IllegalArgumentException();
|
throw new IllegalArgumentException();
|
||||||
}
|
}
|
||||||
if (codePoint < MIN_SUPPLEMENTARY_CODE_POINT) {
|
|
||||||
return new char[] { (char) codePoint };
|
|
||||||
}
|
|
||||||
char[] result = new char[2];
|
|
||||||
toSurrogates(codePoint, result, 0);
|
|
||||||
return result;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void toSurrogates(int codePoint, char[] dst, int index) {
|
static void toSurrogates(int codePoint, char[] dst, int index) {
|
||||||
@ -6259,8 +6277,7 @@ class Character extends Object implements java.io.Serializable, Comparable<Chara
|
|||||||
*/
|
*/
|
||||||
static char[] toUpperCaseCharArray(int codePoint) {
|
static char[] toUpperCaseCharArray(int codePoint) {
|
||||||
// As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
|
// As of Unicode 4.0, 1:M uppercasings only happen in the BMP.
|
||||||
assert isValidCodePoint(codePoint) &&
|
assert isBmpCodePoint(codePoint);
|
||||||
!isSupplementaryCodePoint(codePoint);
|
|
||||||
return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
|
return CharacterData.of(codePoint).toUpperCaseCharArray(codePoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -99,6 +99,8 @@ import java.util.regex.PatternSyntaxException;
|
|||||||
*
|
*
|
||||||
* @author Lee Boynton
|
* @author Lee Boynton
|
||||||
* @author Arthur van Hoff
|
* @author Arthur van Hoff
|
||||||
|
* @author Martin Buchholz
|
||||||
|
* @author Ulf Zibis
|
||||||
* @see java.lang.Object#toString()
|
* @see java.lang.Object#toString()
|
||||||
* @see java.lang.StringBuffer
|
* @see java.lang.StringBuffer
|
||||||
* @see java.lang.StringBuilder
|
* @see java.lang.StringBuilder
|
||||||
@ -273,32 +275,32 @@ public final class String
|
|||||||
throw new StringIndexOutOfBoundsException(offset + count);
|
throw new StringIndexOutOfBoundsException(offset + count);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
final int end = offset + count;
|
||||||
|
|
||||||
// Pass 1: Compute precise size of char[]
|
// Pass 1: Compute precise size of char[]
|
||||||
int n = 0;
|
int n = count;
|
||||||
for (int i = offset; i < offset + count; i++) {
|
for (int i = offset; i < end; i++) {
|
||||||
int c = codePoints[i];
|
int c = codePoints[i];
|
||||||
if (c >= Character.MIN_CODE_POINT &&
|
if (Character.isBmpCodePoint(c))
|
||||||
c < Character.MIN_SUPPLEMENTARY_CODE_POINT)
|
continue;
|
||||||
n += 1;
|
else if (Character.isValidCodePoint(c))
|
||||||
else if (Character.isSupplementaryCodePoint(c))
|
n++;
|
||||||
n += 2;
|
|
||||||
else throw new IllegalArgumentException(Integer.toString(c));
|
else throw new IllegalArgumentException(Integer.toString(c));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Pass 2: Allocate and fill in char[]
|
// Pass 2: Allocate and fill in char[]
|
||||||
char[] v = new char[n];
|
final char[] v = new char[n];
|
||||||
for (int i = offset, j = 0; i < offset + count; i++) {
|
|
||||||
|
for (int i = offset, j = 0; i < end; i++, j++) {
|
||||||
int c = codePoints[i];
|
int c = codePoints[i];
|
||||||
if (c < Character.MIN_SUPPLEMENTARY_CODE_POINT) {
|
if (Character.isBmpCodePoint(c))
|
||||||
v[j++] = (char) c;
|
v[j] = (char) c;
|
||||||
} else {
|
else
|
||||||
Character.toSurrogates(c, v, j);
|
Character.toSurrogates(c, v, j++);
|
||||||
j += 2;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
this.value = v;
|
this.value = v;
|
||||||
this.count = v.length;
|
this.count = n;
|
||||||
this.offset = 0;
|
this.offset = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,7 +24,6 @@
|
|||||||
*/
|
*/
|
||||||
package sun.io;
|
package sun.io;
|
||||||
|
|
||||||
import sun.nio.cs.Surrogate;
|
|
||||||
import sun.nio.cs.ext.DoubleByte;
|
import sun.nio.cs.ext.DoubleByte;
|
||||||
import static sun.nio.cs.CharsetMapping.*;
|
import static sun.nio.cs.CharsetMapping.*;
|
||||||
|
|
||||||
|
@ -24,7 +24,6 @@
|
|||||||
*/
|
*/
|
||||||
package sun.io;
|
package sun.io;
|
||||||
|
|
||||||
import sun.nio.cs.Surrogate;
|
|
||||||
import sun.nio.cs.ext.DoubleByte;
|
import sun.nio.cs.ext.DoubleByte;
|
||||||
import static sun.nio.cs.CharsetMapping.*;
|
import static sun.nio.cs.CharsetMapping.*;
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2000, 2001, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -34,8 +34,9 @@ import java.nio.charset.UnmappableCharacterException;
|
|||||||
* Utility class for dealing with surrogates.
|
* Utility class for dealing with surrogates.
|
||||||
*
|
*
|
||||||
* @author Mark Reinhold
|
* @author Mark Reinhold
|
||||||
|
* @author Martin Buchholz
|
||||||
|
* @author Ulf Zibis
|
||||||
*/
|
*/
|
||||||
|
|
||||||
public class Surrogate {
|
public class Surrogate {
|
||||||
|
|
||||||
private Surrogate() { }
|
private Surrogate() { }
|
||||||
@ -74,17 +75,10 @@ public class Surrogate {
|
|||||||
return (MIN <= c) && (c <= MAX);
|
return (MIN <= c) && (c <= MAX);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Tells whether or not the given UCS-4 character is in the Basic
|
|
||||||
* Multilingual Plane, and can be represented using a single char.
|
|
||||||
*/
|
|
||||||
public static boolean isBMPCodePoint(int uc) {
|
|
||||||
return uc >> 16 == 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tells whether or not the given UCS-4 character must be represented as a
|
* Tells whether or not the given UCS-4 character must be represented as a
|
||||||
* surrogate pair in UTF-16.
|
* surrogate pair in UTF-16.
|
||||||
|
* Use of {@link Character#isSupplementaryCodePoint} is generally preferred.
|
||||||
*/
|
*/
|
||||||
public static boolean neededFor(int uc) {
|
public static boolean neededFor(int uc) {
|
||||||
return Character.isSupplementaryCodePoint(uc);
|
return Character.isSupplementaryCodePoint(uc);
|
||||||
@ -110,6 +104,7 @@ public class Surrogate {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Converts the given surrogate pair into a 32-bit UCS-4 character.
|
* Converts the given surrogate pair into a 32-bit UCS-4 character.
|
||||||
|
* Use of {@link Character#toCodePoint} is generally preferred.
|
||||||
*/
|
*/
|
||||||
public static int toUCS4(char c, char d) {
|
public static int toUCS4(char c, char d) {
|
||||||
assert Character.isHighSurrogate(c) && Character.isLowSurrogate(d);
|
assert Character.isHighSurrogate(c) && Character.isLowSurrogate(d);
|
||||||
@ -290,8 +285,9 @@ public class Surrogate {
|
|||||||
* error() will return a descriptive result object
|
* error() will return a descriptive result object
|
||||||
*/
|
*/
|
||||||
public int generate(int uc, int len, CharBuffer dst) {
|
public int generate(int uc, int len, CharBuffer dst) {
|
||||||
if (Surrogate.isBMPCodePoint(uc)) {
|
if (Character.isBmpCodePoint(uc)) {
|
||||||
if (Surrogate.is(uc)) {
|
char c = (char) uc;
|
||||||
|
if (Character.isSurrogate(c)) {
|
||||||
error = CoderResult.malformedForLength(len);
|
error = CoderResult.malformedForLength(len);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -299,10 +295,10 @@ public class Surrogate {
|
|||||||
error = CoderResult.OVERFLOW;
|
error = CoderResult.OVERFLOW;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
dst.put((char)uc);
|
dst.put(c);
|
||||||
error = null;
|
error = null;
|
||||||
return 1;
|
return 1;
|
||||||
} else if (Character.isSupplementaryCodePoint(uc)) {
|
} else if (Character.isValidCodePoint(uc)) {
|
||||||
if (dst.remaining() < 2) {
|
if (dst.remaining() < 2) {
|
||||||
error = CoderResult.OVERFLOW;
|
error = CoderResult.OVERFLOW;
|
||||||
return -1;
|
return -1;
|
||||||
@ -334,8 +330,9 @@ public class Surrogate {
|
|||||||
* error() will return a descriptive result object
|
* error() will return a descriptive result object
|
||||||
*/
|
*/
|
||||||
public int generate(int uc, int len, char[] da, int dp, int dl) {
|
public int generate(int uc, int len, char[] da, int dp, int dl) {
|
||||||
if (Surrogate.isBMPCodePoint(uc)) {
|
if (Character.isBmpCodePoint(uc)) {
|
||||||
if (Surrogate.is(uc)) {
|
char c = (char) uc;
|
||||||
|
if (Character.isSurrogate(c)) {
|
||||||
error = CoderResult.malformedForLength(len);
|
error = CoderResult.malformedForLength(len);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
@ -343,10 +340,10 @@ public class Surrogate {
|
|||||||
error = CoderResult.OVERFLOW;
|
error = CoderResult.OVERFLOW;
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
da[dp] = (char)uc;
|
da[dp] = c;
|
||||||
error = null;
|
error = null;
|
||||||
return 1;
|
return 1;
|
||||||
} else if (Character.isSupplementaryCodePoint(uc)) {
|
} else if (Character.isValidCodePoint(uc)) {
|
||||||
if (dl - dp < 2) {
|
if (dl - dp < 2) {
|
||||||
error = CoderResult.OVERFLOW;
|
error = CoderResult.OVERFLOW;
|
||||||
return -1;
|
return -1;
|
||||||
|
@ -86,22 +86,21 @@ class UTF_32Coder {
|
|||||||
src.position(mark);
|
src.position(mark);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
while (src.remaining() > 3) {
|
while (src.remaining() >= 4) {
|
||||||
cp = getCP(src);
|
cp = getCP(src);
|
||||||
if (cp < 0 || cp > Surrogate.UCS4_MAX) {
|
if (Character.isBmpCodePoint(cp)) {
|
||||||
return CoderResult.malformedForLength(4);
|
|
||||||
}
|
|
||||||
if (cp < Surrogate.UCS4_MIN) {
|
|
||||||
if (!dst.hasRemaining())
|
if (!dst.hasRemaining())
|
||||||
return CoderResult.OVERFLOW;
|
return CoderResult.OVERFLOW;
|
||||||
mark += 4;
|
mark += 4;
|
||||||
dst.put((char)cp);
|
dst.put((char) cp);
|
||||||
} else {
|
} else if (Character.isValidCodePoint(cp)) {
|
||||||
if (dst.remaining() < 2)
|
if (dst.remaining() < 2)
|
||||||
return CoderResult.OVERFLOW;
|
return CoderResult.OVERFLOW;
|
||||||
mark += 4;
|
mark += 4;
|
||||||
dst.put(Surrogate.high(cp));
|
dst.put(Surrogate.high(cp));
|
||||||
dst.put(Surrogate.low(cp));
|
dst.put(Surrogate.low(cp));
|
||||||
|
} else {
|
||||||
|
return CoderResult.malformedForLength(4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return CoderResult.UNDERFLOW;
|
return CoderResult.UNDERFLOW;
|
||||||
@ -154,7 +153,12 @@ class UTF_32Coder {
|
|||||||
try {
|
try {
|
||||||
while (src.hasRemaining()) {
|
while (src.hasRemaining()) {
|
||||||
char c = src.get();
|
char c = src.get();
|
||||||
if (Character.isHighSurrogate(c)) {
|
if (!Character.isSurrogate(c)) {
|
||||||
|
if (dst.remaining() < 4)
|
||||||
|
return CoderResult.OVERFLOW;
|
||||||
|
mark++;
|
||||||
|
put(c, dst);
|
||||||
|
} else if (Character.isHighSurrogate(c)) {
|
||||||
if (!src.hasRemaining())
|
if (!src.hasRemaining())
|
||||||
return CoderResult.UNDERFLOW;
|
return CoderResult.UNDERFLOW;
|
||||||
char low = src.get();
|
char low = src.get();
|
||||||
@ -162,17 +166,13 @@ class UTF_32Coder {
|
|||||||
if (dst.remaining() < 4)
|
if (dst.remaining() < 4)
|
||||||
return CoderResult.OVERFLOW;
|
return CoderResult.OVERFLOW;
|
||||||
mark += 2;
|
mark += 2;
|
||||||
put(Surrogate.toUCS4(c, low), dst);
|
put(Character.toCodePoint(c, low), dst);
|
||||||
} else {
|
} else {
|
||||||
return CoderResult.malformedForLength(1);
|
return CoderResult.malformedForLength(1);
|
||||||
}
|
}
|
||||||
} else if (Character.isLowSurrogate(c)) {
|
|
||||||
return CoderResult.malformedForLength(1);
|
|
||||||
} else {
|
} else {
|
||||||
if (dst.remaining() < 4)
|
// assert Character.isLowSurrogate(c);
|
||||||
return CoderResult.OVERFLOW;
|
return CoderResult.malformedForLength(1);
|
||||||
mark++;
|
|
||||||
put(c, dst);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return CoderResult.UNDERFLOW;
|
return CoderResult.UNDERFLOW;
|
||||||
|
@ -102,7 +102,7 @@ class UTF_8 extends Unicode
|
|||||||
// [F1..F3] [80..BF] [80..BF] [80..BF]
|
// [F1..F3] [80..BF] [80..BF] [80..BF]
|
||||||
// [F4] [80..8F] [80..BF] [80..BF]
|
// [F4] [80..8F] [80..BF] [80..BF]
|
||||||
// only check 80-be range here, the [0xf0,0x80...] and [0xf4,0x90-...]
|
// only check 80-be range here, the [0xf0,0x80...] and [0xf4,0x90-...]
|
||||||
// will be checked by Surrogate.neededFor(uc)
|
// will be checked by Character.isSupplementaryCodePoint(uc)
|
||||||
private static boolean isMalformed4(int b2, int b3, int b4) {
|
private static boolean isMalformed4(int b2, int b3, int b4) {
|
||||||
return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
|
return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
|
||||||
(b4 & 0xc0) != 0x80;
|
(b4 & 0xc0) != 0x80;
|
||||||
@ -248,7 +248,8 @@ class UTF_8 extends Unicode
|
|||||||
((b3 & 0x3f) << 06) |
|
((b3 & 0x3f) << 06) |
|
||||||
(b4 & 0x3f);
|
(b4 & 0x3f);
|
||||||
if (isMalformed4(b2, b3, b4) ||
|
if (isMalformed4(b2, b3, b4) ||
|
||||||
!Surrogate.neededFor(uc)) {
|
// shortest form check
|
||||||
|
!Character.isSupplementaryCodePoint(uc)) {
|
||||||
return malformed(src, sp, dst, dp, 4);
|
return malformed(src, sp, dst, dp, 4);
|
||||||
}
|
}
|
||||||
da[dp++] = Surrogate.high(uc);
|
da[dp++] = Surrogate.high(uc);
|
||||||
@ -304,7 +305,8 @@ class UTF_8 extends Unicode
|
|||||||
((b3 & 0x3f) << 06) |
|
((b3 & 0x3f) << 06) |
|
||||||
(b4 & 0x3f);
|
(b4 & 0x3f);
|
||||||
if (isMalformed4(b2, b3, b4) ||
|
if (isMalformed4(b2, b3, b4) ||
|
||||||
!Surrogate.neededFor(uc)) { // shortest form check
|
// shortest form check
|
||||||
|
!Character.isSupplementaryCodePoint(uc)) {
|
||||||
return malformed(src, mark, 4);
|
return malformed(src, mark, 4);
|
||||||
}
|
}
|
||||||
dst.put(Surrogate.high(uc));
|
dst.put(Surrogate.high(uc));
|
||||||
|
@ -441,7 +441,7 @@ public class EUC_TW extends Charset implements HistoricallyNamedCharset
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int encode(char hi, char low, byte[] bb) {
|
static int encode(char hi, char low, byte[] bb) {
|
||||||
int c = Surrogate.toUCS4(hi, low);
|
int c = Character.toCodePoint(hi, low);
|
||||||
if ((c & 0xf0000) != 0x20000)
|
if ((c & 0xf0000) != 0x20000)
|
||||||
return -1;
|
return -1;
|
||||||
c -= 0x20000;
|
c -= 0x20000;
|
||||||
|
@ -12628,7 +12628,7 @@ public class GB18030
|
|||||||
if (Character.isSurrogate(c)) {
|
if (Character.isSurrogate(c)) {
|
||||||
if ((condensedKey=sgp.parse(c, sa, sp, sl)) < 0)
|
if ((condensedKey=sgp.parse(c, sa, sp, sl)) < 0)
|
||||||
return sgp.error();
|
return sgp.error();
|
||||||
// Surogate.toUCS4 looks like
|
// Character.toCodePoint looks like
|
||||||
// (((high & 0x3ff) << 10) | (low & 0x3ff)) + 0x10000;
|
// (((high & 0x3ff) << 10) | (low & 0x3ff)) + 0x10000;
|
||||||
// so we add (0x2e248 - 0x10000) to get the "key".
|
// so we add (0x2e248 - 0x10000) to get the "key".
|
||||||
condensedKey += 0x1E248;
|
condensedKey += 0x1E248;
|
||||||
|
@ -36,7 +36,6 @@ import java.nio.charset.CharsetDecoder;
|
|||||||
import java.nio.charset.CharsetEncoder;
|
import java.nio.charset.CharsetEncoder;
|
||||||
import java.nio.charset.CoderResult;
|
import java.nio.charset.CoderResult;
|
||||||
import sun.nio.cs.HistoricallyNamedCharset;
|
import sun.nio.cs.HistoricallyNamedCharset;
|
||||||
import sun.nio.cs.Surrogate;
|
|
||||||
|
|
||||||
public class IBM33722
|
public class IBM33722
|
||||||
extends Charset
|
extends Charset
|
||||||
|
@ -36,7 +36,6 @@ import java.nio.charset.CharsetDecoder;
|
|||||||
import java.nio.charset.CharsetEncoder;
|
import java.nio.charset.CharsetEncoder;
|
||||||
import java.nio.charset.CoderResult;
|
import java.nio.charset.CoderResult;
|
||||||
import sun.nio.cs.HistoricallyNamedCharset;
|
import sun.nio.cs.HistoricallyNamedCharset;
|
||||||
import sun.nio.cs.Surrogate;
|
|
||||||
|
|
||||||
public class IBM964
|
public class IBM964
|
||||||
extends Charset
|
extends Charset
|
||||||
|
@ -46,7 +46,7 @@ public class BashStreams {
|
|||||||
|
|
||||||
CharacterGenerator(long seed, String csn, int limit) {
|
CharacterGenerator(long seed, String csn, int limit) {
|
||||||
rand = new Random(seed);
|
rand = new Random(seed);
|
||||||
this.max = Surrogate.UCS4_MAX + 1;
|
this.max = Character.MAX_CODE_POINT + 1;
|
||||||
this.limit = limit;
|
this.limit = limit;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -77,17 +77,20 @@ public class BashStreams {
|
|||||||
int c;
|
int c;
|
||||||
for (;;) {
|
for (;;) {
|
||||||
c = rand.nextInt(max);
|
c = rand.nextInt(max);
|
||||||
if (Surrogate.is(c) || (c == 0xfffe) || (c == 0xffff))
|
if ((Character.isBmpCodePoint(c)
|
||||||
|
&& (Character.isSurrogate((char) c)
|
||||||
|
|| (c == 0xfffe) || (c == 0xffff))))
|
||||||
continue;
|
continue;
|
||||||
if (Surrogate.neededFor(c) && (count == limit - 1))
|
if (Character.isSupplementaryCodePoint(c)
|
||||||
|
&& (count == limit - 1))
|
||||||
continue;
|
continue;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
count++;
|
count++;
|
||||||
if (Surrogate.neededFor(c)) {
|
if (Character.isSupplementaryCodePoint(c)) {
|
||||||
count++;
|
count++;
|
||||||
push(Surrogate.low(c));
|
push(sun.nio.cs.Surrogate.low(c));
|
||||||
return Surrogate.high(c);
|
return sun.nio.cs.Surrogate.high(c);
|
||||||
}
|
}
|
||||||
return (char)c;
|
return (char)c;
|
||||||
}
|
}
|
||||||
@ -137,7 +140,7 @@ public class BashStreams {
|
|||||||
char d = cg.next();
|
char d = cg.next();
|
||||||
if (c != d) {
|
if (c != d) {
|
||||||
if (c == '?') {
|
if (c == '?') {
|
||||||
if (Surrogate.isHigh(d))
|
if (Character.isHighSurrogate(d))
|
||||||
cg.next();
|
cg.next();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -187,7 +190,7 @@ public class BashStreams {
|
|||||||
w.write(ca, 0, n);
|
w.write(ca, 0, n);
|
||||||
count += n;
|
count += n;
|
||||||
}
|
}
|
||||||
if (Surrogate.isHigh(ca[n - 1]))
|
if (Character.isHighSurrogate(ca[n - 1]))
|
||||||
w.write(cg.next());
|
w.write(cg.next());
|
||||||
w.close();
|
w.close();
|
||||||
}
|
}
|
||||||
@ -253,7 +256,8 @@ public class BashStreams {
|
|||||||
if (!cg.hasNext())
|
if (!cg.hasNext())
|
||||||
break;
|
break;
|
||||||
char c = cg.next();
|
char c = cg.next();
|
||||||
if (Surrogate.isHigh(c) && (cb.remaining() == 1)) {
|
if (Character.isHighSurrogate(c)
|
||||||
|
&& cb.remaining() == 1) {
|
||||||
cg.push(c);
|
cg.push(c);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
@ -311,7 +315,7 @@ public class BashStreams {
|
|||||||
mismatchedEOF(csn, count + i, cg.count());
|
mismatchedEOF(csn, count + i, cg.count());
|
||||||
char d = cg.next();
|
char d = cg.next();
|
||||||
if (c == '?') {
|
if (c == '?') {
|
||||||
if (Surrogate.isHigh(d)) {
|
if (Character.isHighSurrogate(d)) {
|
||||||
cg.next();
|
cg.next();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
@ -1,66 +0,0 @@
|
|||||||
/*
|
|
||||||
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
|
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
||||||
*
|
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
|
||||||
* under the terms of the GNU General Public License version 2 only, as
|
|
||||||
* published by the Free Software Foundation.
|
|
||||||
*
|
|
||||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
|
||||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
||||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
||||||
* version 2 for more details (a copy is included in the LICENSE file that
|
|
||||||
* accompanied this code).
|
|
||||||
*
|
|
||||||
* You should have received a copy of the GNU General Public License version
|
|
||||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
|
||||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
||||||
*
|
|
||||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
|
||||||
* or visit www.oracle.com if you need additional information or have any
|
|
||||||
* questions.
|
|
||||||
*/
|
|
||||||
|
|
||||||
public class Surrogate {
|
|
||||||
|
|
||||||
public static final int UCS4_SURROGATE_MIN = 0x10000;
|
|
||||||
public static final int UCS4_MAX = (1 << 20) + UCS4_SURROGATE_MIN - 1;
|
|
||||||
|
|
||||||
// UTF-16 surrogate-character ranges
|
|
||||||
//
|
|
||||||
public static final char MIN_HIGH = '\uD800';
|
|
||||||
public static final char MAX_HIGH = '\uDBFF';
|
|
||||||
public static final char MIN_LOW = '\uDC00';
|
|
||||||
public static final char MAX_LOW = '\uDFFF';
|
|
||||||
public static final char MIN = MIN_HIGH;
|
|
||||||
public static final char MAX = MAX_LOW;
|
|
||||||
|
|
||||||
public static boolean neededFor(int uc) {
|
|
||||||
return (uc >= UCS4_SURROGATE_MIN) && (uc <= UCS4_MAX);
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isHigh(int c) {
|
|
||||||
return (MIN_HIGH <= c) && (c <= MAX_HIGH);
|
|
||||||
}
|
|
||||||
|
|
||||||
static char high(int uc) {
|
|
||||||
return (char)(0xd800 | (((uc - UCS4_SURROGATE_MIN) >> 10) & 0x3ff));
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean isLow(int c) {
|
|
||||||
return (MIN_LOW <= c) && (c <= MAX_LOW);
|
|
||||||
}
|
|
||||||
|
|
||||||
static char low(int uc) {
|
|
||||||
return (char)(0xdc00 | ((uc - UCS4_SURROGATE_MIN) & 0x3ff));
|
|
||||||
}
|
|
||||||
|
|
||||||
public static boolean is(int c) {
|
|
||||||
return (MIN <= c) && (c <= MAX);
|
|
||||||
}
|
|
||||||
|
|
||||||
static int toUCS4(char c, char d) {
|
|
||||||
return (((c & 0x3ff) << 10) | (d & 0x3ff)) + 0x10000;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -42,9 +42,8 @@ public class Surrogates {
|
|||||||
static void initData() throws IOException {
|
static void initData() throws IOException {
|
||||||
StringBuffer sb = new StringBuffer();
|
StringBuffer sb = new StringBuffer();
|
||||||
for (int i = 0; i < LEN; i++) {
|
for (int i = 0; i < LEN; i++) {
|
||||||
int c = Surrogate.UCS4_SURROGATE_MIN + 1;
|
int c = Character.MIN_SUPPLEMENTARY_CODE_POINT + 1;
|
||||||
sb.append(Surrogate.high(c));
|
sb.append(Character.toChars(c));
|
||||||
sb.append(Surrogate.low(c));
|
|
||||||
}
|
}
|
||||||
input = sb.toString().toCharArray();
|
input = sb.toString().toCharArray();
|
||||||
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
ByteArrayOutputStream bos = new ByteArrayOutputStream();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user