8301971: Make JDK source code UTF-8
8338973: Document need to have UTF-8 locale available to build the JDK Reviewed-by: erikj, naoto, mbaesken
This commit is contained in:
parent
74e981e855
commit
3aa2ea7e67
@ -1,5 +1,8 @@
|
||||
root = true
|
||||
|
||||
[*]
|
||||
charset = utf-8
|
||||
|
||||
[*.{cpp,hpp,c,h,java,cc,hh,m,mm,S,md,properties,gmk,m4,ac}]
|
||||
trim_trailing_whitespace = true
|
||||
|
||||
|
1
.gitattributes
vendored
1
.gitattributes
vendored
@ -1,4 +1,5 @@
|
||||
* -text
|
||||
* encoding=utf-8
|
||||
*.java diff=java
|
||||
*.c diff=cpp
|
||||
*.h diff=cpp
|
||||
|
@ -305,6 +305,14 @@ using
|
||||
<li><p>If using <a href="#cygwin">Cygwin</a>, you must make sure the
|
||||
file permissions and attributes between Windows and Cygwin are
|
||||
consistent. It is recommended that you follow this procedure:</p>
|
||||
<li><p>UTF-8 support is needed to compile the JDK. On Unix systems, this
|
||||
typically means that the <code>C.UTF-8</code> or
|
||||
<code>en_US.UTF-8</code> locale needs to be available. For Windows
|
||||
users, please see the section on <a href="#locale-requirements">Locale
|
||||
Requirements</a> below.</p></li>
|
||||
<li><p>On Windows, if using <a href="#cygwin">Cygwin</a>, extra care
|
||||
must be taken to make sure the environment is consistent. It is
|
||||
recommended that you follow this procedure:</p>
|
||||
<ul>
|
||||
<li><p>Create the directory that is going to contain the top directory
|
||||
of the JDK clone by using the <code>mkdir</code> command in the Cygwin
|
||||
|
@ -83,6 +83,11 @@ on where and how to check out the source code.
|
||||
for the source code, see below for suggestions on how to keep the build
|
||||
artifacts on a local disk.
|
||||
|
||||
* UTF-8 support is needed to compile the JDK. On Unix systems, this typically
|
||||
means that the `C.UTF-8` or `en_US.UTF-8` locale needs to be available. For
|
||||
Windows users, please see the section on [Locale
|
||||
Requirements](#locale-requirements) below.
|
||||
|
||||
* On Windows, extra care must be taken to have a smooth building experience:
|
||||
|
||||
* Make sure that all relevant paths have short names. Short names are used by
|
||||
|
@ -96,14 +96,14 @@ JAVADOC_DISABLED_DOCLINT_PACKAGES := org.w3c.* javax.smartcardio
|
||||
|
||||
# The initial set of options for javadoc
|
||||
JAVADOC_OPTIONS := -use -keywords -notimestamp \
|
||||
-serialwarn -encoding ISO-8859-1 -docencoding UTF-8 -breakiterator \
|
||||
-serialwarn -encoding utf-8 -docencoding utf-8 -breakiterator \
|
||||
-splitIndex --system none -javafx --expand-requires transitive \
|
||||
--override-methods=summary --syntax-highlight
|
||||
|
||||
# The reference options must stay stable to allow for comparisons across the
|
||||
# development cycle.
|
||||
REFERENCE_OPTIONS := -XDignore.symbol.file=true -use -keywords -notimestamp \
|
||||
-serialwarn -encoding ISO-8859-1 -breakiterator -splitIndex --system none \
|
||||
-serialwarn -encoding utf-8 -breakiterator -splitIndex --system none \
|
||||
-html5 -javafx --expand-requires transitive
|
||||
|
||||
# Should we add DRAFT stamps to the generated javadoc?
|
||||
|
@ -134,17 +134,33 @@ AC_DEFUN_ONCE([BASIC_SETUP_BUILD_ENV],
|
||||
)
|
||||
AC_SUBST(BUILD_ENV)
|
||||
|
||||
AC_MSG_CHECKING([for locale to use])
|
||||
if test "x$LOCALE" != x; then
|
||||
# Check if we actually have C.UTF-8; if so, use it
|
||||
if $LOCALE -a | $GREP -q -E "^C\.(utf8|UTF-8)$"; then
|
||||
LOCALE_USED=C.UTF-8
|
||||
AC_MSG_RESULT([C.UTF-8 (recommended)])
|
||||
elif $LOCALE -a | $GREP -q -E "^en_US\.(utf8|UTF-8)$"; then
|
||||
LOCALE_USED=en_US.UTF-8
|
||||
AC_MSG_RESULT([en_US.UTF-8 (acceptable fallback)])
|
||||
else
|
||||
AC_MSG_WARN([C.UTF-8 locale not found, using C locale])
|
||||
# As a fallback, check if the user's locale is UTF-8. USER_LOCALE was saved
|
||||
# by the wrapper configure script before autoconf messed up LC_ALL.
|
||||
if $ECHO $USER_LOCALE | $GREP -q -E "\.(utf8|UTF-8)$"; then
|
||||
LOCALE_USED=$USER_LOCALE
|
||||
AC_MSG_RESULT([$USER_LOCALE (untested fallback)])
|
||||
AC_MSG_WARN([Could not find C.UTF-8 or en_US.UTF-8 locale. This is not supported, and the build might fail unexpectedly.])
|
||||
else
|
||||
AC_MSG_RESULT([no UTF-8 locale found])
|
||||
AC_MSG_WARN([No UTF-8 locale found. This is not supported. Proceeding with the C locale, but the build might fail unexpectedly.])
|
||||
LOCALE_USED=C
|
||||
fi
|
||||
AC_MSG_NOTICE([The recommended locale is C.UTF-8, but en_US.UTF-8 is also accepted.])
|
||||
fi
|
||||
else
|
||||
AC_MSG_WARN([locale command not not found, using C locale])
|
||||
LOCALE_USED=C
|
||||
LOCALE_USED=C.UTF-8
|
||||
AC_MSG_RESULT([C.UTF-8 (default)])
|
||||
AC_MSG_WARN([locale command not not found, using C.UTF-8 locale])
|
||||
fi
|
||||
|
||||
export LC_ALL=$LOCALE_USED
|
||||
|
4
make/autoconf/configure
vendored
4
make/autoconf/configure
vendored
@ -49,7 +49,9 @@ fi
|
||||
export CONFIG_SHELL=$BASH
|
||||
export _as_can_reexec=no
|
||||
|
||||
# Make sure all shell commands are executed with the C locale
|
||||
# Save user's current locale, but make sure all future shell commands are
|
||||
# executed with the C locale
|
||||
export USER_LOCALE=$LC_ALL
|
||||
export LC_ALL=C
|
||||
|
||||
if test "x$CUSTOM_CONFIG_DIR" != x; then
|
||||
|
@ -573,12 +573,20 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_HELPER],
|
||||
TOOLCHAIN_CFLAGS_JDK="$TOOLCHAIN_CFLAGS_JDK -fvisibility=hidden -fstack-protector"
|
||||
|
||||
elif test "x$TOOLCHAIN_TYPE" = xmicrosoft; then
|
||||
# The -utf-8 option sets source and execution character sets to UTF-8 to enable correct
|
||||
# compilation of all source files regardless of the active code page on Windows.
|
||||
TOOLCHAIN_CFLAGS_JVM="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -utf-8 -MP"
|
||||
TOOLCHAIN_CFLAGS_JDK="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -utf-8 -Zc:wchar_t-"
|
||||
TOOLCHAIN_CFLAGS_JVM="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -MP"
|
||||
TOOLCHAIN_CFLAGS_JDK="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -Zc:wchar_t-"
|
||||
fi
|
||||
|
||||
# Set character encoding in source
|
||||
if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then
|
||||
CHARSET_CFLAGS="-finput-charset=utf-8"
|
||||
elif test "x$TOOLCHAIN_TYPE" = xmicrosoft; then
|
||||
# The -utf-8 option sets both source and execution character sets
|
||||
CHARSET_CFLAGS="-utf-8 -validate-charset"
|
||||
fi
|
||||
TOOLCHAIN_CFLAGS_JVM="$TOOLCHAIN_CFLAGS_JVM $CHARSET_CFLAGS"
|
||||
TOOLCHAIN_CFLAGS_JDK="$TOOLCHAIN_CFLAGS_JDK $CHARSET_CFLAGS"
|
||||
|
||||
# CFLAGS C language level for JDK sources (hotspot only uses C++)
|
||||
if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then
|
||||
LANGSTD_CFLAGS="-std=c11"
|
||||
|
@ -80,15 +80,13 @@ endef
|
||||
#
|
||||
# The sed expression does this:
|
||||
# 1. Add a backslash before any :, = or ! that do not have a backslash already.
|
||||
# 2. Apply the file unicode2x.sed which does a whole bunch of \u00XX to \xXX
|
||||
# conversions.
|
||||
# 3. Delete all lines starting with #.
|
||||
# 4. Delete empty lines.
|
||||
# 5. Append lines ending with \ with the next line.
|
||||
# 6. Remove leading and trailing white space. Note that tabs must be explicit
|
||||
# 2. Delete all lines starting with #.
|
||||
# 3. Delete empty lines.
|
||||
# 4. Append lines ending with \ with the next line.
|
||||
# 5. Remove leading and trailing white space. Note that tabs must be explicit
|
||||
# as sed on macosx does not understand '\t'.
|
||||
# 7. Replace the first \= with just =.
|
||||
# 8. Finally it's all sorted to create a stable output.
|
||||
# 6. Replace the first \= with just =.
|
||||
# 7. Finally it's all sorted to create a stable output.
|
||||
#
|
||||
# It is assumed that = is the character used for separating names and values.
|
||||
define add_file_to_clean
|
||||
@ -108,7 +106,6 @@ define add_file_to_clean
|
||||
( $(CAT) $$< && $(ECHO) "" ) \
|
||||
| $(SED) -e 's/\([^\\]\):/\1\\:/g' -e 's/\([^\\]\)=/\1\\=/g' \
|
||||
-e 's/\([^\\]\)!/\1\\!/g' -e 's/^[ ]*#.*/#/g' \
|
||||
| $(SED) -f "$$(TOPDIR)/make/common/support/unicode2x.sed" \
|
||||
| $(SED) -e '/^#/d' -e '/^$$$$/d' \
|
||||
-e :a -e '/\\$$$$/N; s/\\\n//; ta' \
|
||||
-e 's/^[ ]*//;s/[ ]*$$$$//' \
|
||||
@ -265,10 +262,12 @@ define SetupJavaCompilationBody
|
||||
endif
|
||||
|
||||
# Tell javac to do exactly as told and no more
|
||||
PARANOIA_FLAGS := -implicit:none -Xprefer:source -XDignore.symbol.file=true -encoding ascii
|
||||
PARANOIA_FLAGS := -implicit:none -Xprefer:source -XDignore.symbol.file=true
|
||||
|
||||
$1_FLAGS += -g -Xlint:all $$($1_TARGET_RELEASE) $$(PARANOIA_FLAGS)
|
||||
$1_FLAGS += $$($1_JAVAC_FLAGS)
|
||||
# Set character encoding in source
|
||||
$1_FLAGS += -encoding utf-8
|
||||
|
||||
ifeq ($$(JAVA_WARNINGS_AS_ERRORS), true)
|
||||
$1_FLAGS += -Werror
|
||||
|
@ -227,6 +227,8 @@ endef
|
||||
|
||||
GLOBAL_VERSION_INFO_RESOURCE := $(TOPDIR)/src/java.base/windows/native/common/version.rc
|
||||
|
||||
# \xA9 is the copyright symbol in ANSI encoding (Windows-1252), which rc.exe
|
||||
# assumes the resource file is in.
|
||||
JDK_RCFLAGS=$(RCFLAGS) \
|
||||
-D"JDK_VERSION_STRING=$(VERSION_STRING)" \
|
||||
-D"JDK_COMPANY=$(JDK_RC_COMPANY_NAME)" \
|
||||
|
@ -1,100 +0,0 @@
|
||||
s/\\u0020/\x20/g
|
||||
s/\\u003A/\x3A/g
|
||||
s/\\u006B/\x6B/g
|
||||
s/\\u0075/\x75/g
|
||||
s/\\u00A0/\xA0/g
|
||||
s/\\u00A3/\xA3/g
|
||||
s/\\u00B0/\xB0/g
|
||||
s/\\u00B7/\xB7/g
|
||||
s/\\u00BA/\xBA/g
|
||||
s/\\u00BF/\xBF/g
|
||||
s/\\u00C0/\xC0/g
|
||||
s/\\u00C1/\xC1/g
|
||||
s/\\u00C2/\xC2/g
|
||||
s/\\u00C4/\xC4/g
|
||||
s/\\u00C5/\xC5/g
|
||||
s/\\u00C8/\xC8/g
|
||||
s/\\u00C9/\xC9/g
|
||||
s/\\u00CA/\xCA/g
|
||||
s/\\u00CD/\xCD/g
|
||||
s/\\u00CE/\xCE/g
|
||||
s/\\u00D3/\xD3/g
|
||||
s/\\u00D4/\xD4/g
|
||||
s/\\u00D6/\xD6/g
|
||||
s/\\u00DA/\xDA/g
|
||||
s/\\u00DC/\xDC/g
|
||||
s/\\u00DD/\xDD/g
|
||||
s/\\u00DF/\xDF/g
|
||||
s/\\u00E0/\xE0/g
|
||||
s/\\u00E1/\xE1/g
|
||||
s/\\u00E2/\xE2/g
|
||||
s/\\u00E3/\xE3/g
|
||||
s/\\u00E4/\xE4/g
|
||||
s/\\u00E5/\xE5/g
|
||||
s/\\u00E6/\xE6/g
|
||||
s/\\u00E7/\xE7/g
|
||||
s/\\u00E8/\xE8/g
|
||||
s/\\u00E9/\xE9/g
|
||||
s/\\u00EA/\xEA/g
|
||||
s/\\u00EB/\xEB/g
|
||||
s/\\u00EC/\xEC/g
|
||||
s/\\u00ED/\xED/g
|
||||
s/\\u00EE/\xEE/g
|
||||
s/\\u00EF/\xEF/g
|
||||
s/\\u00F1/\xF1/g
|
||||
s/\\u00F2/\xF2/g
|
||||
s/\\u00F3/\xF3/g
|
||||
s/\\u00F4/\xF4/g
|
||||
s/\\u00F5/\xF5/g
|
||||
s/\\u00F6/\xF6/g
|
||||
s/\\u00F9/\xF9/g
|
||||
s/\\u00FA/\xFA/g
|
||||
s/\\u00FC/\xFC/g
|
||||
s/\\u0020/\x20/g
|
||||
s/\\u003f/\x3f/g
|
||||
s/\\u006f/\x6f/g
|
||||
s/\\u0075/\x75/g
|
||||
s/\\u00a0/\xa0/g
|
||||
s/\\u00a3/\xa3/g
|
||||
s/\\u00b0/\xb0/g
|
||||
s/\\u00ba/\xba/g
|
||||
s/\\u00bf/\xbf/g
|
||||
s/\\u00c1/\xc1/g
|
||||
s/\\u00c4/\xc4/g
|
||||
s/\\u00c5/\xc5/g
|
||||
s/\\u00c8/\xc8/g
|
||||
s/\\u00c9/\xc9/g
|
||||
s/\\u00ca/\xca/g
|
||||
s/\\u00cd/\xcd/g
|
||||
s/\\u00d6/\xd6/g
|
||||
s/\\u00dc/\xdc/g
|
||||
s/\\u00dd/\xdd/g
|
||||
s/\\u00df/\xdf/g
|
||||
s/\\u00e0/\xe0/g
|
||||
s/\\u00e1/\xe1/g
|
||||
s/\\u00e2/\xe2/g
|
||||
s/\\u00e3/\xe3/g
|
||||
s/\\u00e4/\xe4/g
|
||||
s/\\u00e5/\xe5/g
|
||||
s/\\u00e7/\xe7/g
|
||||
s/\\u00e8/\xe8/g
|
||||
s/\\u00e9/\xe9/g
|
||||
s/\\u00ea/\xea/g
|
||||
s/\\u00eb/\xeb/g
|
||||
s/\\u00ec/\xec/g
|
||||
s/\\u00ed/\xed/g
|
||||
s/\\u00ee/\xee/g
|
||||
s/\\u00ef/\xef/g
|
||||
s/\\u00f0/\xf0/g
|
||||
s/\\u00f1/\xf1/g
|
||||
s/\\u00f2/\xf2/g
|
||||
s/\\u00f3/\xf3/g
|
||||
s/\\u00f4/\xf4/g
|
||||
s/\\u00f5/\xf5/g
|
||||
s/\\u00f6/\xf6/g
|
||||
s/\\u00f7/\xf7/g
|
||||
s/\\u00f8/\xf8/g
|
||||
s/\\u00f9/\xf9/g
|
||||
s/\\u00fa/\xfa/g
|
||||
s/\\u00fc/\xfc/g
|
||||
s/\\u00ff/\xff/g
|
@ -87,7 +87,7 @@
|
||||
"zh", "zh_CN",
|
||||
#ifdef __linux__
|
||||
"bokmal", "nb_NO",
|
||||
"bokm\xE5l", "nb_NO",
|
||||
"bokmål", "nb_NO",
|
||||
"catalan", "ca_ES",
|
||||
"croatian", "hr_HR",
|
||||
"czech", "cs_CZ",
|
||||
@ -98,7 +98,7 @@
|
||||
"eesti", "et_EE",
|
||||
"estonian", "et_EE",
|
||||
"finnish", "fi_FI",
|
||||
"fran\xE7\x61is", "fr_FR",
|
||||
"français", "fr_FR",
|
||||
"french", "fr_FR",
|
||||
"galego", "gl_ES",
|
||||
"galician", "gl_ES",
|
||||
@ -162,7 +162,7 @@ static char *language_names[] = {
|
||||
"deutsch", "de",
|
||||
"dutch", "nl",
|
||||
"finnish", "fi",
|
||||
"fran\xE7\x61is", "fr",
|
||||
"français", "fr",
|
||||
"french", "fr",
|
||||
"german", "de",
|
||||
"greek", "el",
|
||||
|
@ -134,7 +134,7 @@ WCHAR * fixes[2][2][3][16] =
|
||||
L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
|
||||
},
|
||||
{ // currency
|
||||
L"\xA4", L"", L"\xA4 ", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
|
||||
L"¤", L"", L"¤ ", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
|
||||
},
|
||||
{ // percent
|
||||
L"", L"", L"%", L"% ", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
|
||||
@ -145,7 +145,7 @@ WCHAR * fixes[2][2][3][16] =
|
||||
L"(", L"-", L"- ", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
|
||||
},
|
||||
{ //currency
|
||||
L"(\xA4", L"-\xA4", L"\xA4-", L"\xA4", L"(", L"-", L"", L"", L"-", L"-\xA4 ", L"", L"\xA4 ", L"\xA4 -", L"", L"(\xA4 ", L"("
|
||||
L"(¤", L"-¤", L"¤-", L"¤", L"(", L"-", L"", L"", L"-", L"-¤ ", L"", L"¤ ", L"¤ -", L"", L"(¤ ", L"("
|
||||
},
|
||||
{ // percent
|
||||
L"-", L"-", L"-%", L"%-", L"%", L"", L"", L"-% ", L"", L"% ", L"% -", L"", L"", L"", L"", L"",
|
||||
@ -158,7 +158,7 @@ WCHAR * fixes[2][2][3][16] =
|
||||
L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L""
|
||||
},
|
||||
{ // currency
|
||||
L"", L"\xA4 ", L"", L" \xA4", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
|
||||
L"", L"¤ ", L"", L" ¤", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
|
||||
},
|
||||
{ // percent
|
||||
L" %", L"%", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
|
||||
@ -169,7 +169,7 @@ WCHAR * fixes[2][2][3][16] =
|
||||
L")", L"", L" ", L"-", L" -", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
|
||||
},
|
||||
{ //currency
|
||||
L")", L"", L"", L"-", L"\xA4)", L"\xA4", L"-\xA4", L"\xA4-", L" \xA4", L"", L" \xA4-", L"-", L"", L"- \xA4", L")", L" \xA4)"
|
||||
L")", L"", L"", L"-", L"¤)", L"¤", L"-¤", L"¤-", L" ¤", L"", L" ¤-", L"-", L"", L"- ¤", L")", L" ¤)"
|
||||
},
|
||||
{ // percent
|
||||
L" %", L"%", L"", L"", L"-", L"-%", L"%-", L"", L" %-", L"-", L"", L"- %", L"", L"", L"", L"",
|
||||
|
Loading…
x
Reference in New Issue
Block a user