diff --git a/.editorconfig b/.editorconfig index 98f07b3c5fb..8cf0bd8d62a 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,5 +1,8 @@ root = true +[*] +charset = utf-8 + [*.{cpp,hpp,c,h,java,cc,hh,m,mm,S,md,properties,gmk,m4,ac}] trim_trailing_whitespace = true diff --git a/.gitattributes b/.gitattributes index ebb586628c3..5a18aa21d98 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,4 +1,5 @@ * -text +* encoding=utf-8 *.java diff=java *.c diff=cpp *.h diff=cpp diff --git a/doc/building.html b/doc/building.html index b5d39a68ea8..fde6cde4c32 100644 --- a/doc/building.html +++ b/doc/building.html @@ -305,6 +305,14 @@ using
If using Cygwin, you must make sure the file permissions and attributes between Windows and Cygwin are consistent. It is recommended that you follow this procedure:
+UTF-8 support is needed to compile the JDK. On Unix systems, this
+typically means that the C.UTF-8
or
+en_US.UTF-8
locale needs to be available. For Windows
+users, please see the section on Locale
+Requirements below.
On Windows, if using Cygwin, extra care +must be taken to make sure the environment is consistent. It is +recommended that you follow this procedure:
Create the directory that is going to contain the top directory
of the JDK clone by using the mkdir
command in the Cygwin
diff --git a/doc/building.md b/doc/building.md
index 61490820061..1a9fe6b2e78 100644
--- a/doc/building.md
+++ b/doc/building.md
@@ -83,6 +83,11 @@ on where and how to check out the source code.
for the source code, see below for suggestions on how to keep the build
artifacts on a local disk.
+* UTF-8 support is needed to compile the JDK. On Unix systems, this typically
+ means that the `C.UTF-8` or `en_US.UTF-8` locale needs to be available. For
+ Windows users, please see the section on [Locale
+ Requirements](#locale-requirements) below.
+
* On Windows, extra care must be taken to have a smooth building experience:
* Make sure that all relevant paths have short names. Short names are used by
diff --git a/make/Docs.gmk b/make/Docs.gmk
index 2d8ce7c2a69..4733b22b1ad 100644
--- a/make/Docs.gmk
+++ b/make/Docs.gmk
@@ -96,14 +96,14 @@ JAVADOC_DISABLED_DOCLINT_PACKAGES := org.w3c.* javax.smartcardio
# The initial set of options for javadoc
JAVADOC_OPTIONS := -use -keywords -notimestamp \
- -serialwarn -encoding ISO-8859-1 -docencoding UTF-8 -breakiterator \
+ -serialwarn -encoding utf-8 -docencoding utf-8 -breakiterator \
-splitIndex --system none -javafx --expand-requires transitive \
--override-methods=summary --syntax-highlight
# The reference options must stay stable to allow for comparisons across the
# development cycle.
REFERENCE_OPTIONS := -XDignore.symbol.file=true -use -keywords -notimestamp \
- -serialwarn -encoding ISO-8859-1 -breakiterator -splitIndex --system none \
+ -serialwarn -encoding utf-8 -breakiterator -splitIndex --system none \
-html5 -javafx --expand-requires transitive
# Should we add DRAFT stamps to the generated javadoc?
diff --git a/make/autoconf/basic.m4 b/make/autoconf/basic.m4
index 6daba35547b..0e9470a1cff 100644
--- a/make/autoconf/basic.m4
+++ b/make/autoconf/basic.m4
@@ -134,17 +134,33 @@ AC_DEFUN_ONCE([BASIC_SETUP_BUILD_ENV],
)
AC_SUBST(BUILD_ENV)
+ AC_MSG_CHECKING([for locale to use])
if test "x$LOCALE" != x; then
# Check if we actually have C.UTF-8; if so, use it
if $LOCALE -a | $GREP -q -E "^C\.(utf8|UTF-8)$"; then
LOCALE_USED=C.UTF-8
+ AC_MSG_RESULT([C.UTF-8 (recommended)])
+ elif $LOCALE -a | $GREP -q -E "^en_US\.(utf8|UTF-8)$"; then
+ LOCALE_USED=en_US.UTF-8
+ AC_MSG_RESULT([en_US.UTF-8 (acceptable fallback)])
else
- AC_MSG_WARN([C.UTF-8 locale not found, using C locale])
- LOCALE_USED=C
+ # As a fallback, check if the user's locale is UTF-8. USER_LOCALE was saved
+ # by the wrapper configure script before autoconf messed up LC_ALL.
+ if $ECHO $USER_LOCALE | $GREP -q -E "\.(utf8|UTF-8)$"; then
+ LOCALE_USED=$USER_LOCALE
+ AC_MSG_RESULT([$USER_LOCALE (untested fallback)])
+ AC_MSG_WARN([Could not find C.UTF-8 or en_US.UTF-8 locale. This is not supported, and the build might fail unexpectedly.])
+ else
+ AC_MSG_RESULT([no UTF-8 locale found])
+ AC_MSG_WARN([No UTF-8 locale found. This is not supported. Proceeding with the C locale, but the build might fail unexpectedly.])
+ LOCALE_USED=C
+ fi
+ AC_MSG_NOTICE([The recommended locale is C.UTF-8, but en_US.UTF-8 is also accepted.])
fi
else
- AC_MSG_WARN([locale command not not found, using C locale])
- LOCALE_USED=C
+ LOCALE_USED=C.UTF-8
+ AC_MSG_RESULT([C.UTF-8 (default)])
+ AC_MSG_WARN([locale command not found, using C.UTF-8 locale])
fi
export LC_ALL=$LOCALE_USED
diff --git a/make/autoconf/configure b/make/autoconf/configure
index 6fa0aacfbc9..443a37bae77 100644
--- a/make/autoconf/configure
+++ b/make/autoconf/configure
@@ -49,7 +49,9 @@ fi
export CONFIG_SHELL=$BASH
export _as_can_reexec=no
-# Make sure all shell commands are executed with the C locale
+# Save user's effective locale (LC_ALL, else LC_CTYPE, else LANG), but make
+# sure all future shell commands are executed with the C locale
+export USER_LOCALE="${LC_ALL:-${LC_CTYPE:-$LANG}}"
export LC_ALL=C
if test "x$CUSTOM_CONFIG_DIR" != x; then
diff --git a/make/autoconf/flags-cflags.m4 b/make/autoconf/flags-cflags.m4
index bafedddf04f..eb0e5e20e4c 100644
--- a/make/autoconf/flags-cflags.m4
+++ b/make/autoconf/flags-cflags.m4
@@ -573,12 +573,20 @@ AC_DEFUN([FLAGS_SETUP_CFLAGS_HELPER],
TOOLCHAIN_CFLAGS_JDK="$TOOLCHAIN_CFLAGS_JDK -fvisibility=hidden -fstack-protector"
elif test "x$TOOLCHAIN_TYPE" = xmicrosoft; then
- # The -utf-8 option sets source and execution character sets to UTF-8 to enable correct
- # compilation of all source files regardless of the active code page on Windows.
- TOOLCHAIN_CFLAGS_JVM="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -utf-8 -MP"
- TOOLCHAIN_CFLAGS_JDK="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -utf-8 -Zc:wchar_t-"
+ TOOLCHAIN_CFLAGS_JVM="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -MP"
+ TOOLCHAIN_CFLAGS_JDK="-nologo -MD -Zc:preprocessor -Zc:inline -Zc:throwingNew -permissive- -Zc:wchar_t-"
fi
+ # Set character encoding in source
+ if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then
+ CHARSET_CFLAGS="-finput-charset=utf-8"
+ elif test "x$TOOLCHAIN_TYPE" = xmicrosoft; then
+ # The -utf-8 option sets both source and execution character sets
+ CHARSET_CFLAGS="-utf-8 -validate-charset"
+ fi
+ TOOLCHAIN_CFLAGS_JVM="$TOOLCHAIN_CFLAGS_JVM $CHARSET_CFLAGS"
+ TOOLCHAIN_CFLAGS_JDK="$TOOLCHAIN_CFLAGS_JDK $CHARSET_CFLAGS"
+
# CFLAGS C language level for JDK sources (hotspot only uses C++)
if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then
LANGSTD_CFLAGS="-std=c11"
diff --git a/make/common/JavaCompilation.gmk b/make/common/JavaCompilation.gmk
index 70b3557baea..cb647f0ed47 100644
--- a/make/common/JavaCompilation.gmk
+++ b/make/common/JavaCompilation.gmk
@@ -80,15 +80,13 @@ endef
#
# The sed expression does this:
# 1. Add a backslash before any :, = or ! that do not have a backslash already.
-# 2. Apply the file unicode2x.sed which does a whole bunch of \u00XX to \xXX
-# conversions.
-# 3. Delete all lines starting with #.
-# 4. Delete empty lines.
-# 5. Append lines ending with \ with the next line.
-# 6. Remove leading and trailing white space. Note that tabs must be explicit
+# 2. Delete all lines starting with #.
+# 3. Delete empty lines.
+# 4. Append lines ending with \ with the next line.
+# 5. Remove leading and trailing white space. Note that tabs must be explicit
# as sed on macosx does not understand '\t'.
-# 7. Replace the first \= with just =.
-# 8. Finally it's all sorted to create a stable output.
+# 6. Replace the first \= with just =.
+# 7. Finally it's all sorted to create a stable output.
#
# It is assumed that = is the character used for separating names and values.
define add_file_to_clean
@@ -108,7 +106,6 @@ define add_file_to_clean
( $(CAT) $$< && $(ECHO) "" ) \
| $(SED) -e 's/\([^\\]\):/\1\\:/g' -e 's/\([^\\]\)=/\1\\=/g' \
-e 's/\([^\\]\)!/\1\\!/g' -e 's/^[ ]*#.*/#/g' \
- | $(SED) -f "$$(TOPDIR)/make/common/support/unicode2x.sed" \
| $(SED) -e '/^#/d' -e '/^$$$$/d' \
-e :a -e '/\\$$$$/N; s/\\\n//; ta' \
-e 's/^[ ]*//;s/[ ]*$$$$//' \
@@ -265,10 +262,12 @@ define SetupJavaCompilationBody
endif
# Tell javac to do exactly as told and no more
- PARANOIA_FLAGS := -implicit:none -Xprefer:source -XDignore.symbol.file=true -encoding ascii
+ PARANOIA_FLAGS := -implicit:none -Xprefer:source -XDignore.symbol.file=true
$1_FLAGS += -g -Xlint:all $$($1_TARGET_RELEASE) $$(PARANOIA_FLAGS)
$1_FLAGS += $$($1_JAVAC_FLAGS)
+ # Set character encoding in source
+ $1_FLAGS += -encoding utf-8
ifeq ($$(JAVA_WARNINGS_AS_ERRORS), true)
$1_FLAGS += -Werror
diff --git a/make/common/JdkNativeCompilation.gmk b/make/common/JdkNativeCompilation.gmk
index 372ad39305c..0285669ffd8 100644
--- a/make/common/JdkNativeCompilation.gmk
+++ b/make/common/JdkNativeCompilation.gmk
@@ -227,6 +227,8 @@ endef
GLOBAL_VERSION_INFO_RESOURCE := $(TOPDIR)/src/java.base/windows/native/common/version.rc
+# \xA9 is the copyright symbol in ANSI encoding (Windows-1252), which rc.exe
+# assumes the resource file is in.
JDK_RCFLAGS=$(RCFLAGS) \
-D"JDK_VERSION_STRING=$(VERSION_STRING)" \
-D"JDK_COMPANY=$(JDK_RC_COMPANY_NAME)" \
diff --git a/make/common/support/unicode2x.sed b/make/common/support/unicode2x.sed
deleted file mode 100644
index 5188b97fe03..00000000000
--- a/make/common/support/unicode2x.sed
+++ /dev/null
@@ -1,100 +0,0 @@
-s/\\u0020/\x20/g
-s/\\u003A/\x3A/g
-s/\\u006B/\x6B/g
-s/\\u0075/\x75/g
-s/\\u00A0/\xA0/g
-s/\\u00A3/\xA3/g
-s/\\u00B0/\xB0/g
-s/\\u00B7/\xB7/g
-s/\\u00BA/\xBA/g
-s/\\u00BF/\xBF/g
-s/\\u00C0/\xC0/g
-s/\\u00C1/\xC1/g
-s/\\u00C2/\xC2/g
-s/\\u00C4/\xC4/g
-s/\\u00C5/\xC5/g
-s/\\u00C8/\xC8/g
-s/\\u00C9/\xC9/g
-s/\\u00CA/\xCA/g
-s/\\u00CD/\xCD/g
-s/\\u00CE/\xCE/g
-s/\\u00D3/\xD3/g
-s/\\u00D4/\xD4/g
-s/\\u00D6/\xD6/g
-s/\\u00DA/\xDA/g
-s/\\u00DC/\xDC/g
-s/\\u00DD/\xDD/g
-s/\\u00DF/\xDF/g
-s/\\u00E0/\xE0/g
-s/\\u00E1/\xE1/g
-s/\\u00E2/\xE2/g
-s/\\u00E3/\xE3/g
-s/\\u00E4/\xE4/g
-s/\\u00E5/\xE5/g
-s/\\u00E6/\xE6/g
-s/\\u00E7/\xE7/g
-s/\\u00E8/\xE8/g
-s/\\u00E9/\xE9/g
-s/\\u00EA/\xEA/g
-s/\\u00EB/\xEB/g
-s/\\u00EC/\xEC/g
-s/\\u00ED/\xED/g
-s/\\u00EE/\xEE/g
-s/\\u00EF/\xEF/g
-s/\\u00F1/\xF1/g
-s/\\u00F2/\xF2/g
-s/\\u00F3/\xF3/g
-s/\\u00F4/\xF4/g
-s/\\u00F5/\xF5/g
-s/\\u00F6/\xF6/g
-s/\\u00F9/\xF9/g
-s/\\u00FA/\xFA/g
-s/\\u00FC/\xFC/g
-s/\\u0020/\x20/g
-s/\\u003f/\x3f/g
-s/\\u006f/\x6f/g
-s/\\u0075/\x75/g
-s/\\u00a0/\xa0/g
-s/\\u00a3/\xa3/g
-s/\\u00b0/\xb0/g
-s/\\u00ba/\xba/g
-s/\\u00bf/\xbf/g
-s/\\u00c1/\xc1/g
-s/\\u00c4/\xc4/g
-s/\\u00c5/\xc5/g
-s/\\u00c8/\xc8/g
-s/\\u00c9/\xc9/g
-s/\\u00ca/\xca/g
-s/\\u00cd/\xcd/g
-s/\\u00d6/\xd6/g
-s/\\u00dc/\xdc/g
-s/\\u00dd/\xdd/g
-s/\\u00df/\xdf/g
-s/\\u00e0/\xe0/g
-s/\\u00e1/\xe1/g
-s/\\u00e2/\xe2/g
-s/\\u00e3/\xe3/g
-s/\\u00e4/\xe4/g
-s/\\u00e5/\xe5/g
-s/\\u00e7/\xe7/g
-s/\\u00e8/\xe8/g
-s/\\u00e9/\xe9/g
-s/\\u00ea/\xea/g
-s/\\u00eb/\xeb/g
-s/\\u00ec/\xec/g
-s/\\u00ed/\xed/g
-s/\\u00ee/\xee/g
-s/\\u00ef/\xef/g
-s/\\u00f0/\xf0/g
-s/\\u00f1/\xf1/g
-s/\\u00f2/\xf2/g
-s/\\u00f3/\xf3/g
-s/\\u00f4/\xf4/g
-s/\\u00f5/\xf5/g
-s/\\u00f6/\xf6/g
-s/\\u00f7/\xf7/g
-s/\\u00f8/\xf8/g
-s/\\u00f9/\xf9/g
-s/\\u00fa/\xfa/g
-s/\\u00fc/\xfc/g
-s/\\u00ff/\xff/g
diff --git a/src/java.base/unix/native/libjava/locale_str.h b/src/java.base/unix/native/libjava/locale_str.h
index 4d85b88ffb7..ea1a8c8fd47 100644
--- a/src/java.base/unix/native/libjava/locale_str.h
+++ b/src/java.base/unix/native/libjava/locale_str.h
@@ -87,7 +87,7 @@
"zh", "zh_CN",
#ifdef __linux__
"bokmal", "nb_NO",
- "bokm\xE5l", "nb_NO",
+ "bokmål", "nb_NO",
"catalan", "ca_ES",
"croatian", "hr_HR",
"czech", "cs_CZ",
@@ -98,7 +98,7 @@
"eesti", "et_EE",
"estonian", "et_EE",
"finnish", "fi_FI",
- "fran\xE7\x61is", "fr_FR",
+ "français", "fr_FR",
"french", "fr_FR",
"galego", "gl_ES",
"galician", "gl_ES",
@@ -162,7 +162,7 @@ static char *language_names[] = {
"deutsch", "de",
"dutch", "nl",
"finnish", "fi",
- "fran\xE7\x61is", "fr",
+ "français", "fr",
"french", "fr",
"german", "de",
"greek", "el",
diff --git a/src/java.base/windows/native/libjava/HostLocaleProviderAdapter_md.c b/src/java.base/windows/native/libjava/HostLocaleProviderAdapter_md.c
index ebbad326146..df5ad1e8911 100644
--- a/src/java.base/windows/native/libjava/HostLocaleProviderAdapter_md.c
+++ b/src/java.base/windows/native/libjava/HostLocaleProviderAdapter_md.c
@@ -134,7 +134,7 @@ WCHAR * fixes[2][2][3][16] =
L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
},
{ // currency
- L"\xA4", L"", L"\xA4 ", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
+ L"¤", L"", L"¤ ", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
},
{ // percent
L"", L"", L"%", L"% ", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
@@ -145,7 +145,7 @@ WCHAR * fixes[2][2][3][16] =
L"(", L"-", L"- ", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
},
{ //currency
- L"(\xA4", L"-\xA4", L"\xA4-", L"\xA4", L"(", L"-", L"", L"", L"-", L"-\xA4 ", L"", L"\xA4 ", L"\xA4 -", L"", L"(\xA4 ", L"("
+ L"(¤", L"-¤", L"¤-", L"¤", L"(", L"-", L"", L"", L"-", L"-¤ ", L"", L"¤ ", L"¤ -", L"", L"(¤ ", L"("
},
{ // percent
L"-", L"-", L"-%", L"%-", L"%", L"", L"", L"-% ", L"", L"% ", L"% -", L"", L"", L"", L"", L"",
@@ -158,7 +158,7 @@ WCHAR * fixes[2][2][3][16] =
L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L""
},
{ // currency
- L"", L"\xA4 ", L"", L" \xA4", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
+ L"", L"¤ ", L"", L" ¤", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
},
{ // percent
L" %", L"%", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
@@ -169,7 +169,7 @@ WCHAR * fixes[2][2][3][16] =
L")", L"", L" ", L"-", L" -", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"",
},
{ //currency
- L")", L"", L"", L"-", L"\xA4)", L"\xA4", L"-\xA4", L"\xA4-", L" \xA4", L"", L" \xA4-", L"-", L"", L"- \xA4", L")", L" \xA4)"
+ L")", L"", L"", L"-", L"¤)", L"¤", L"-¤", L"¤-", L" ¤", L"", L" ¤-", L"-", L"", L"- ¤", L")", L" ¤)"
},
{ // percent
L" %", L"%", L"", L"", L"-", L"-%", L"%-", L"", L" %-", L"-", L"", L"- %", L"", L"", L"", L"",