8358066: Non-ascii package names gives compilation error "import requires canonical name"

Reviewed-by: jlahoda, naoto
2025-06-04 12:56:18 +00:00 · 2025-06-04 12:56:18 +00:00 · 09ec4de74d
commit 09ec4de74d
parent dc961609f8
2 changed files with 73 additions and 5 deletions
--- a/src/jdk.compiler/share/classes/com/sun/tools/javac/util/Convert.java
+++ b/src/jdk.compiler/share/classes/com/sun/tools/javac/util/Convert.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2025, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -224,9 +224,7 @@ public class Convert {
    public static int utfNumChars(byte[] buf, int off, int len) {
        int numChars = 0;
        while (len-- > 0) {
-            int byte1 = buf[off++];
-            if (byte1 < 0)
-                len -= ((byte1 & 0xe0) == 0xc0) ? 1 : 2;
+            if ((buf[off++] & 0xc0) != 0x80)
                numChars++;
        }
        return numChars;
--- a/test/langtools/tools/javac/nametable/TestUtfNumChars.java
+++ b/test/langtools/tools/javac/nametable/TestUtfNumChars.java
@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8358066
+ * @summary Test for bug in Convert.utfNumChars()
+ * @modules jdk.compiler/com.sun.tools.javac.util
+ * @run main TestUtfNumChars
+ */
+
+import com.sun.tools.javac.util.Convert;
+
+import java.util.function.IntPredicate;
+import java.util.stream.IntStream;
+
+public class TestUtfNumChars {
+
+    public static void main(String[] args) {
+
+        // This is the string "ab«cd≤ef🟢gh"
+        String s = "ab\u00ABcd\u2264ef\ud83d\udd34gh";
+
+        // This is its modified UTF-8 encoding
+        byte[] utf8 = Convert.string2utf(s);    // UTF-8: 61 62 c2 ab 63 64 e2 89 a4 65 66 ed a0 bd ed b4 b4 67 68
+                                                // Bytes: 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 16 17 18
+                                                // Chars: 00 01 02 .. 03 04 05 .. .. 06 07 08 .. .. 09 .. .. 10 11
+
+        // These are the offsets in "utf8" marking the boundaries of encoded Java charcters
+        int[] offsets = new int[] {
+            0, 1, 2, 4, 5, 6, 9, 10, 11, 14, 17, 18
+        };
+        IntPredicate boundary = off -> off == utf8.length || IntStream.of(offsets).anyMatch(off2 -> off2 == off);
+
+        // Check Convert.utfNumChars() on every subsequence
+        for (int i = 0; i < offsets.length; i++) {
+            int i_off = offsets[i];
+            if (!boundary.test(i_off))
+                continue;
+            for (int j = i; j < offsets.length; j++) {
+                int j_off = offsets[j];
+                if (!boundary.test(j_off))
+                    continue;
+                int nchars = Convert.utfNumChars(utf8, i_off, j_off - i_off);
+                if (nchars != j - i)
+                    throw new AssertionError(String.format("nchars %d != %d for [%d, %d)", nchars, j - i, i_off, j_off));
+            }
+        }
+    }
+}