diff --git a/src/java.base/share/classes/java/lang/StringCoding.java b/src/java.base/share/classes/java/lang/StringCoding.java
index 293fbdb78dc..c02af28c37d 100644
--- a/src/java.base/share/classes/java/lang/StringCoding.java
+++ b/src/java.base/share/classes/java/lang/StringCoding.java
@@ -1,5 +1,6 @@
 /*
- * Copyright (c) 2000, 2022, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,6 +35,45 @@ class StringCoding {
 
     private StringCoding() { }
 
+    /**
+     * Count the number of leading non-zero ascii chars in the range.
+     */
+    public static int countNonZeroAscii(String s) {
+        byte[] value = s.value();
+        if (s.isLatin1()) {
+            return countNonZeroAsciiLatin1(value, 0, value.length);
+        } else {
+            return countNonZeroAsciiUTF16(value, 0, s.length());
+        }
+    }
+
+    /**
+     * Count the number of leading non-zero ascii chars in the range.
+     */
+    public static int countNonZeroAsciiLatin1(byte[] ba, int off, int len) {
+        int limit = off + len;
+        for (int i = off; i < limit; i++) {
+            if (ba[i] <= 0) {
+                return i - off;
+            }
+        }
+        return len;
+    }
+
+    /**
+     * Count the number of leading non-zero ascii chars in the range.
+     */
+    public static int countNonZeroAsciiUTF16(byte[] ba, int off, int strlen) {
+        int limit = off + strlen;
+        for (int i = off; i < limit; i++) {
+            char c = StringUTF16.charAt(ba, i);
+            if (c == 0 || c > 0x7F) {
+                return i - off;
+            }
+        }
+        return strlen;
+    }
+
     public static boolean hasNegatives(byte[] ba, int off, int len) {
         return countPositives(ba, off, len) != len;
     }
diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java
index 3c25ad3720d..682b6ca2c2a 100644
--- a/src/java.base/share/classes/java/lang/System.java
+++ b/src/java.base/share/classes/java/lang/System.java
@@ -2569,6 +2569,9 @@ public final class System {
             public int countPositives(byte[] bytes, int offset, int length) {
                 return StringCoding.countPositives(bytes, offset, length);
             }
+            public int countNonZeroAscii(String s) {
+                return StringCoding.countNonZeroAscii(s);
+            }
             public String newStringNoRepl(byte[] bytes, Charset cs) throws CharacterCodingException {
                 return String.newStringNoRepl(bytes, cs);
             }
diff --git a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java
index 51d4ef51c1f..a980fcc9896 100644
--- a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java
+++ b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java
@@ -318,6 +318,11 @@ public interface JavaLangAccess {
      */
     int countPositives(byte[] ba, int off, int len);
 
+    /**
+     * Count the number of leading non-zero ascii chars in the String.
+     */
+    int countNonZeroAscii(String s);
+
     /**
      * Constructs a new {@code String} by decoding the specified subarray of
      * bytes using the specified {@linkplain java.nio.charset.Charset charset}.
diff --git a/src/java.base/share/classes/jdk/internal/classfile/impl/AbstractPoolEntry.java b/src/java.base/share/classes/jdk/internal/classfile/impl/AbstractPoolEntry.java
index bc885291b44..9f9fcd8d91a 100644
--- a/src/java.base/share/classes/jdk/internal/classfile/impl/AbstractPoolEntry.java
+++ b/src/java.base/share/classes/jdk/internal/classfile/impl/AbstractPoolEntry.java
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -409,60 +410,14 @@ public abstract sealed class AbstractPoolEntry {
 
         @Override
         void writeTo(BufWriterImpl pool) {
+            pool.writeU1(tag);
             if (rawBytes != null) {
-                pool.writeU1(tag);
                 pool.writeU2(rawLen);
                 pool.writeBytes(rawBytes, offset, rawLen);
             }
             else {
                 // state == STRING and no raw bytes
-                if (stringValue.length() > 65535) {
-                    throw new IllegalArgumentException("string too long");
-                }
-                pool.writeU1(tag);
-                pool.writeU2(charLen);
-                for (int i = 0; i < charLen; ++i) {
-                    char c = stringValue.charAt(i);
-                    if (c >= '\001' && c <= '\177') {
-                        // Optimistic writing -- hope everything is bytes
-                        // If not, we bail out, and alternate path patches the length
-                        pool.writeU1((byte) c);
-                    }
-                    else {
-                        int charLength = stringValue.length();
-                        int byteLength = i;
-                        char c1;
-                        for (int j = i; j < charLength; ++j) {
-                            c1 = (stringValue).charAt(j);
-                            if (c1 >= '\001' && c1 <= '\177') {
-                                byteLength++;
-                            } else if (c1 > '\u07FF') {
-                                byteLength += 3;
-                            } else {
-                                byteLength += 2;
-                            }
-                        }
-                        if (byteLength > 65535) {
-                            throw new IllegalArgumentException();
-                        }
-                        int byteLengthFinal = byteLength;
-                        pool.patchInt(pool.size() - i - 2, 2, byteLengthFinal);
-                        for (int j = i; j < charLength; ++j) {
-                            c1 = (stringValue).charAt(j);
-                            if (c1 >= '\001' && c1 <= '\177') {
-                                pool.writeU1((byte) c1);
-                            } else if (c1 > '\u07FF') {
-                                pool.writeU1((byte) (0xE0 | c1 >> 12 & 0xF));
-                                pool.writeU1((byte) (0x80 | c1 >> 6 & 0x3F));
-                                pool.writeU1((byte) (0x80 | c1 & 0x3F));
-                            } else {
-                                pool.writeU1((byte) (0xC0 | c1 >> 6 & 0x1F));
-                                pool.writeU1((byte) (0x80 | c1 & 0x3F));
-                            }
-                        }
-                        break;
-                    }
-                }
+                pool.writeUTF(stringValue);
             }
         }
     }
diff --git a/src/java.base/share/classes/jdk/internal/classfile/impl/BufWriterImpl.java b/src/java.base/share/classes/jdk/internal/classfile/impl/BufWriterImpl.java
index 255e5e21cf0..0c317065162 100644
--- a/src/java.base/share/classes/jdk/internal/classfile/impl/BufWriterImpl.java
+++ b/src/java.base/share/classes/jdk/internal/classfile/impl/BufWriterImpl.java
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2022, 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +35,11 @@ import java.lang.classfile.constantpool.ConstantPool;
 import java.lang.classfile.constantpool.ConstantPoolBuilder;
 import java.lang.classfile.constantpool.PoolEntry;
 
+import jdk.internal.access.JavaLangAccess;
+import jdk.internal.access.SharedSecrets;
+
 public final class BufWriterImpl implements BufWriter {
+    private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
 
     private final ConstantPoolBuilder constantPool;
     private final ClassFileImpl context;
@@ -152,6 +157,62 @@ public final class BufWriterImpl implements BufWriter {
         writeBytes(other.elems, 0, other.offset);
     }
 
+    /**
+     * Writes {@code str} as a u2 byte length followed by its bytes in the
+     * modified UTF-8 form used by the class file format: chars 0x01-0x7F take
+     * one byte, NUL and chars 0x80-0x7FF take two, and all others take three.
+     *
+     * @param str the string to encode
+     * @throws IllegalArgumentException if the encoded form exceeds 65535 bytes
+     */
+    @SuppressWarnings("deprecation")
+    void writeUTF(String str) {
+        int strlen = str.length();
+        int countNonZeroAscii = JLA.countNonZeroAscii(str);
+        int utflen = strlen;
+        if (countNonZeroAscii != strlen) {
+            for (int i = countNonZeroAscii; i < strlen; i++) {
+                int c = str.charAt(i);
+                if (c >= 0x80 || c == 0)
+                    utflen += (c >= 0x800) ? 2 : 1;
+            }
+        }
+        if (utflen > 65535) {
+            throw new IllegalArgumentException("string too long");
+        }
+        reserveSpace(utflen + 2);
+
+        int offset = this.offset;
+        byte[] elems = this.elems;
+
+        elems[offset    ] = (byte) (utflen >> 8);
+        elems[offset + 1] = (byte) utflen;
+        offset += 2;
+
+        // Deprecated getBytes keeps only the low byte of each char, which is
+        // exact for the leading non-zero ASCII run copied here.
+        str.getBytes(0, countNonZeroAscii, elems, offset);
+        offset += countNonZeroAscii;
+
+        for (int i = countNonZeroAscii; i < strlen; ++i) {
+            char c = str.charAt(i);
+            if (c >= '\001' && c <= '\177') {
+                elems[offset++] = (byte) c;
+            } else if (c > '\u07FF') {
+                elems[offset    ] = (byte) (0xE0 | c >> 12 & 0xF);
+                elems[offset + 1] = (byte) (0x80 | c >> 6 & 0x3F);
+                elems[offset + 2] = (byte) (0x80 | c & 0x3F);
+                offset += 3;
+            } else {
+                elems[offset    ] = (byte) (0xC0 | c >> 6 & 0x1F);
+                elems[offset + 1] = (byte) (0x80 | c & 0x3F);
+                offset += 2;
+            }
+        }
+
+        this.offset = offset;
+    }
+
     @Override
     public void writeBytes(byte[] arr, int start, int length) {
         reserveSpace(length);
diff --git a/test/jdk/java/lang/String/CountNonZeroAscii.java b/test/jdk/java/lang/String/CountNonZeroAscii.java
new file mode 100644
index 00000000000..d4b8a4fb1eb
--- /dev/null
+++ b/test/jdk/java/lang/String/CountNonZeroAscii.java
@@ -0,0 +1,72 @@
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import jdk.internal.access.JavaLangAccess;
+import jdk.internal.access.SharedSecrets;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+/*
+ * @test
+ * @modules java.base/jdk.internal.access
+ * @summary test String countNonZeroAscii with Latin1 and UTF16 coders
+ * @run main/othervm -XX:+CompactStrings CountNonZeroAscii
+ * @run main/othervm -XX:-CompactStrings CountNonZeroAscii
+ */
+public class CountNonZeroAscii {
+    private static final JavaLangAccess JLA = SharedSecrets.getJavaLangAccess();
+
+    public static void main(String [] args) {
+        byte[] bytes = new byte[1000];
+
+        Arrays.fill(bytes, (byte) 'A');
+        String s = new String(bytes, StandardCharsets.ISO_8859_1);
+        assertEquals(bytes.length, JLA.countNonZeroAscii(s));
+
+        for (int i = 0; i < bytes.length; i++) {
+            for (int j = Byte.MIN_VALUE; j <= 0; j++) {
+                bytes[i] = (byte) j;
+                s = new String(bytes, StandardCharsets.ISO_8859_1);
+                assertEquals(i, JLA.countNonZeroAscii(s));
+            }
+            bytes[i] = (byte) 'A';
+        }
+
+        // A char above 0xFF forces the UTF16 coder even with CompactStrings enabled.
+        char[] chars = new char[100];
+        Arrays.fill(chars, 'A');
+        for (int i = 0; i < chars.length; i++) {
+            chars[i] = '\u4e2d';
+            assertEquals(i, JLA.countNonZeroAscii(new String(chars)));
+            chars[i] = 'A';
+        }
+    }
+
+    static void assertEquals(int expected, int actual) {
+        if (expected != actual) {
+            throw new AssertionError("Expected " + expected + " but got " + actual);
+        }
+    }
+}
diff --git a/test/micro/org/openjdk/bench/java/lang/classfile/Utf8EntryWriteTo.java b/test/micro/org/openjdk/bench/java/lang/classfile/Utf8EntryWriteTo.java
new file mode 100644
index 00000000000..a124b079268
--- /dev/null
+++ b/test/micro/org/openjdk/bench/java/lang/classfile/Utf8EntryWriteTo.java
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2024, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2024, Alibaba Group Holding Limited. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.java.lang.classfile;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+import java.lang.classfile.constantpool.ConstantPoolBuilder;
+import java.lang.classfile.constantpool.ClassEntry;
+import java.lang.classfile.*;
+import java.lang.constant.*;
+import java.nio.charset.StandardCharsets;
+import java.util.HexFormat;
+import java.util.concurrent.TimeUnit;
+import java.util.function.Consumer;
+
+import static java.lang.classfile.ClassFile.*;
+import static java.lang.constant.ConstantDescs.*;
+
+import jdk.internal.classfile.impl.*;
+/**
+ * Measures building a class file whose constant pool holds Utf8 entries of
+ * increasing length that end in an ASCII, 2-byte, 3-byte or emoji sequence.
+ */
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@Warmup(iterations = 1, time = 2)
+@Measurement(iterations = 3, time = 1)
+@Fork(jvmArgsAppend = "--enable-preview", value = 3)
+@State(Scope.Thread)
+public class Utf8EntryWriteTo {
+    static final ClassDesc STRING_BUILDER = ClassDesc.ofDescriptor("Ljava/lang/StringBuilder;");
+    static final MethodTypeDesc MTD_append = MethodTypeDesc.of(STRING_BUILDER, CD_String);
+    static final MethodTypeDesc MTD_String = MethodTypeDesc.of(CD_String);
+    static final ClassDesc CLASS_DESC = ClassDesc.ofDescriptor("Lorg/openjdk/bench/java/lang/classfile/String$$StringConcat;");
+
+    @Param({"ascii", "utf8_2_bytes", "utf8_3_bytes", "emoji"})
+    public String charType;
+    ConstantPoolBuilder poolBuilder;
+    ClassEntry thisClass;
+
+    @Setup
+    public void setup() throws Exception {
+        byte[] bytes = HexFormat.of().parseHex(
+                switch (charType) {
+                    case "ascii" -> "78";
+                    case "utf8_2_bytes" -> "c2a9";
+                    case "utf8_3_bytes" -> "e6b8a9";
+                    case "emoji" -> "e29da3efb88f";
+                    default -> throw new IllegalArgumentException("bad charType: " + charType);
+                }
+        );
+        String s = new String(bytes, 0, bytes.length, StandardCharsets.UTF_8);
+        String[] constants = new String[128];
+        for (int i = 0; i < constants.length; i++) {
+            constants[i] = "A".repeat(i).concat(s);
+        }
+
+        poolBuilder = ConstantPoolBuilder.of();
+        thisClass = poolBuilder.classEntry(CLASS_DESC);
+        for (var c : constants) {
+            poolBuilder.utf8Entry(c);
+        }
+    }
+
+    @Benchmark
+    public void writeTo(Blackhole bh) {
+        bh.consume(ClassFile
+                .of()
+                .build(thisClass, poolBuilder, (ClassBuilder clb) -> {}));
+    }
+}