8348638: Performance regression in Math.tanh

Reviewed-by: jbhateja, epeter, sviswanathan
This commit is contained in:
Mohamed Issa 2025-05-02 17:21:50 +00:00 committed by Jatin Bhateja
parent 84f570c573
commit c8bbcaf5de
2 changed files with 171 additions and 18 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2024, Intel Corporation. All rights reserved.
* Copyright (c) 2024, 2025, Intel Corporation. All rights reserved.
* Intel Math Library (LIBM) Source Code
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
@ -46,7 +46,7 @@
// for |x| in [23/64,3*2^7)
// e^{-2*|x|}=2^{-k-f}*2^{-r} ~ 2^{-k}*(Tn+Dn)*(1+p)=(T0+D0)*(1+p)
//
// For |x| in [2^{-4},2^5):
// For |x| in [2^{-4},22):
// 2^{-r}-1 ~ p=c1*r+c2*r^2+..+c5*r^5
// Let R=1/(1+T0+p*T0), truncated to 35 significant bits
// R=1/(1+T0+D0+p*(T0+D0))*(1+eps), |eps|<2^{-33}
@ -66,11 +66,11 @@
//
// For |x|<2^{-64}: x is returned
//
// For |x|>=2^32: return +/-1
// For |x|>=22: return +/-1
//
// Special cases:
// tanh(NaN) = quiet NaN, and raise invalid exception
// tanh(INF) = that INF
// tanh(+/-INF) = +/-1
// tanh(+/-0) = +/-0
//
/******************************************************************************/
@ -324,6 +324,12 @@ address StubGenerator::generate_libmTanh() {
__ enter(); // required for proper stackwalking of RuntimeStub frame
__ bind(B1_2);
__ pextrw(rcx, xmm0, 3);
__ movl(rdx, 32768);
__ andl(rdx, rcx);
__ andl(rcx, 32767);
__ cmpl(rcx, 16438);
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_1); // Branch only if |x| >= 22
__ movsd(xmm3, ExternalAddress(HALFMASK), r11 /*rscratch*/);
__ xorpd(xmm4, xmm4);
__ movsd(xmm1, ExternalAddress(L2E), r11 /*rscratch*/);
@ -331,16 +337,12 @@ address StubGenerator::generate_libmTanh() {
__ movl(rax, 32768);
__ pinsrw(xmm4, rax, 3);
__ movsd(xmm6, ExternalAddress(Shifter), r11 /*rscratch*/);
__ pextrw(rcx, xmm0, 3);
__ andpd(xmm3, xmm0);
__ andnpd(xmm4, xmm0);
__ pshufd(xmm5, xmm4, 68);
__ movl(rdx, 32768);
__ andl(rdx, rcx);
__ andl(rcx, 32767);
__ subl(rcx, 16304);
__ cmpl(rcx, 144);
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_1);
__ cmpl(rcx, 134);
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_1); // Branch only if |x| is not in [2^{-4},22)
__ subsd(xmm4, xmm3);
__ mulsd(xmm3, xmm1);
__ mulsd(xmm2, xmm5);
@ -427,8 +429,8 @@ address StubGenerator::generate_libmTanh() {
__ bind(L_2TAG_PACKET_0_0_1);
__ addl(rcx, 960);
__ cmpl(rcx, 1104);
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_1_0_1);
__ cmpl(rcx, 1094);
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_1_0_1); // Branch only if |x| not in [2^{-64}, 2^{-4})
__ movdqu(xmm2, ExternalAddress(pv), r11 /*rscratch*/);
__ pshufd(xmm1, xmm0, 68);
__ movdqu(xmm3, ExternalAddress(pv + 16), r11 /*rscratch*/);
@ -449,11 +451,8 @@ address StubGenerator::generate_libmTanh() {
__ jmp(B1_4);
__ bind(L_2TAG_PACKET_1_0_1);
__ addl(rcx, 15344);
__ cmpl(rcx, 16448);
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_1);
__ cmpl(rcx, 16);
__ jcc(Assembler::below, L_2TAG_PACKET_3_0_1);
__ jcc(Assembler::below, L_2TAG_PACKET_3_0_1); // Branch only if |x| is denormalized
__ xorpd(xmm2, xmm2);
__ movl(rax, 17392);
__ pinsrw(xmm2, rax, 3);
@ -468,7 +467,7 @@ address StubGenerator::generate_libmTanh() {
__ bind(L_2TAG_PACKET_2_0_1);
__ cmpl(rcx, 32752);
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_4_0_1);
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_4_0_1); // Branch only if |x| is INF or NaN
__ xorpd(xmm2, xmm2);
__ movl(rcx, 15344);
__ pinsrw(xmm2, rcx, 3);
@ -489,7 +488,7 @@ address StubGenerator::generate_libmTanh() {
__ movdl(rcx, xmm2);
__ orl(rcx, rax);
__ cmpl(rcx, 0);
__ jcc(Assembler::equal, L_2TAG_PACKET_5_0_1);
__ jcc(Assembler::equal, L_2TAG_PACKET_5_0_1); // Branch only if |x| is not NaN
__ addsd(xmm0, xmm0);
__ bind(B1_4);

View File

@ -0,0 +1,154 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package org.openjdk.bench.java.lang;
import java.util.concurrent.TimeUnit;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Level;
import org.openjdk.jmh.annotations.Warmup;
import org.openjdk.jmh.annotations.OperationsPerInvocation;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.runner.Runner;
import org.openjdk.jmh.runner.RunnerException;
import org.openjdk.jmh.runner.options.Options;
import org.openjdk.jmh.runner.options.OptionsBuilder;
import java.util.Random;
/**
 * JMH micro-benchmarks measuring the throughput of {@link Math#tanh(double)}.
 *
 * <p>Two independent benchmark states are provided:
 * <ul>
 *   <li>{@link TanhPerfRanges} evaluates {@code Math.tanh} over arrays of random
 *       inputs drawn from one of four magnitude ranges, for both signs.</li>
 *   <li>{@link TanhPerfConstant} evaluates {@code Math.tanh} on a handful of fixed
 *       constant inputs.</li>
 * </ul>
 */
public class TanhPerf {

    /**
     * Benchmarks {@code Math.tanh} over pre-generated random inputs.
     *
     * <p>The range the inputs are drawn from is selected by the
     * {@link #tanhRangeIndex} parameter, which indexes {@link #tanhRangeInputs}.
     *
     * <p>NOTE(review): warm-up and measurement iterations here last 5 milliseconds
     * each, whereas {@link TanhPerfConstant} uses 5 seconds. Confirm the much
     * shorter duration is intentional.
     */
    @Warmup(iterations = 3, time = 5, timeUnit = TimeUnit.MILLISECONDS)
    @Measurement(iterations = 4, time = 5, timeUnit = TimeUnit.MILLISECONDS)
    @Fork(2)
    @BenchmarkMode(Mode.Throughput)
    @State(Scope.Thread)
    @OutputTimeUnit(TimeUnit.MILLISECONDS)
    public static class TanhPerfRanges {

        /**
         * Number of inputs per sign, and therefore the number of {@code Math.tanh}
         * calls made by one benchmark invocation. It is a compile-time constant so
         * that {@code @OperationsPerInvocation} can reference it directly and can
         * never disagree with the loop trip count.
         */
        public static final int tanhInputCount = 2048;

        /** Index into {@link #tanhRangeInputs} selecting the range to sample from. */
        @Param({"0", "1", "2", "3"})
        public int tanhRangeIndex;

        /** Random inputs drawn from the selected range. */
        public double[] tanhPosRandInputs;

        /** Random inputs drawn from the negated selected range. */
        public double[] tanhNegRandInputs;

        // Not read by any code in this class; retained because it is a public field.
        public int tanhInputIndex = 0;

        /**
         * Candidate input ranges as {lower bound, upper bound} pairs.
         * Presumably chosen to exercise the distinct magnitude intervals handled by
         * the tanh implementation under test; confirm against that implementation.
         */
        public double[][] tanhRangeInputs = {
            {0.0, 0x1.0P-55},
            {0x1.0P-55, 1.0},
            {1.0, 22.0},
            {22.1, 1.7976931348623157E308}
        };

        /**
         * Fills the positive and negative input arrays with pseudo-random values
         * from the range selected by {@link #tanhRangeIndex}. A fixed seed is used,
         * so the generated inputs are the same on every run.
         */
        @Setup
        public void setupValues() {
            Random random = new Random(1023);

            // The bounds depend only on the selected range, so look them up once.
            final double tanhLowerBound = tanhRangeInputs[tanhRangeIndex][0];
            final double tanhUpperBound = tanhRangeInputs[tanhRangeIndex][1];

            tanhPosRandInputs = new double[tanhInputCount];
            tanhNegRandInputs = new double[tanhInputCount];

            // Draw the positive and negative value for each slot alternately so the
            // sequence of values taken from the generator stays the same.
            for (int i = 0; i < tanhInputCount; i++) {
                tanhPosRandInputs[i] = random.nextDouble(tanhLowerBound, tanhUpperBound);
                tanhNegRandInputs[i] = random.nextDouble(-tanhUpperBound, -tanhLowerBound);
            }
        }

        /**
         * Evaluates {@code Math.tanh} on every positive input.
         *
         * @return the sum of the results, returned so the calls cannot be discarded
         */
        @Benchmark
        @OperationsPerInvocation(tanhInputCount)
        public double tanhPosRangeDouble() {
            double res = 0.0;
            for (int i = 0; i < tanhInputCount; i++) {
                res += Math.tanh(tanhPosRandInputs[i]);
            }
            return res;
        }

        /**
         * Evaluates {@code Math.tanh} on every negative input.
         *
         * @return the sum of the results, returned so the calls cannot be discarded
         */
        @Benchmark
        @OperationsPerInvocation(tanhInputCount)
        public double tanhNegRangeDouble() {
            double res = 0.0;
            for (int i = 0; i < tanhInputCount; i++) {
                res += Math.tanh(tanhNegRandInputs[i]);
            }
            return res;
        }
    }

    /**
     * Benchmarks {@code Math.tanh} on fixed constant inputs of varying magnitude.
     */
    @Warmup(iterations = 3, time = 5, timeUnit = TimeUnit.SECONDS)
    @Measurement(iterations = 4, time = 5, timeUnit = TimeUnit.SECONDS)
    @Fork(2)
    @BenchmarkMode(Mode.Throughput)
    @State(Scope.Thread)
    @OutputTimeUnit(TimeUnit.MILLISECONDS)
    public static class TanhPerfConstant {

        public static final double constDoubleTiny  = 0x1.0P-57;
        public static final double constDoubleSmall = 0x1.0P-54;
        public static final double constDouble1     = 1.0;
        public static final double constDouble21    = 21.0;
        public static final double constDoubleLarge = 23.0;

        /** @return {@code Math.tanh} of {@link #constDoubleTiny} (2^-57) */
        @Benchmark
        public double tanhConstDoubleTiny() {
            return Math.tanh(constDoubleTiny);
        }

        /** @return {@code Math.tanh} of {@link #constDoubleSmall} (2^-54) */
        @Benchmark
        public double tanhConstDoubleSmall() {
            return Math.tanh(constDoubleSmall);
        }

        /** @return {@code Math.tanh} of {@link #constDouble1} (1.0) */
        @Benchmark
        public double tanhConstDouble1() {
            return Math.tanh(constDouble1);
        }

        /** @return {@code Math.tanh} of {@link #constDouble21} (21.0) */
        @Benchmark
        public double tanhConstDouble21() {
            return Math.tanh(constDouble21);
        }

        /** @return {@code Math.tanh} of {@link #constDoubleLarge} (23.0) */
        @Benchmark
        public double tanhConstDoubleLarge() {
            return Math.tanh(constDoubleLarge);
        }
    }

    /**
     * Runs the range benchmarks and then the constant benchmarks, each in its own
     * JMH runner session.
     *
     * @param args ignored
     * @throws RunnerException if a JMH run fails
     */
    public static void main(String[] args) throws RunnerException {
        runBenchmarks(TanhPerfRanges.class);
        runBenchmarks(TanhPerfConstant.class);
    }

    /** Runs every benchmark whose name matches the simple name of the given state class. */
    private static void runBenchmarks(Class<?> benchmarkClass) throws RunnerException {
        Options opt = new OptionsBuilder()
                .include(benchmarkClass.getSimpleName())
                .build();
        new Runner(opt).run();
    }
}