8348638: Performance regression in Math.tanh
Reviewed-by: jbhateja, epeter, sviswanathan
This commit is contained in:
parent
84f570c573
commit
c8bbcaf5de
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2024, Intel Corporation. All rights reserved.
|
||||
* Copyright (c) 2024, 2025, Intel Corporation. All rights reserved.
|
||||
* Intel Math Library (LIBM) Source Code
|
||||
*
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
@ -46,7 +46,7 @@
|
||||
// for |x| in [23/64,3*2^7)
|
||||
// e^{-2*|x|}=2^{-k-f}*2^{-r} ~ 2^{-k}*(Tn+Dn)*(1+p)=(T0+D0)*(1+p)
|
||||
//
|
||||
// For |x| in [2^{-4},2^5):
|
||||
// For |x| in [2^{-4},22):
|
||||
// 2^{-r}-1 ~ p=c1*r+c2*r^2+..+c5*r^5
|
||||
// Let R=1/(1+T0+p*T0), truncated to 35 significant bits
|
||||
// R=1/(1+T0+D0+p*(T0+D0))*(1+eps), |eps|<2^{-33}
|
||||
@ -66,11 +66,11 @@
|
||||
//
|
||||
// For |x|<2^{-64}: x is returned
|
||||
//
|
||||
// For |x|>=2^32: return +/-1
|
||||
// For |x|>=22: return +/-1
|
||||
//
|
||||
// Special cases:
|
||||
// tanh(NaN) = quiet NaN, and raise invalid exception
|
||||
// tanh(INF) = that INF
|
||||
// tanh(+/-INF) = +/-1
|
||||
// tanh(+/-0) = +/-0
|
||||
//
|
||||
/******************************************************************************/
|
||||
@ -324,6 +324,12 @@ address StubGenerator::generate_libmTanh() {
|
||||
__ enter(); // required for proper stackwalking of RuntimeStub frame
|
||||
|
||||
__ bind(B1_2);
|
||||
__ pextrw(rcx, xmm0, 3);
|
||||
__ movl(rdx, 32768);
|
||||
__ andl(rdx, rcx);
|
||||
__ andl(rcx, 32767);
|
||||
__ cmpl(rcx, 16438);
|
||||
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_1); // Branch only if |x| >= 22
|
||||
__ movsd(xmm3, ExternalAddress(HALFMASK), r11 /*rscratch*/);
|
||||
__ xorpd(xmm4, xmm4);
|
||||
__ movsd(xmm1, ExternalAddress(L2E), r11 /*rscratch*/);
|
||||
@ -331,16 +337,12 @@ address StubGenerator::generate_libmTanh() {
|
||||
__ movl(rax, 32768);
|
||||
__ pinsrw(xmm4, rax, 3);
|
||||
__ movsd(xmm6, ExternalAddress(Shifter), r11 /*rscratch*/);
|
||||
__ pextrw(rcx, xmm0, 3);
|
||||
__ andpd(xmm3, xmm0);
|
||||
__ andnpd(xmm4, xmm0);
|
||||
__ pshufd(xmm5, xmm4, 68);
|
||||
__ movl(rdx, 32768);
|
||||
__ andl(rdx, rcx);
|
||||
__ andl(rcx, 32767);
|
||||
__ subl(rcx, 16304);
|
||||
__ cmpl(rcx, 144);
|
||||
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_1);
|
||||
__ cmpl(rcx, 134);
|
||||
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_1); // Branch only if |x| is not in [2^{-4},22)
|
||||
__ subsd(xmm4, xmm3);
|
||||
__ mulsd(xmm3, xmm1);
|
||||
__ mulsd(xmm2, xmm5);
|
||||
@ -427,8 +429,8 @@ address StubGenerator::generate_libmTanh() {
|
||||
|
||||
__ bind(L_2TAG_PACKET_0_0_1);
|
||||
__ addl(rcx, 960);
|
||||
__ cmpl(rcx, 1104);
|
||||
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_1_0_1);
|
||||
__ cmpl(rcx, 1094);
|
||||
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_1_0_1); // Branch only if |x| not in [2^{-64}, 2^{-4})
|
||||
__ movdqu(xmm2, ExternalAddress(pv), r11 /*rscratch*/);
|
||||
__ pshufd(xmm1, xmm0, 68);
|
||||
__ movdqu(xmm3, ExternalAddress(pv + 16), r11 /*rscratch*/);
|
||||
@ -449,11 +451,8 @@ address StubGenerator::generate_libmTanh() {
|
||||
__ jmp(B1_4);
|
||||
|
||||
__ bind(L_2TAG_PACKET_1_0_1);
|
||||
__ addl(rcx, 15344);
|
||||
__ cmpl(rcx, 16448);
|
||||
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_1);
|
||||
__ cmpl(rcx, 16);
|
||||
__ jcc(Assembler::below, L_2TAG_PACKET_3_0_1);
|
||||
__ jcc(Assembler::below, L_2TAG_PACKET_3_0_1); // Branch only if |x| is denormalized
|
||||
__ xorpd(xmm2, xmm2);
|
||||
__ movl(rax, 17392);
|
||||
__ pinsrw(xmm2, rax, 3);
|
||||
@ -468,7 +467,7 @@ address StubGenerator::generate_libmTanh() {
|
||||
|
||||
__ bind(L_2TAG_PACKET_2_0_1);
|
||||
__ cmpl(rcx, 32752);
|
||||
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_4_0_1);
|
||||
__ jcc(Assembler::aboveEqual, L_2TAG_PACKET_4_0_1); // Branch only if |x| is INF or NaN
|
||||
__ xorpd(xmm2, xmm2);
|
||||
__ movl(rcx, 15344);
|
||||
__ pinsrw(xmm2, rcx, 3);
|
||||
@ -489,7 +488,7 @@ address StubGenerator::generate_libmTanh() {
|
||||
__ movdl(rcx, xmm2);
|
||||
__ orl(rcx, rax);
|
||||
__ cmpl(rcx, 0);
|
||||
__ jcc(Assembler::equal, L_2TAG_PACKET_5_0_1);
|
||||
__ jcc(Assembler::equal, L_2TAG_PACKET_5_0_1); // Branch only if |x| is not NaN
|
||||
__ addsd(xmm0, xmm0);
|
||||
|
||||
__ bind(B1_4);
|
||||
|
154
test/micro/org/openjdk/bench/java/lang/TanhPerf.java
Normal file
154
test/micro/org/openjdk/bench/java/lang/TanhPerf.java
Normal file
@ -0,0 +1,154 @@
|
||||
/*
|
||||
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
|
||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||
*
|
||||
* This code is free software; you can redistribute it and/or modify it
|
||||
* under the terms of the GNU General Public License version 2 only, as
|
||||
* published by the Free Software Foundation.
|
||||
*
|
||||
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
* version 2 for more details (a copy is included in the LICENSE file that
|
||||
* accompanied this code).
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License version
|
||||
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*
|
||||
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||
* or visit www.oracle.com if you need additional information or have any
|
||||
* questions.
|
||||
*/
|
||||
package org.openjdk.bench.java.lang;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import org.openjdk.jmh.annotations.Benchmark;
|
||||
import org.openjdk.jmh.annotations.BenchmarkMode;
|
||||
import org.openjdk.jmh.annotations.Fork;
|
||||
import org.openjdk.jmh.annotations.Measurement;
|
||||
import org.openjdk.jmh.annotations.Mode;
|
||||
import org.openjdk.jmh.annotations.Param;
|
||||
import org.openjdk.jmh.annotations.Scope;
|
||||
import org.openjdk.jmh.annotations.Setup;
|
||||
import org.openjdk.jmh.annotations.State;
|
||||
import org.openjdk.jmh.annotations.Level;
|
||||
import org.openjdk.jmh.annotations.Warmup;
|
||||
import org.openjdk.jmh.annotations.OperationsPerInvocation;
|
||||
import org.openjdk.jmh.annotations.OutputTimeUnit;
|
||||
import org.openjdk.jmh.runner.Runner;
|
||||
import org.openjdk.jmh.runner.RunnerException;
|
||||
import org.openjdk.jmh.runner.options.Options;
|
||||
import org.openjdk.jmh.runner.options.OptionsBuilder;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
public class TanhPerf {
|
||||
|
||||
@Warmup(iterations = 3, time = 5, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Measurement(iterations = 4, time = 5, timeUnit = TimeUnit.MILLISECONDS)
|
||||
@Fork(2)
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
@State(Scope.Thread)
|
||||
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||
public static class TanhPerfRanges {
|
||||
public static int tanhInputCount = 2048;
|
||||
|
||||
@Param({"0", "1", "2", "3"})
|
||||
public int tanhRangeIndex;
|
||||
|
||||
public double [] tanhPosRandInputs;
|
||||
public double [] tanhNegRandInputs;
|
||||
public int tanhInputIndex = 0;
|
||||
public double tanhRangeInputs[][] = {{0.0, 0x1.0P-55}, {0x1.0P-55, 1.0}, {1.0, 22.0}, {22.1, 1.7976931348623157E308} };
|
||||
|
||||
@Setup
|
||||
public void setupValues() {
|
||||
Random random = new Random(1023);
|
||||
|
||||
// Fill the positive and negative tanh vectors with random values
|
||||
tanhPosRandInputs = new double[tanhInputCount];
|
||||
tanhNegRandInputs = new double[tanhInputCount];
|
||||
|
||||
for (int i = 0; i < tanhInputCount; i++) {
|
||||
double tanhLowerBound = tanhRangeInputs[tanhRangeIndex][0];
|
||||
double tanhUpperBound = tanhRangeInputs[tanhRangeIndex][1];
|
||||
tanhPosRandInputs[i] = random.nextDouble(tanhLowerBound, tanhUpperBound);
|
||||
tanhNegRandInputs[i] = random.nextDouble(-tanhUpperBound, -tanhLowerBound);
|
||||
}
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@OperationsPerInvocation(2048)
|
||||
public double tanhPosRangeDouble() {
|
||||
double res = 0.0;
|
||||
for (int i = 0; i < tanhInputCount; i++) {
|
||||
res += Math.tanh(tanhPosRandInputs[i]);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
@OperationsPerInvocation(2048)
|
||||
public double tanhNegRangeDouble() {
|
||||
double res = 0.0;
|
||||
for (int i = 0; i < tanhInputCount; i++) {
|
||||
res += Math.tanh(tanhNegRandInputs[i]);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
}
|
||||
|
||||
@Warmup(iterations = 3, time = 5, timeUnit = TimeUnit.SECONDS)
|
||||
@Measurement(iterations = 4, time = 5, timeUnit = TimeUnit.SECONDS)
|
||||
@Fork(2)
|
||||
@BenchmarkMode(Mode.Throughput)
|
||||
@State(Scope.Thread)
|
||||
@OutputTimeUnit(TimeUnit.MILLISECONDS)
|
||||
public static class TanhPerfConstant {
|
||||
public static final double constDoubleTiny = 0x1.0P-57;
|
||||
public static final double constDoubleSmall = 0x1.0P-54;
|
||||
public static final double constDouble1 = 1.0;
|
||||
public static final double constDouble21 = 21.0;
|
||||
public static final double constDoubleLarge = 23.0;
|
||||
|
||||
@Benchmark
|
||||
public double tanhConstDoubleTiny() {
|
||||
return Math.tanh(constDoubleTiny);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public double tanhConstDoubleSmall() {
|
||||
return Math.tanh(constDoubleSmall);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public double tanhConstDouble1() {
|
||||
return Math.tanh(constDouble1);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public double tanhConstDouble21() {
|
||||
return Math.tanh(constDouble21);
|
||||
}
|
||||
|
||||
@Benchmark
|
||||
public double tanhConstDoubleLarge() {
|
||||
return Math.tanh(constDoubleLarge);
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws RunnerException {
|
||||
Options opt = new OptionsBuilder()
|
||||
.include(TanhPerfRanges.class.getSimpleName())
|
||||
.build();
|
||||
|
||||
new Runner(opt).run();
|
||||
|
||||
opt = new OptionsBuilder()
|
||||
.include(TanhPerfConstant.class.getSimpleName())
|
||||
.build();
|
||||
|
||||
new Runner(opt).run();
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user