8202326: AARCH64: optimize string compare intrinsic

Reviewed-by: dsamersoff
This commit is contained in:
Dmitrij Pochepko 2018-06-25 16:31:37 +03:00
parent 703073a564
commit 97aa261f24
6 changed files with 524 additions and 88 deletions

View File

@ -15852,70 +15852,76 @@ instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp,
%}
// Match rule for StrComp nodes whose encoding is StrIntrinsicNode::UU.
// Operands are pinned to fixed registers (str1=R1, cnt1=R2, str2=R3, cnt2=R4,
// result=R0). The four inputs, the R10/R11 temporaries and the flags are all
// declared as killed; no vector registers are needed, so fnoreg is passed.
// NOTE(review): the format string names only $tmp1 although $tmp2 is killed
// as well; the UL/LU rules list every killed temporary.
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %}
ins_encode %{
// Count is in 8-bit bytes; non-Compact chars are 16 bits.
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$Register,
fnoreg, fnoreg, StrIntrinsicNode::UU);
$tmp1$$Register, $tmp2$$Register,
fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU);
%}
ins_pipe(pipe_class_memory);
%}
// Match rule for StrComp nodes whose encoding is StrIntrinsicNode::LL.
// Same fixed-register layout as the UU rule (str1=R1, cnt1=R2, str2=R3,
// cnt2=R4, result=R0, temporaries R10/R11); inputs, temporaries and flags
// are declared as killed and no vector registers are passed (fnoreg).
// NOTE(review): the format string names only $tmp1 although $tmp2 is killed
// as well.
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %}
ins_encode %{
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$Register,
fnoreg, fnoreg, StrIntrinsicNode::LL);
$tmp1$$Register, $tmp2$$Register,
fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL);
%}
ins_pipe(pipe_class_memory);
%}
// Match rule for StrComp nodes whose encoding is StrIntrinsicNode::UL.
// Besides the fixed general registers (str1=R1, cnt1=R2, str2=R3, cnt2=R4,
// result=R0, temporaries R10/R11) this mixed-encoding rule pins three vector
// temporaries to V0, V1 and V2 and declares them killed; all three are handed
// to MacroAssembler::string_compare.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);
effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %}
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
ins_encode %{
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$Register,
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::UL);
$tmp1$$Register, $tmp2$$Register,
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
$vtmp3$$FloatRegister, StrIntrinsicNode::UL);
%}
ins_pipe(pipe_class_memory);
%}
// Match rule for StrComp nodes whose encoding is StrIntrinsicNode::LU.
// Mirrors the UL rule: fixed general registers (str1=R1, cnt1=R2, str2=R3,
// cnt2=R4, result=R0, temporaries R10/R11) plus vector temporaries pinned to
// V0, V1 and V2, all declared killed and passed to
// MacroAssembler::string_compare.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
%{
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);
effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %}
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
ins_encode %{
__ string_compare($str1$$Register, $str2$$Register,
$cnt1$$Register, $cnt2$$Register, $result$$Register,
$tmp1$$Register,
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::LU);
$tmp1$$Register, $tmp2$$Register,
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
$vtmp3$$FloatRegister,StrIntrinsicNode::LU);
%}
ins_pipe(pipe_class_memory);
%}

View File

@ -4733,12 +4733,13 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1,
// Compare strings.
void MacroAssembler::string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
Register tmp1,
FloatRegister vtmp, FloatRegister vtmpZ, int ae) {
Label LENGTH_DIFF, DONE, SHORT_LOOP, SHORT_STRING,
NEXT_WORD, DIFFERENCE;
Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {
Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
SHORT_LOOP_START, TAIL_CHECK;
const int STUB_THRESHOLD = 64 + 8;
bool isLL = ae == StrIntrinsicNode::LL;
bool isLU = ae == StrIntrinsicNode::LU;
bool isUL = ae == StrIntrinsicNode::UL;
@ -4750,7 +4751,9 @@ void MacroAssembler::string_compare(Register str1, Register str2,
int str2_chr_shift = str2_isL ? 0 : 1;
int str1_chr_size = str1_isL ? 1 : 2;
int str2_chr_size = str2_isL ? 1 : 2;
int minCharsInWord = isLL ? wordSize : wordSize/2;
FloatRegister vtmpZ = vtmp1, vtmp = vtmp2;
chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
(chr_insn)&MacroAssembler::ldrh;
chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
@ -4766,73 +4769,116 @@ void MacroAssembler::string_compare(Register str1, Register str2,
if (!str2_isL) asrw(cnt2, cnt2, 1);
// Compute the minimum of the string lengths and save the difference.
subsw(tmp1, cnt1, cnt2);
subsw(result, cnt1, cnt2);
cselw(cnt2, cnt1, cnt2, Assembler::LE); // min
// A very short string
cmpw(cnt2, isLL ? 8:4);
cmpw(cnt2, minCharsInWord);
br(Assembler::LT, SHORT_STRING);
// Check if the strings start at the same location.
cmp(str1, str2);
br(Assembler::EQ, LENGTH_DIFF);
// Compare longwords
// load first parts of strings and finish initialization while loading
{
subw(cnt2, cnt2, isLL ? 8:4); // The last longword is a special case
// Move both string pointers to the last longword of their
// strings, negate the remaining count, and convert it to bytes.
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
if (isLU || isUL) {
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
if (str1_isL == str2_isL) { // LL or UU
ldr(tmp1, Address(str1));
cmp(str1, str2);
br(Assembler::EQ, DONE);
ldr(tmp2, Address(str2));
cmp(cnt2, STUB_THRESHOLD);
br(GE, STUB);
subsw(cnt2, cnt2, minCharsInWord);
br(EQ, TAIL_CHECK);
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
} else if (isLU) {
ldrs(vtmp, Address(str1));
cmp(str1, str2);
br(Assembler::EQ, DONE);
ldr(tmp2, Address(str2));
cmp(cnt2, STUB_THRESHOLD);
br(GE, STUB);
subsw(cnt2, cnt2, 4);
br(EQ, TAIL_CHECK);
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
}
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
// Loop, loading longwords and comparing them into rscratch2.
bind(NEXT_WORD);
if (isLU) {
ldrs(vtmp, Address(str1, cnt1));
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
zip1(vtmp, T8B, vtmp, vtmpZ);
umov(result, vtmp, D, 0);
} else {
ldr(result, Address(str1, isUL ? cnt1:cnt2));
}
if (isUL) {
ldrs(vtmp, Address(str2, cnt2));
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
add(cnt1, cnt1, 4);
fmovd(tmp1, vtmp);
} else { // UL case
ldr(tmp1, Address(str1));
cmp(str1, str2);
br(Assembler::EQ, DONE);
ldrs(vtmp, Address(str2));
cmp(cnt2, STUB_THRESHOLD);
br(GE, STUB);
subsw(cnt2, cnt2, 4);
br(EQ, TAIL_CHECK);
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
zip1(vtmp, T8B, vtmp, vtmpZ);
umov(rscratch1, vtmp, D, 0);
} else {
ldr(rscratch1, Address(str2, cnt2));
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
add(cnt1, cnt1, 8);
fmovd(tmp2, vtmp);
}
adds(cnt2, cnt2, isUL ? 4:8);
if (isLU || isUL) add(cnt1, cnt1, isLU ? 4:8);
eor(rscratch2, result, rscratch1);
adds(cnt2, cnt2, isUL ? 4 : 8);
br(GE, TAIL);
eor(rscratch2, tmp1, tmp2);
cbnz(rscratch2, DIFFERENCE);
br(Assembler::LT, NEXT_WORD);
// main loop
bind(NEXT_WORD);
if (str1_isL == str2_isL) {
ldr(tmp1, Address(str1, cnt2));
ldr(tmp2, Address(str2, cnt2));
adds(cnt2, cnt2, 8);
} else if (isLU) {
ldrs(vtmp, Address(str1, cnt1));
ldr(tmp2, Address(str2, cnt2));
add(cnt1, cnt1, 4);
zip1(vtmp, T8B, vtmp, vtmpZ);
fmovd(tmp1, vtmp);
adds(cnt2, cnt2, 8);
} else { // UL
ldrs(vtmp, Address(str2, cnt2));
ldr(tmp1, Address(str1, cnt1));
zip1(vtmp, T8B, vtmp, vtmpZ);
add(cnt1, cnt1, 8);
fmovd(tmp2, vtmp);
adds(cnt2, cnt2, 4);
}
br(GE, TAIL);
eor(rscratch2, tmp1, tmp2);
cbz(rscratch2, NEXT_WORD);
b(DIFFERENCE);
bind(TAIL);
eor(rscratch2, tmp1, tmp2);
cbnz(rscratch2, DIFFERENCE);
// Last longword. In the case where length == 4 we compare the
// same longword twice, but that's still faster than another
// conditional branch.
if (isLU) {
if (str1_isL == str2_isL) {
ldr(tmp1, Address(str1));
ldr(tmp2, Address(str2));
} else if (isLU) {
ldrs(vtmp, Address(str1));
ldr(tmp2, Address(str2));
zip1(vtmp, T8B, vtmp, vtmpZ);
umov(result, vtmp, D, 0);
} else {
ldr(result, Address(str1));
}
if (isUL) {
fmovd(tmp1, vtmp);
} else { // UL
ldrs(vtmp, Address(str2));
ldr(tmp1, Address(str1));
zip1(vtmp, T8B, vtmp, vtmpZ);
umov(rscratch1, vtmp, D, 0);
} else {
ldr(rscratch1, Address(str2));
fmovd(tmp2, vtmp);
}
eor(rscratch2, result, rscratch1);
cbz(rscratch2, LENGTH_DIFF);
bind(TAIL_CHECK);
eor(rscratch2, tmp1, tmp2);
cbz(rscratch2, DONE);
// Find the first different characters in the longwords and
// compute their difference.
@ -4840,31 +4886,78 @@ void MacroAssembler::string_compare(Register str1, Register str2,
rev(rscratch2, rscratch2);
clz(rscratch2, rscratch2);
andr(rscratch2, rscratch2, isLL ? -8 : -16);
lsrv(result, result, rscratch2);
(this->*ext_chr)(result, result);
lsrv(rscratch1, rscratch1, rscratch2);
(this->*ext_chr)(rscratch1, rscratch1);
subw(result, result, rscratch1);
lsrv(tmp1, tmp1, rscratch2);
(this->*ext_chr)(tmp1, tmp1);
lsrv(tmp2, tmp2, rscratch2);
(this->*ext_chr)(tmp2, tmp2);
subw(result, tmp1, tmp2);
b(DONE);
}
bind(STUB);
RuntimeAddress stub = NULL;
switch(ae) {
case StrIntrinsicNode::LL:
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL());
break;
case StrIntrinsicNode::UU:
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU());
break;
case StrIntrinsicNode::LU:
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU());
break;
case StrIntrinsicNode::UL:
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL());
break;
default:
ShouldNotReachHere();
}
assert(stub.target() != NULL, "compare_long_string stub has not been generated");
trampoline_call(stub);
b(DONE);
bind(SHORT_STRING);
// Is the minimum length zero?
cbz(cnt2, LENGTH_DIFF);
bind(SHORT_LOOP);
(this->*str1_load_chr)(result, Address(post(str1, str1_chr_size)));
cbz(cnt2, DONE);
// arrange code to do most branches while loading and loading next characters
// while comparing previous
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
subs(cnt2, cnt2, 1);
br(EQ, SHORT_LAST_INIT);
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
subw(result, result, cnt1);
cbnz(result, DONE);
sub(cnt2, cnt2, 1);
cbnz(cnt2, SHORT_LOOP);
b(SHORT_LOOP_START);
bind(SHORT_LOOP);
subs(cnt2, cnt2, 1);
br(EQ, SHORT_LAST);
bind(SHORT_LOOP_START);
(this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size)));
(this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size)));
cmp(tmp1, cnt1);
br(NE, SHORT_LOOP_TAIL);
subs(cnt2, cnt2, 1);
br(EQ, SHORT_LAST2);
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
cmp(tmp2, rscratch1);
br(EQ, SHORT_LOOP);
sub(result, tmp2, rscratch1);
b(DONE);
bind(SHORT_LOOP_TAIL);
sub(result, tmp1, cnt1);
b(DONE);
bind(SHORT_LAST2);
cmp(tmp2, rscratch1);
br(EQ, DONE);
sub(result, tmp2, rscratch1);
// Strings are equal up to min length. Return the length difference.
bind(LENGTH_DIFF);
mov(result, tmp1);
b(DONE);
bind(SHORT_LAST_INIT);
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
bind(SHORT_LAST);
cmp(tmp1, cnt1);
br(EQ, DONE);
sub(result, tmp1, cnt1);
// That's it
bind(DONE);
BLOCK_COMMENT("} string_compare");

View File

@ -1212,8 +1212,8 @@ public:
void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,
Register tmp1,
FloatRegister vtmp, FloatRegister vtmpZ, int ae);
Register tmp1, Register tmp2, FloatRegister vtmp1,
FloatRegister vtmp2, FloatRegister vtmp3, int ae);
void has_negatives(Register ary1, Register len, Register result);

View File

@ -4014,6 +4014,317 @@ class StubGenerator: public StubCodeGenerator {
return entry;
}
// code for comparing 16 bytes of strings with same encoding
//
// Emits one pipelined compare step. On entry tmp1 (r10) / tmp2 (r11) hold a
// pair of 8-byte words that has been loaded but not compared yet. The step
// loads the next 16 bytes of each string (post-incrementing str1 and str2),
// compares the pending pair and the first newly loaded pair, and leaves the
// second newly loaded pair pending in tmp1/tmp2 for the next step.
//
// DIFF1 is taken when the pair in tmp1/tmp2 differs, DIFF2 when the pair in
// rscratch1/cnt1 differs; in both cases rscratch2 holds the XOR of the pair.
// cnt1 (r2) is used as a plain data register here. Only eor/ldr/cbnz are
// emitted, so the condition flags set by the caller are not modified.
void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
// 'result' is not referenced in this helper.
Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, tmp1 = r10, tmp2 = r11;
// Start loading the next words before testing the pending pair.
__ ldr(rscratch1, Address(__ post(str1, 8)));
__ eor(rscratch2, tmp1, tmp2);
__ ldr(cnt1, Address(__ post(str2, 8)));
__ cbnz(rscratch2, DIFF1);
// Reload tmp1/tmp2 (the new pending pair) while testing rscratch1/cnt1.
__ ldr(tmp1, Address(__ post(str1, 8)));
__ eor(rscratch2, rscratch1, cnt1);
__ ldr(tmp2, Address(__ post(str2, 8)));
__ cbnz(rscratch2, DIFF2);
}
// code for comparing 16 characters of strings with Latin1 and Utf16 encoding
//
// Register roles inside this helper (they differ from the stub's entry roles):
//   tmp2 (r11) - pointer to the next Latin1 bytes, advanced by 16
//   cnt1 (r2)  - pointer to the UTF-16 data, advanced by 32 in total
//   tmp3 (r12) - on entry: 4 UTF-16 characters already loaded for the first
//                comparison; on exit: the 4 UTF-16 characters that follow the
//                16 compared ones (loaded by the last ldr below)
//   vtmpZ (v0) - interleaved with the Latin1 bytes to widen them; expected to
//                be zero (the calling stub clears it before the first call)
//   tmpL/tmpU  - caller-chosen registers receiving the widened Latin1 word and
//                the UTF-16 word of each comparison
// DIFF2 is taken when tmp3 and tmpL differ, DIFF1 when tmpU and tmpL differ;
// rscratch2 holds the XOR of the differing pair. No flag-setting instruction
// is emitted.
void compare_string_16_x_LU(Register tmpL, Register tmpU, Label &DIFF1,
Label &DIFF2) {
// tmp1 is not referenced in this helper.
Register cnt1 = r2, tmp1 = r10, tmp2 = r11, tmp3 = r12;
FloatRegister vtmp = v1, vtmpZ = v0, vtmp3 = v2;
__ ldrq(vtmp, Address(__ post(tmp2, 16)));
__ ldr(tmpU, Address(__ post(cnt1, 8)));
__ zip1(vtmp3, __ T16B, vtmp, vtmpZ);
// vtmp3 now holds the low 8 Latin1 bytes widened to 16-bit characters; the
// high 8 bytes stay unconverted in vtmp until the zip2 below
__ fmovd(tmpL, vtmp3);
__ eor(rscratch2, tmp3, tmpL);
__ cbnz(rscratch2, DIFF2);
__ ldr(tmp3, Address(__ post(cnt1, 8)));
__ umov(tmpL, vtmp3, __ D, 1);
__ eor(rscratch2, tmpU, tmpL);
__ cbnz(rscratch2, DIFF1);
// widen the high 8 Latin1 bytes in place
__ zip2(vtmp, __ T16B, vtmp, vtmpZ);
__ ldr(tmpU, Address(__ post(cnt1, 8)));
__ fmovd(tmpL, vtmp);
__ eor(rscratch2, tmp3, tmpL);
__ cbnz(rscratch2, DIFF2);
// preload the UTF-16 word for the next call (or for the caller's tail code)
__ ldr(tmp3, Address(__ post(cnt1, 8)));
__ umov(tmpL, vtmp, __ D, 1);
__ eor(rscratch2, tmpU, tmpL);
__ cbnz(rscratch2, DIFF1);
}
// Out-of-line stub that compares two long strings with different encodings
// (isLU: str1 is Latin1 and str2 is UTF-16; otherwise the other way round).
//
// Register contract on entry:
//   r0  = result  (written only when a differing character is found)
//   r1  = str1    (start of string 1; its first 4 characters are already loaded)
//   r2  = cnt1    (clobbered: reused as the UTF-16 load pointer)
//   r3  = str2    (start of string 2; its first 4 characters are already loaded)
//   r4  = cnt2    (number of characters to compare)
//   r10 = tmp1    (UL: first 4 UTF-16 characters of str1)
//   r11 = tmp2    (LU: first 4 UTF-16 characters of str2)
//   v1            (first 4 Latin1 bytes of the Latin1 string)
// v0 and v2 are clobbered, r12/r14 are saved and restored, rscratch1 and
// rscratch2 are clobbered.
//
// The stub relies on the inline code only branching here when cnt2 is at
// least STUB_THRESHOLD (64 + 8): the first large-loop pass and the backed-up
// tail compare below both need that many characters to be present.
//
// Layout of the comparison: the first 4 characters are checked from the
// preloaded registers, the last 4 characters are checked by LOAD_LAST, and
// the cnt2 - 8 characters in between are compared 16 at a time.
address generate_compare_long_string_different_encoding(bool isLU) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", isLU
      ? "compare_long_string_different_encoding LU"
      : "compare_long_string_different_encoding UL");
  address entry = __ pc();
  Label SMALL_LOOP, TAIL, LOAD_LAST, DIFF1, DIFF2,
      DONE, CALCULATE_DIFFERENCE, LARGE_LOOP_PREFETCH, NO_PREFETCH,
      LARGE_LOOP_PREFETCH_REPEAT1, LARGE_LOOP_PREFETCH_REPEAT2;
  Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
      tmp1 = r10, tmp2 = r11, tmp3 = r12, tmp4 = r14;
  FloatRegister vtmpZ = v0, vtmp = v1;
  RegSet spilled_regs = RegSet::of(tmp3, tmp4);
  // One pass of the prefetching loop consumes 64 characters, so the loop must
  // not be (re-)entered with fewer than 64 characters left.
  int prefetchLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance/2);

  __ eor(vtmpZ, __ T16B, vtmpZ, vtmpZ);
  // cnt2 == amount of characters left to compare
  // Check already loaded first 4 symbols(vtmp and tmp2(LU)/tmp1(UL))
  __ zip1(vtmp, __ T8B, vtmp, vtmpZ);
  __ add(str1, str1, isLU ? wordSize/2 : wordSize);
  __ add(str2, str2, isLU ? wordSize : wordSize/2);
  __ fmovd(isLU ? tmp1 : tmp2, vtmp);
  __ subw(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case.
  // str1/str2 now point at the last 4 characters of the compared range
  __ add(str1, str1, cnt2, __ LSL, isLU ? 0 : 1);
  __ eor(rscratch2, tmp1, tmp2);
  __ add(str2, str2, cnt2, __ LSL, isLU ? 1 : 0);
  __ mov(rscratch1, tmp2);
  __ cbnz(rscratch2, CALCULATE_DIFFERENCE);
  Register strU = isLU ? str2 : str1,
           strL = isLU ? str1 : str2,
           tmpU = isLU ? rscratch1 : tmp1, // where to keep U for comparison
           tmpL = isLU ? tmp1 : rscratch1; // where to keep L for comparison
  __ push(spilled_regs, sp);
  __ sub(tmp2, strL, cnt2); // strL pointer to load from
  __ sub(cnt1, strU, cnt2, __ LSL, 1); // strU pointer to load from

  // compare_string_16_x_LU expects the next 4 UTF-16 characters in tmp3
  __ ldr(tmp3, Address(__ post(cnt1, 8)));

  if (SoftwarePrefetchHintDistance >= 0) {
    __ cmp(cnt2, prefetchLoopExitCondition);
    __ br(__ LT, NO_PREFETCH);
    __ bind(LARGE_LOOP_PREFETCH);
      __ prfm(Address(tmp2, SoftwarePrefetchHintDistance));
      __ mov(tmp4, 2);
      __ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
      __ bind(LARGE_LOOP_PREFETCH_REPEAT1);
        compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
        __ subs(tmp4, tmp4, 1);
        __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT1);
        __ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
        __ mov(tmp4, 2);
      __ bind(LARGE_LOOP_PREFETCH_REPEAT2);
        compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
        __ subs(tmp4, tmp4, 1);
        __ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2);
        __ sub(cnt2, cnt2, 64);
        __ cmp(cnt2, prefetchLoopExitCondition);
        __ br(__ GE, LARGE_LOOP_PREFETCH);
  }
  __ cbz(cnt2, LOAD_LAST); // no characters left except last load
  __ bind(NO_PREFETCH);
  // From here on cnt2 is biased by -16: it is checked BEFORE each 16-character
  // step so that a step is only emitted into the path when 16 characters
  // really remain.
  __ subs(cnt2, cnt2, 16);
  __ br(__ LT, TAIL);
  __ bind(SMALL_LOOP); // smaller loop
    __ subs(cnt2, cnt2, 16);
    // the helper does not modify the flags set by the subs above
    compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
    __ br(__ GE, SMALL_LOOP);
    // cnt2 == (characters left) - 16; exactly -16 means nothing is left
    __ adds(rscratch2, cnt2, 16);
    __ br(__ EQ, LOAD_LAST);
  __ bind(TAIL); // 1..15 characters left before the last 4
    // Step both pointers back by 16 - (characters left) so that one more full
    // 16-character compare ends exactly where the last 4 characters begin.
    // The characters compared twice are already known to be equal, and no
    // character beyond the compared range is read.
    __ add(tmp2, tmp2, cnt2);
    __ add(cnt1, cnt1, cnt2, __ LSL, 1);
    __ ldr(tmp3, Address(cnt1, -8)); // re-establish the preloaded UTF-16 word
    compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
    __ b(LOAD_LAST);
  __ bind(DIFF2);
    __ mov(tmpU, tmp3);
  __ bind(DIFF1);
    __ pop(spilled_regs, sp);
    __ b(CALCULATE_DIFFERENCE);
  __ bind(LOAD_LAST);
    __ pop(spilled_regs, sp);

    // strL/strU were set up at entry to address the last 4 characters
    __ ldrs(vtmp, Address(strL));
    __ ldr(tmpU, Address(strU));
    __ zip1(vtmp, __ T8B, vtmp, vtmpZ);
    __ fmovd(tmpL, vtmp);

    __ eor(rscratch2, tmpU, tmpL);
    __ cbz(rscratch2, DONE);

  // Find the first different characters in the longwords and
  // compute their difference.
  __ bind(CALCULATE_DIFFERENCE);
    __ rev(rscratch2, rscratch2);
    __ clz(rscratch2, rscratch2);
    __ andr(rscratch2, rscratch2, -16);
    __ lsrv(tmp1, tmp1, rscratch2);
    __ uxthw(tmp1, tmp1);
    __ lsrv(rscratch1, rscratch1, rscratch2);
    __ uxthw(rscratch1, rscratch1);
    __ subw(result, tmp1, rscratch1);
  __ bind(DONE);
    __ ret(lr);
  return entry;
}
// Out-of-line stub that compares two long strings sharing one encoding
// (Latin1 when isLL, UTF-16 otherwise).
//
// Register contract on entry:
//   r0  = result  (written only when a differing character is found)
//   r1  = str1    (start of string 1; its first 8 bytes are already loaded)
//   r2  = cnt1    (clobbered: reused as a data register)
//   r3  = str2    (start of string 2; its first 8 bytes are already loaded)
//   r4  = cnt2    (number of characters to compare, including those 8 bytes)
//   r10 = tmp1    (first 8 bytes of str1, loaded but not yet compared)
//   r11 = tmp2    (first 8 bytes of str2, loaded but not yet compared)
// rscratch1 and rscratch2 are clobbered.
//
// Throughout the stub one pair of words is always "pending" in tmp1/tmp2:
// loaded, but compared only by the following step.
address generate_compare_long_string_same_encoding(bool isLL) {
  __ align(CodeEntryAlignment);
  StubCodeMark mark(this, "StubRoutines", isLL
      ? "compare_long_string_same_encoding LL"
      : "compare_long_string_same_encoding UU");
  address entry = __ pc();
  Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
      tmp1 = r10, tmp2 = r11;
  Label SMALL_LOOP, LARGE_LOOP_PREFETCH, CHECK_LAST, DIFF2, TAIL,
      LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF;
  // exit from large loop when less than 64 bytes left to read or we're about
  // to prefetch memory behind array border
  int largeLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance)/(isLL ? 1 : 2);
  // cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used
  // update cnt2 counter with already loaded 8 bytes
  __ sub(cnt2, cnt2, wordSize/(isLL ? 1 : 2));
  // update pointers, because of previous read
  __ add(str1, str1, wordSize);
  __ add(str2, str2, wordSize);
  if (SoftwarePrefetchHintDistance >= 0) {
    __ bind(LARGE_LOOP_PREFETCH);
      __ prfm(Address(str1, SoftwarePrefetchHintDistance));
      __ prfm(Address(str2, SoftwarePrefetchHintDistance));
      // 4 x 16 bytes per pass; the counter update and the exit test are
      // interleaved with the compare steps (which leave the flags alone)
      compare_string_16_bytes_same(DIFF, DIFF2);
      compare_string_16_bytes_same(DIFF, DIFF2);
      __ sub(cnt2, cnt2, isLL ? 64 : 32);
      compare_string_16_bytes_same(DIFF, DIFF2);
      __ cmp(cnt2, largeLoopExitCondition);
      compare_string_16_bytes_same(DIFF, DIFF2);
      __ br(__ GT, LARGE_LOOP_PREFETCH);
      __ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
  }
  // less than 16 bytes left?
  // This pre-decrement must run whether or not the prefetching loop was
  // generated: SMALL_LOOP and TAIL both expect cnt2 to be biased by -16 bytes.
  // Skipping it when prefetching is disabled made SMALL_LOOP execute one
  // 16-byte step too many and left TAIL with a wrong remaining count.
  __ subs(cnt2, cnt2, isLL ? 16 : 8);
  __ br(__ LT, TAIL);
  __ bind(SMALL_LOOP);
    compare_string_16_bytes_same(DIFF, DIFF2);
    __ subs(cnt2, cnt2, isLL ? 16 : 8);
    __ br(__ GE, SMALL_LOOP);
  __ bind(TAIL);
    // undo the bias: cnt2 == characters not loaded yet (less than 16 bytes)
    __ adds(cnt2, cnt2, isLL ? 16 : 8);
    __ br(__ EQ, LAST_CHECK_AND_LENGTH_DIFF);
    __ subs(cnt2, cnt2, isLL ? 8 : 4);
    __ br(__ LE, CHECK_LAST);
    // more than 8 bytes left: check the pending pair and load one more pair
    __ eor(rscratch2, tmp1, tmp2);
    __ cbnz(rscratch2, DIFF);
    __ ldr(tmp1, Address(__ post(str1, 8)));
    __ ldr(tmp2, Address(__ post(str2, 8)));
    __ sub(cnt2, cnt2, isLL ? 8 : 4);
  __ bind(CHECK_LAST);
    // cnt2 is now <= 0: (characters left) minus one word
    if (!isLL) {
      __ add(cnt2, cnt2, cnt2); // now in bytes
    }
    __ eor(rscratch2, tmp1, tmp2);
    __ cbnz(rscratch2, DIFF);
    // final load ends exactly at the end of the compared range; it may
    // overlap bytes that were already compared
    __ ldr(rscratch1, Address(str1, cnt2));
    __ ldr(cnt1, Address(str2, cnt2));
    __ eor(rscratch2, rscratch1, cnt1);
    __ cbz(rscratch2, LENGTH_DIFF);
    // Find the first different characters in the longwords and
    // compute their difference.
  __ bind(DIFF2); // differing pair is in rscratch1/cnt1
    __ rev(rscratch2, rscratch2);
    __ clz(rscratch2, rscratch2);
    __ andr(rscratch2, rscratch2, isLL ? -8 : -16);
    __ lsrv(rscratch1, rscratch1, rscratch2);
    __ lsrv(cnt1, cnt1, rscratch2);
    if (isLL) {
      __ uxtbw(rscratch1, rscratch1);
      __ uxtbw(cnt1, cnt1);
    } else {
      __ uxthw(rscratch1, rscratch1);
      __ uxthw(cnt1, cnt1);
    }
    __ subw(result, rscratch1, cnt1);
    __ b(LENGTH_DIFF);
  __ bind(DIFF); // differing pair is in tmp1/tmp2
    __ rev(rscratch2, rscratch2);
    __ clz(rscratch2, rscratch2);
    __ andr(rscratch2, rscratch2, isLL ? -8 : -16);
    __ lsrv(tmp1, tmp1, rscratch2);
    __ lsrv(tmp2, tmp2, rscratch2);
    if (isLL) {
      __ uxtbw(tmp1, tmp1);
      __ uxtbw(tmp2, tmp2);
    } else {
      __ uxthw(tmp1, tmp1);
      __ uxthw(tmp2, tmp2);
    }
    __ subw(result, tmp1, tmp2);
    __ b(LENGTH_DIFF);
  __ bind(LAST_CHECK_AND_LENGTH_DIFF);
    // nothing left to load: only the pending pair remains to be checked
    __ eor(rscratch2, tmp1, tmp2);
    __ cbnz(rscratch2, DIFF);
  __ bind(LENGTH_DIFF);
    // result is returned as the caller left it when no difference was found
    __ ret(lr);
  return entry;
}
void generate_compare_long_strings() {
StubRoutines::aarch64::_compare_long_string_LL
= generate_compare_long_string_same_encoding(true);
StubRoutines::aarch64::_compare_long_string_UU
= generate_compare_long_string_same_encoding(false);
StubRoutines::aarch64::_compare_long_string_LU
= generate_compare_long_string_different_encoding(true);
StubRoutines::aarch64::_compare_long_string_UL
= generate_compare_long_string_different_encoding(false);
}
/**
* Arguments:
*
@ -5113,6 +5424,8 @@ class StubGenerator: public StubCodeGenerator {
StubRoutines::aarch64::_large_array_equals = generate_large_array_equals();
}
generate_compare_long_strings();
if (UseMultiplyToLenIntrinsic) {
StubRoutines::_multiplyToLen = generate_multiplyToLen();
}

View File

@ -48,6 +48,10 @@ address StubRoutines::aarch64::_zero_blocks = NULL;
address StubRoutines::aarch64::_has_negatives = NULL;
address StubRoutines::aarch64::_has_negatives_long = NULL;
address StubRoutines::aarch64::_large_array_equals = NULL;
// Entry points of the long-string compare stubs, one per encoding pair
// (first letter: str1, second letter: str2; L = Latin1, U = UTF-16).
// They stay NULL until generate_compare_long_strings() assigns them.
address StubRoutines::aarch64::_compare_long_string_LL = NULL;
address StubRoutines::aarch64::_compare_long_string_UU = NULL;
address StubRoutines::aarch64::_compare_long_string_LU = NULL;
address StubRoutines::aarch64::_compare_long_string_UL = NULL;
bool StubRoutines::aarch64::_completed = false;
/**

View File

@ -66,6 +66,10 @@ class aarch64 {
static address _has_negatives;
static address _has_negatives_long;
static address _large_array_equals;
// Entry points of the long-string compare stubs, one per encoding pair;
// read through the compare_long_string_XX() accessors below.
static address _compare_long_string_LL;
static address _compare_long_string_LU;
static address _compare_long_string_UL;
static address _compare_long_string_UU;
static bool _completed;
public:
@ -136,6 +140,22 @@ class aarch64 {
return _large_array_equals;
}
// Returns the entry point of the Latin1/Latin1 long-string compare stub
// (NULL while the stub has not been generated).
static address compare_long_string_LL() { return _compare_long_string_LL; }
// Returns the entry point of the Latin1/UTF-16 long-string compare stub
// (NULL while the stub has not been generated).
static address compare_long_string_LU() { return _compare_long_string_LU; }
// Returns the entry point of the UTF-16/Latin1 long-string compare stub
// (NULL while the stub has not been generated).
static address compare_long_string_UL() { return _compare_long_string_UL; }
// Returns the entry point of the UTF-16/UTF-16 long-string compare stub
// (NULL while the stub has not been generated).
static address compare_long_string_UU() { return _compare_long_string_UU; }
static bool complete() {
return _completed;
}