8202326: AARCH64: optimize string compare intrinsic
Reviewed-by: dsamersoff
This commit is contained in:
parent
703073a564
commit
97aa261f24
@ -15852,70 +15852,76 @@ instruct partialSubtypeCheckVsZero(iRegP_R4 sub, iRegP_R0 super, iRegP_R2 temp,
|
||||
%}
|
||||
|
||||
// Match rule for StrComp nodes whose encoding is StrIntrinsicNode::UU.
// The operand classes name fixed registers (R0-R4 for the string pointers,
// counts and result, R10/R11 for the integer temporaries). The encoding block
// forwards them to MacroAssembler::string_compare and passes fnoreg for the
// vector temporaries.
// NOTE(review): the format string still prints only "$tmp1" after "KILL"
// although the effect list that declares tmp2 also kills it; string_compareUL
// and string_compareLU list every killed temporary. Consider aligning it.
instruct string_compareU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
|
||||
iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
|
||||
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %}
|
||||
ins_encode %{
|
||||
// Count is in 8-bit bytes; non-Compact chars are 16 bits.
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$Register,
|
||||
fnoreg, fnoreg, StrIntrinsicNode::UU);
|
||||
$tmp1$$Register, $tmp2$$Register,
|
||||
fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
// Match rule for StrComp nodes whose encoding is StrIntrinsicNode::LL.
// Same operand layout as string_compareU: the operand classes name fixed
// registers, and the encoding block forwards them to
// MacroAssembler::string_compare with fnoreg for the vector temporaries.
// NOTE(review): the format string still prints only "$tmp1" after "KILL"
// although the effect list that declares tmp2 also kills it; string_compareUL
// and string_compareLU list every killed temporary. Consider aligning it.
instruct string_compareL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
|
||||
iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
|
||||
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %}
|
||||
ins_encode %{
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$Register,
|
||||
fnoreg, fnoreg, StrIntrinsicNode::LL);
|
||||
$tmp1$$Register, $tmp2$$Register,
|
||||
fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
// Match rule for StrComp nodes whose encoding is StrIntrinsicNode::UL
// (mixed encodings; MacroAssembler::string_compare loads 8 bytes per step
// from str1 and 4 bytes per step from str2 in this mode).
// Besides the fixed integer registers, the three-temporary operand list
// names the vector operand classes vRegD_V0..vRegD_V2. The
// compare_long_string_different_encoding stub in this change uses v0, v1 and
// v2 directly, which is presumably why the temporaries are tied to those
// registers and declared KILL rather than TEMP.
instruct string_compareUL(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
|
||||
iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
|
||||
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
|
||||
vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);
|
||||
effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
|
||||
USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %}
|
||||
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
|
||||
ins_encode %{
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$Register,
|
||||
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::UL);
|
||||
$tmp1$$Register, $tmp2$$Register,
|
||||
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
|
||||
$vtmp3$$FloatRegister, StrIntrinsicNode::UL);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
||||
// Match rule for StrComp nodes whose encoding is StrIntrinsicNode::LU
// (mixed encodings; MacroAssembler::string_compare loads 4 bytes per step
// from str1 and 8 bytes per step from str2 in this mode).
// Operand layout mirrors string_compareUL: fixed integer registers plus, in
// the three-temporary operand list, vector temporaries of the classes
// vRegD_V0..vRegD_V2, all declared KILL.
instruct string_compareLU(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
|
||||
iRegI_R0 result, vRegD vtmp1, vRegD vtmp2, iRegP_R10 tmp1, rFlagsReg cr)
|
||||
iRegI_R0 result, iRegP_R10 tmp1, iRegL_R11 tmp2,
|
||||
vRegD_V0 vtmp1, vRegD_V1 vtmp2, vRegD_V2 vtmp3, rFlagsReg cr)
|
||||
%{
|
||||
predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
|
||||
match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
|
||||
effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, TEMP vtmp1, TEMP vtmp2, KILL cr);
|
||||
effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3,
|
||||
USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
|
||||
|
||||
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %}
|
||||
format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %}
|
||||
ins_encode %{
|
||||
__ string_compare($str1$$Register, $str2$$Register,
|
||||
$cnt1$$Register, $cnt2$$Register, $result$$Register,
|
||||
$tmp1$$Register,
|
||||
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister, StrIntrinsicNode::LU);
|
||||
$tmp1$$Register, $tmp2$$Register,
|
||||
$vtmp1$$FloatRegister, $vtmp2$$FloatRegister,
|
||||
$vtmp3$$FloatRegister,StrIntrinsicNode::LU);
|
||||
%}
|
||||
ins_pipe(pipe_class_memory);
|
||||
%}
|
||||
|
@ -4733,12 +4733,13 @@ void MacroAssembler::string_indexof_char(Register str1, Register cnt1,
|
||||
|
||||
// Compare strings.
|
||||
void MacroAssembler::string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result,
|
||||
Register tmp1,
|
||||
FloatRegister vtmp, FloatRegister vtmpZ, int ae) {
|
||||
Label LENGTH_DIFF, DONE, SHORT_LOOP, SHORT_STRING,
|
||||
NEXT_WORD, DIFFERENCE;
|
||||
Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2,
|
||||
FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) {
|
||||
Label DONE, SHORT_LOOP, SHORT_STRING, SHORT_LAST, TAIL, STUB,
|
||||
DIFFERENCE, NEXT_WORD, SHORT_LOOP_TAIL, SHORT_LAST2, SHORT_LAST_INIT,
|
||||
SHORT_LOOP_START, TAIL_CHECK;
|
||||
|
||||
const int STUB_THRESHOLD = 64 + 8;
|
||||
bool isLL = ae == StrIntrinsicNode::LL;
|
||||
bool isLU = ae == StrIntrinsicNode::LU;
|
||||
bool isUL = ae == StrIntrinsicNode::UL;
|
||||
@ -4750,7 +4751,9 @@ void MacroAssembler::string_compare(Register str1, Register str2,
|
||||
int str2_chr_shift = str2_isL ? 0 : 1;
|
||||
int str1_chr_size = str1_isL ? 1 : 2;
|
||||
int str2_chr_size = str2_isL ? 1 : 2;
|
||||
int minCharsInWord = isLL ? wordSize : wordSize/2;
|
||||
|
||||
FloatRegister vtmpZ = vtmp1, vtmp = vtmp2;
|
||||
chr_insn str1_load_chr = str1_isL ? (chr_insn)&MacroAssembler::ldrb :
|
||||
(chr_insn)&MacroAssembler::ldrh;
|
||||
chr_insn str2_load_chr = str2_isL ? (chr_insn)&MacroAssembler::ldrb :
|
||||
@ -4766,73 +4769,116 @@ void MacroAssembler::string_compare(Register str1, Register str2,
|
||||
if (!str2_isL) asrw(cnt2, cnt2, 1);
|
||||
|
||||
// Compute the minimum of the string lengths and save the difference.
|
||||
subsw(tmp1, cnt1, cnt2);
|
||||
subsw(result, cnt1, cnt2);
|
||||
cselw(cnt2, cnt1, cnt2, Assembler::LE); // min
|
||||
|
||||
// A very short string
|
||||
cmpw(cnt2, isLL ? 8:4);
|
||||
cmpw(cnt2, minCharsInWord);
|
||||
br(Assembler::LT, SHORT_STRING);
|
||||
|
||||
// Check if the strings start at the same location.
|
||||
cmp(str1, str2);
|
||||
br(Assembler::EQ, LENGTH_DIFF);
|
||||
|
||||
// Compare longwords
|
||||
// load first parts of strings and finish initialization while loading
|
||||
{
|
||||
subw(cnt2, cnt2, isLL ? 8:4); // The last longword is a special case
|
||||
|
||||
// Move both string pointers to the last longword of their
|
||||
// strings, negate the remaining count, and convert it to bytes.
|
||||
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
|
||||
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
|
||||
if (isLU || isUL) {
|
||||
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
|
||||
if (str1_isL == str2_isL) { // LL or UU
|
||||
ldr(tmp1, Address(str1));
|
||||
cmp(str1, str2);
|
||||
br(Assembler::EQ, DONE);
|
||||
ldr(tmp2, Address(str2));
|
||||
cmp(cnt2, STUB_THRESHOLD);
|
||||
br(GE, STUB);
|
||||
subsw(cnt2, cnt2, minCharsInWord);
|
||||
br(EQ, TAIL_CHECK);
|
||||
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
|
||||
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
|
||||
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
|
||||
} else if (isLU) {
|
||||
ldrs(vtmp, Address(str1));
|
||||
cmp(str1, str2);
|
||||
br(Assembler::EQ, DONE);
|
||||
ldr(tmp2, Address(str2));
|
||||
cmp(cnt2, STUB_THRESHOLD);
|
||||
br(GE, STUB);
|
||||
subsw(cnt2, cnt2, 4);
|
||||
br(EQ, TAIL_CHECK);
|
||||
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
|
||||
}
|
||||
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
|
||||
|
||||
// Loop, loading longwords and comparing them into rscratch2.
|
||||
bind(NEXT_WORD);
|
||||
if (isLU) {
|
||||
ldrs(vtmp, Address(str1, cnt1));
|
||||
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
|
||||
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
umov(result, vtmp, D, 0);
|
||||
} else {
|
||||
ldr(result, Address(str1, isUL ? cnt1:cnt2));
|
||||
}
|
||||
if (isUL) {
|
||||
ldrs(vtmp, Address(str2, cnt2));
|
||||
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
|
||||
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
|
||||
add(cnt1, cnt1, 4);
|
||||
fmovd(tmp1, vtmp);
|
||||
} else { // UL case
|
||||
ldr(tmp1, Address(str1));
|
||||
cmp(str1, str2);
|
||||
br(Assembler::EQ, DONE);
|
||||
ldrs(vtmp, Address(str2));
|
||||
cmp(cnt2, STUB_THRESHOLD);
|
||||
br(GE, STUB);
|
||||
subsw(cnt2, cnt2, 4);
|
||||
br(EQ, TAIL_CHECK);
|
||||
lea(str1, Address(str1, cnt2, Address::uxtw(str1_chr_shift)));
|
||||
eor(vtmpZ, T16B, vtmpZ, vtmpZ);
|
||||
lea(str2, Address(str2, cnt2, Address::uxtw(str2_chr_shift)));
|
||||
sub(cnt1, zr, cnt2, LSL, str1_chr_shift);
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
umov(rscratch1, vtmp, D, 0);
|
||||
} else {
|
||||
ldr(rscratch1, Address(str2, cnt2));
|
||||
sub(cnt2, zr, cnt2, LSL, str2_chr_shift);
|
||||
add(cnt1, cnt1, 8);
|
||||
fmovd(tmp2, vtmp);
|
||||
}
|
||||
adds(cnt2, cnt2, isUL ? 4:8);
|
||||
if (isLU || isUL) add(cnt1, cnt1, isLU ? 4:8);
|
||||
eor(rscratch2, result, rscratch1);
|
||||
adds(cnt2, cnt2, isUL ? 4 : 8);
|
||||
br(GE, TAIL);
|
||||
eor(rscratch2, tmp1, tmp2);
|
||||
cbnz(rscratch2, DIFFERENCE);
|
||||
br(Assembler::LT, NEXT_WORD);
|
||||
// main loop
|
||||
bind(NEXT_WORD);
|
||||
if (str1_isL == str2_isL) {
|
||||
ldr(tmp1, Address(str1, cnt2));
|
||||
ldr(tmp2, Address(str2, cnt2));
|
||||
adds(cnt2, cnt2, 8);
|
||||
} else if (isLU) {
|
||||
ldrs(vtmp, Address(str1, cnt1));
|
||||
ldr(tmp2, Address(str2, cnt2));
|
||||
add(cnt1, cnt1, 4);
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
fmovd(tmp1, vtmp);
|
||||
adds(cnt2, cnt2, 8);
|
||||
} else { // UL
|
||||
ldrs(vtmp, Address(str2, cnt2));
|
||||
ldr(tmp1, Address(str1, cnt1));
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
add(cnt1, cnt1, 8);
|
||||
fmovd(tmp2, vtmp);
|
||||
adds(cnt2, cnt2, 4);
|
||||
}
|
||||
br(GE, TAIL);
|
||||
|
||||
eor(rscratch2, tmp1, tmp2);
|
||||
cbz(rscratch2, NEXT_WORD);
|
||||
b(DIFFERENCE);
|
||||
bind(TAIL);
|
||||
eor(rscratch2, tmp1, tmp2);
|
||||
cbnz(rscratch2, DIFFERENCE);
|
||||
// Last longword. In the case where length == 4 we compare the
|
||||
// same longword twice, but that's still faster than another
|
||||
// conditional branch.
|
||||
|
||||
if (isLU) {
|
||||
if (str1_isL == str2_isL) {
|
||||
ldr(tmp1, Address(str1));
|
||||
ldr(tmp2, Address(str2));
|
||||
} else if (isLU) {
|
||||
ldrs(vtmp, Address(str1));
|
||||
ldr(tmp2, Address(str2));
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
umov(result, vtmp, D, 0);
|
||||
} else {
|
||||
ldr(result, Address(str1));
|
||||
}
|
||||
if (isUL) {
|
||||
fmovd(tmp1, vtmp);
|
||||
} else { // UL
|
||||
ldrs(vtmp, Address(str2));
|
||||
ldr(tmp1, Address(str1));
|
||||
zip1(vtmp, T8B, vtmp, vtmpZ);
|
||||
umov(rscratch1, vtmp, D, 0);
|
||||
} else {
|
||||
ldr(rscratch1, Address(str2));
|
||||
fmovd(tmp2, vtmp);
|
||||
}
|
||||
eor(rscratch2, result, rscratch1);
|
||||
cbz(rscratch2, LENGTH_DIFF);
|
||||
bind(TAIL_CHECK);
|
||||
eor(rscratch2, tmp1, tmp2);
|
||||
cbz(rscratch2, DONE);
|
||||
|
||||
// Find the first different characters in the longwords and
|
||||
// compute their difference.
|
||||
@ -4840,31 +4886,78 @@ void MacroAssembler::string_compare(Register str1, Register str2,
|
||||
rev(rscratch2, rscratch2);
|
||||
clz(rscratch2, rscratch2);
|
||||
andr(rscratch2, rscratch2, isLL ? -8 : -16);
|
||||
lsrv(result, result, rscratch2);
|
||||
(this->*ext_chr)(result, result);
|
||||
lsrv(rscratch1, rscratch1, rscratch2);
|
||||
(this->*ext_chr)(rscratch1, rscratch1);
|
||||
subw(result, result, rscratch1);
|
||||
lsrv(tmp1, tmp1, rscratch2);
|
||||
(this->*ext_chr)(tmp1, tmp1);
|
||||
lsrv(tmp2, tmp2, rscratch2);
|
||||
(this->*ext_chr)(tmp2, tmp2);
|
||||
subw(result, tmp1, tmp2);
|
||||
b(DONE);
|
||||
}
|
||||
|
||||
bind(STUB);
|
||||
RuntimeAddress stub = NULL;
|
||||
switch(ae) {
|
||||
case StrIntrinsicNode::LL:
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LL());
|
||||
break;
|
||||
case StrIntrinsicNode::UU:
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UU());
|
||||
break;
|
||||
case StrIntrinsicNode::LU:
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_LU());
|
||||
break;
|
||||
case StrIntrinsicNode::UL:
|
||||
stub = RuntimeAddress(StubRoutines::aarch64::compare_long_string_UL());
|
||||
break;
|
||||
default:
|
||||
ShouldNotReachHere();
|
||||
}
|
||||
assert(stub.target() != NULL, "compare_long_string stub has not been generated");
|
||||
trampoline_call(stub);
|
||||
b(DONE);
|
||||
|
||||
bind(SHORT_STRING);
|
||||
// Is the minimum length zero?
|
||||
cbz(cnt2, LENGTH_DIFF);
|
||||
|
||||
bind(SHORT_LOOP);
|
||||
(this->*str1_load_chr)(result, Address(post(str1, str1_chr_size)));
|
||||
cbz(cnt2, DONE);
|
||||
// arrange code to do most branches while loading and loading next characters
|
||||
// while comparing previous
|
||||
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
|
||||
subs(cnt2, cnt2, 1);
|
||||
br(EQ, SHORT_LAST_INIT);
|
||||
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
|
||||
subw(result, result, cnt1);
|
||||
cbnz(result, DONE);
|
||||
sub(cnt2, cnt2, 1);
|
||||
cbnz(cnt2, SHORT_LOOP);
|
||||
b(SHORT_LOOP_START);
|
||||
bind(SHORT_LOOP);
|
||||
subs(cnt2, cnt2, 1);
|
||||
br(EQ, SHORT_LAST);
|
||||
bind(SHORT_LOOP_START);
|
||||
(this->*str1_load_chr)(tmp2, Address(post(str1, str1_chr_size)));
|
||||
(this->*str2_load_chr)(rscratch1, Address(post(str2, str2_chr_size)));
|
||||
cmp(tmp1, cnt1);
|
||||
br(NE, SHORT_LOOP_TAIL);
|
||||
subs(cnt2, cnt2, 1);
|
||||
br(EQ, SHORT_LAST2);
|
||||
(this->*str1_load_chr)(tmp1, Address(post(str1, str1_chr_size)));
|
||||
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
|
||||
cmp(tmp2, rscratch1);
|
||||
br(EQ, SHORT_LOOP);
|
||||
sub(result, tmp2, rscratch1);
|
||||
b(DONE);
|
||||
bind(SHORT_LOOP_TAIL);
|
||||
sub(result, tmp1, cnt1);
|
||||
b(DONE);
|
||||
bind(SHORT_LAST2);
|
||||
cmp(tmp2, rscratch1);
|
||||
br(EQ, DONE);
|
||||
sub(result, tmp2, rscratch1);
|
||||
|
||||
// Strings are equal up to min length. Return the length difference.
|
||||
bind(LENGTH_DIFF);
|
||||
mov(result, tmp1);
|
||||
b(DONE);
|
||||
bind(SHORT_LAST_INIT);
|
||||
(this->*str2_load_chr)(cnt1, Address(post(str2, str2_chr_size)));
|
||||
bind(SHORT_LAST);
|
||||
cmp(tmp1, cnt1);
|
||||
br(EQ, DONE);
|
||||
sub(result, tmp1, cnt1);
|
||||
|
||||
// That's it
|
||||
bind(DONE);
|
||||
|
||||
BLOCK_COMMENT("} string_compare");
|
||||
|
@ -1212,8 +1212,8 @@ public:
|
||||
|
||||
void string_compare(Register str1, Register str2,
|
||||
Register cnt1, Register cnt2, Register result,
|
||||
Register tmp1,
|
||||
FloatRegister vtmp, FloatRegister vtmpZ, int ae);
|
||||
Register tmp1, Register tmp2, FloatRegister vtmp1,
|
||||
FloatRegister vtmp2, FloatRegister vtmp3, int ae);
|
||||
|
||||
void has_negatives(Register ary1, Register len, Register result);
|
||||
|
||||
|
@ -4014,6 +4014,317 @@ class StubGenerator: public StubCodeGenerator {
|
||||
return entry;
|
||||
}
|
||||
|
||||
// Emits code that compares 16 bytes of two strings with the same encoding.
//
// On entry tmp1 (r10) and tmp2 (r11) hold 8 bytes of str1 and str2 that have
// been loaded but not yet compared. The sequence loads the next 16 bytes from
// each string (post-incrementing str1/str2 by 8 twice) and compares the two
// oldest 8-byte pairs, interleaving loads with compares.
//
// Branches to DIFF1 when tmp1 != tmp2 and to DIFF2 when rscratch1 != cnt1;
// in both cases rscratch2 holds the XOR of the mismatching pair. On
// fall-through tmp1/tmp2 again hold 8 loaded-but-uncompared bytes.
// Clobbers rscratch1, rscratch2 and cnt1 (r2); uses no flag-setting
// instruction.
|
||||
void compare_string_16_bytes_same(Label &DIFF1, Label &DIFF2) {
|
||||
Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, tmp1 = r10, tmp2 = r11;
|
||||
__ ldr(rscratch1, Address(__ post(str1, 8)));
|
||||
__ eor(rscratch2, tmp1, tmp2);
|
||||
__ ldr(cnt1, Address(__ post(str2, 8)));
|
||||
__ cbnz(rscratch2, DIFF1);
|
||||
__ ldr(tmp1, Address(__ post(str1, 8)));
|
||||
__ eor(rscratch2, rscratch1, cnt1);
|
||||
__ ldr(tmp2, Address(__ post(str2, 8)));
|
||||
__ cbnz(rscratch2, DIFF2);
|
||||
}
|
||||
|
||||
// Emits code that compares 16 characters of a Latin1 string against 16
// characters of a UTF-16 string.
//
// Fixed registers used by the sequence:
//   tmp2 (r11) - load pointer into the Latin1 string, advanced by 16 bytes
//   cnt1 (r2)  - load pointer into the UTF-16 string, advanced by 32 bytes
//   tmp3 (r12) - on entry, 8 bytes of the UTF-16 string that were loaded but
//                not yet compared; on fall-through it holds the next such
//                8 bytes
//   v0 (vtmpZ) - read as the second zip operand; the visible caller zeroes it
//   v1, v2     - vector scratch
// tmpL and tmpU are the general registers that receive the widened Latin1
// word and the UTF-16 word of the pair being compared.
//
// Branches to DIFF2 when the mismatch is between tmp3 and tmpL, and to DIFF1
// when it is between tmpU and tmpL; rscratch2 holds the XOR of the pair.
// No flag-setting instruction is used.
|
||||
void compare_string_16_x_LU(Register tmpL, Register tmpU, Label &DIFF1,
|
||||
Label &DIFF2) {
|
||||
Register cnt1 = r2, tmp1 = r10, tmp2 = r11, tmp3 = r12;
|
||||
FloatRegister vtmp = v1, vtmpZ = v0, vtmp3 = v2;
|
||||
|
||||
__ ldrq(vtmp, Address(__ post(tmp2, 16)));
|
||||
__ ldr(tmpU, Address(__ post(cnt1, 8)));
|
||||
__ zip1(vtmp3, __ T16B, vtmp, vtmpZ);
|
||||
// vtmp3 now holds the first 8 Latin1 characters interleaved with vtmpZ bytes
// (i.e. widened to 16 bits); vtmp still holds the 16 raw bytes and is widened
// in place by the zip2 below
|
||||
|
||||
__ fmovd(tmpL, vtmp3);
|
||||
__ eor(rscratch2, tmp3, tmpL);
|
||||
__ cbnz(rscratch2, DIFF2);
|
||||
|
||||
__ ldr(tmp3, Address(__ post(cnt1, 8)));
|
||||
__ umov(tmpL, vtmp3, __ D, 1);
|
||||
__ eor(rscratch2, tmpU, tmpL);
|
||||
__ cbnz(rscratch2, DIFF1);
|
||||
|
||||
__ zip2(vtmp, __ T16B, vtmp, vtmpZ);
|
||||
__ ldr(tmpU, Address(__ post(cnt1, 8)));
|
||||
__ fmovd(tmpL, vtmp);
|
||||
__ eor(rscratch2, tmp3, tmpL);
|
||||
__ cbnz(rscratch2, DIFF2);
|
||||
|
||||
__ ldr(tmp3, Address(__ post(cnt1, 8)));
|
||||
__ umov(tmpL, vtmp, __ D, 1);
|
||||
__ eor(rscratch2, tmpU, tmpL);
|
||||
__ cbnz(rscratch2, DIFF1);
|
||||
}
|
||||
|
||||
// r0 = result
|
||||
// r1 = str1
|
||||
// r2 = cnt1
|
||||
// r3 = str2
|
||||
// r4 = cnt2
|
||||
// r10 = tmp1
|
||||
// r11 = tmp2
|
||||
address generate_compare_long_string_different_encoding(bool isLU) {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", isLU
|
||||
? "compare_long_string_different_encoding LU"
|
||||
: "compare_long_string_different_encoding UL");
|
||||
address entry = __ pc();
|
||||
Label SMALL_LOOP, TAIL, TAIL_LOAD_16, LOAD_LAST, DIFF1, DIFF2,
|
||||
DONE, CALCULATE_DIFFERENCE, LARGE_LOOP_PREFETCH, SMALL_LOOP_ENTER,
|
||||
LARGE_LOOP_PREFETCH_REPEAT1, LARGE_LOOP_PREFETCH_REPEAT2;
|
||||
Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
|
||||
tmp1 = r10, tmp2 = r11, tmp3 = r12, tmp4 = r14;
|
||||
FloatRegister vtmpZ = v0, vtmp = v1, vtmp3 = v2;
|
||||
RegSet spilled_regs = RegSet::of(tmp3, tmp4);
|
||||
|
||||
int prefetchLoopExitCondition = MAX(32, SoftwarePrefetchHintDistance/2);
|
||||
|
||||
__ eor(vtmpZ, __ T16B, vtmpZ, vtmpZ);
|
||||
// cnt2 == amount of characters left to compare
|
||||
// Check already loaded first 4 symbols(vtmp and tmp2(LU)/tmp1(UL))
|
||||
__ zip1(vtmp, __ T8B, vtmp, vtmpZ);
|
||||
__ add(str1, str1, isLU ? wordSize/2 : wordSize);
|
||||
__ add(str2, str2, isLU ? wordSize : wordSize/2);
|
||||
__ fmovd(isLU ? tmp1 : tmp2, vtmp);
|
||||
__ subw(cnt2, cnt2, 8); // Already loaded 4 symbols. Last 4 is special case.
|
||||
__ add(str1, str1, cnt2, __ LSL, isLU ? 0 : 1);
|
||||
__ eor(rscratch2, tmp1, tmp2);
|
||||
__ add(str2, str2, cnt2, __ LSL, isLU ? 1 : 0);
|
||||
__ mov(rscratch1, tmp2);
|
||||
__ cbnz(rscratch2, CALCULATE_DIFFERENCE);
|
||||
Register strU = isLU ? str2 : str1,
|
||||
strL = isLU ? str1 : str2,
|
||||
tmpU = isLU ? rscratch1 : tmp1, // where to keep U for comparison
|
||||
tmpL = isLU ? tmp1 : rscratch1; // where to keep L for comparison
|
||||
__ push(spilled_regs, sp);
|
||||
__ sub(tmp2, strL, cnt2); // strL pointer to load from
|
||||
__ sub(cnt1, strU, cnt2, __ LSL, 1); // strU pointer to load from
|
||||
|
||||
__ ldr(tmp3, Address(__ post(cnt1, 8)));
|
||||
|
||||
if (SoftwarePrefetchHintDistance >= 0) {
|
||||
__ cmp(cnt2, prefetchLoopExitCondition);
|
||||
__ br(__ LT, SMALL_LOOP);
|
||||
__ bind(LARGE_LOOP_PREFETCH);
|
||||
__ prfm(Address(tmp2, SoftwarePrefetchHintDistance));
|
||||
__ mov(tmp4, 2);
|
||||
__ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
|
||||
__ bind(LARGE_LOOP_PREFETCH_REPEAT1);
|
||||
compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
|
||||
__ subs(tmp4, tmp4, 1);
|
||||
__ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT1);
|
||||
__ prfm(Address(cnt1, SoftwarePrefetchHintDistance));
|
||||
__ mov(tmp4, 2);
|
||||
__ bind(LARGE_LOOP_PREFETCH_REPEAT2);
|
||||
compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
|
||||
__ subs(tmp4, tmp4, 1);
|
||||
__ br(__ GT, LARGE_LOOP_PREFETCH_REPEAT2);
|
||||
__ sub(cnt2, cnt2, 64);
|
||||
__ cmp(cnt2, prefetchLoopExitCondition);
|
||||
__ br(__ GE, LARGE_LOOP_PREFETCH);
|
||||
}
|
||||
__ cbz(cnt2, LOAD_LAST); // no characters left except last load
|
||||
__ subs(cnt2, cnt2, 16);
|
||||
__ br(__ LT, TAIL);
|
||||
__ b(SMALL_LOOP_ENTER);
|
||||
__ bind(SMALL_LOOP); // smaller loop
|
||||
__ subs(cnt2, cnt2, 16);
|
||||
__ bind(SMALL_LOOP_ENTER);
|
||||
compare_string_16_x_LU(tmpL, tmpU, DIFF1, DIFF2);
|
||||
__ br(__ GE, SMALL_LOOP);
|
||||
__ cbz(cnt2, LOAD_LAST);
|
||||
__ bind(TAIL); // 1..15 characters left
|
||||
__ cmp(cnt2, -8);
|
||||
__ br(__ GT, TAIL_LOAD_16);
|
||||
__ ldrd(vtmp, Address(tmp2));
|
||||
__ zip1(vtmp3, __ T8B, vtmp, vtmpZ);
|
||||
|
||||
__ ldr(tmpU, Address(__ post(cnt1, 8)));
|
||||
__ fmovd(tmpL, vtmp3);
|
||||
__ eor(rscratch2, tmp3, tmpL);
|
||||
__ cbnz(rscratch2, DIFF2);
|
||||
__ umov(tmpL, vtmp3, __ D, 1);
|
||||
__ eor(rscratch2, tmpU, tmpL);
|
||||
__ cbnz(rscratch2, DIFF1);
|
||||
__ b(LOAD_LAST);
|
||||
__ bind(TAIL_LOAD_16);
|
||||
__ ldrq(vtmp, Address(tmp2));
|
||||
__ ldr(tmpU, Address(__ post(cnt1, 8)));
|
||||
__ zip1(vtmp3, __ T16B, vtmp, vtmpZ);
|
||||
__ zip2(vtmp, __ T16B, vtmp, vtmpZ);
|
||||
__ fmovd(tmpL, vtmp3);
|
||||
__ eor(rscratch2, tmp3, tmpL);
|
||||
__ cbnz(rscratch2, DIFF2);
|
||||
|
||||
__ ldr(tmp3, Address(__ post(cnt1, 8)));
|
||||
__ umov(tmpL, vtmp3, __ D, 1);
|
||||
__ eor(rscratch2, tmpU, tmpL);
|
||||
__ cbnz(rscratch2, DIFF1);
|
||||
|
||||
__ ldr(tmpU, Address(__ post(cnt1, 8)));
|
||||
__ fmovd(tmpL, vtmp);
|
||||
__ eor(rscratch2, tmp3, tmpL);
|
||||
__ cbnz(rscratch2, DIFF2);
|
||||
|
||||
__ umov(tmpL, vtmp, __ D, 1);
|
||||
__ eor(rscratch2, tmpU, tmpL);
|
||||
__ cbnz(rscratch2, DIFF1);
|
||||
__ b(LOAD_LAST);
|
||||
__ bind(DIFF2);
|
||||
__ mov(tmpU, tmp3);
|
||||
__ bind(DIFF1);
|
||||
__ pop(spilled_regs, sp);
|
||||
__ b(CALCULATE_DIFFERENCE);
|
||||
__ bind(LOAD_LAST);
|
||||
__ pop(spilled_regs, sp);
|
||||
|
||||
__ ldrs(vtmp, Address(strL));
|
||||
__ ldr(tmpU, Address(strU));
|
||||
__ zip1(vtmp, __ T8B, vtmp, vtmpZ);
|
||||
__ fmovd(tmpL, vtmp);
|
||||
|
||||
__ eor(rscratch2, tmpU, tmpL);
|
||||
__ cbz(rscratch2, DONE);
|
||||
|
||||
// Find the first different characters in the longwords and
|
||||
// compute their difference.
|
||||
__ bind(CALCULATE_DIFFERENCE);
|
||||
__ rev(rscratch2, rscratch2);
|
||||
__ clz(rscratch2, rscratch2);
|
||||
__ andr(rscratch2, rscratch2, -16);
|
||||
__ lsrv(tmp1, tmp1, rscratch2);
|
||||
__ uxthw(tmp1, tmp1);
|
||||
__ lsrv(rscratch1, rscratch1, rscratch2);
|
||||
__ uxthw(rscratch1, rscratch1);
|
||||
__ subw(result, tmp1, rscratch1);
|
||||
__ bind(DONE);
|
||||
__ ret(lr);
|
||||
return entry;
|
||||
}
|
||||
|
||||
// r0 = result
|
||||
// r1 = str1
|
||||
// r2 = cnt1
|
||||
// r3 = str2
|
||||
// r4 = cnt2
|
||||
// r10 = tmp1
|
||||
// r11 = tmp2
|
||||
address generate_compare_long_string_same_encoding(bool isLL) {
|
||||
__ align(CodeEntryAlignment);
|
||||
StubCodeMark mark(this, "StubRoutines", isLL
|
||||
? "compare_long_string_same_encoding LL"
|
||||
: "compare_long_string_same_encoding UU");
|
||||
address entry = __ pc();
|
||||
Register result = r0, str1 = r1, cnt1 = r2, str2 = r3, cnt2 = r4,
|
||||
tmp1 = r10, tmp2 = r11;
|
||||
Label SMALL_LOOP, LARGE_LOOP_PREFETCH, CHECK_LAST, DIFF2, TAIL,
|
||||
LENGTH_DIFF, DIFF, LAST_CHECK_AND_LENGTH_DIFF,
|
||||
DIFF_LAST_POSITION, DIFF_LAST_POSITION2;
|
||||
// exit from large loop when less than 64 bytes left to read or we're about
|
||||
// to prefetch memory behind array border
|
||||
int largeLoopExitCondition = MAX(64, SoftwarePrefetchHintDistance)/(isLL ? 1 : 2);
|
||||
// cnt1/cnt2 contains amount of characters to compare. cnt1 can be re-used
|
||||
// update cnt2 counter with already loaded 8 bytes
|
||||
__ sub(cnt2, cnt2, wordSize/(isLL ? 1 : 2));
|
||||
// update pointers, because of previous read
|
||||
__ add(str1, str1, wordSize);
|
||||
__ add(str2, str2, wordSize);
|
||||
if (SoftwarePrefetchHintDistance >= 0) {
|
||||
__ bind(LARGE_LOOP_PREFETCH);
|
||||
__ prfm(Address(str1, SoftwarePrefetchHintDistance));
|
||||
__ prfm(Address(str2, SoftwarePrefetchHintDistance));
|
||||
compare_string_16_bytes_same(DIFF, DIFF2);
|
||||
compare_string_16_bytes_same(DIFF, DIFF2);
|
||||
__ sub(cnt2, cnt2, isLL ? 64 : 32);
|
||||
compare_string_16_bytes_same(DIFF, DIFF2);
|
||||
__ cmp(cnt2, largeLoopExitCondition);
|
||||
compare_string_16_bytes_same(DIFF, DIFF2);
|
||||
__ br(__ GT, LARGE_LOOP_PREFETCH);
|
||||
__ cbz(cnt2, LAST_CHECK_AND_LENGTH_DIFF); // no more chars left?
|
||||
// less than 16 bytes left?
|
||||
__ subs(cnt2, cnt2, isLL ? 16 : 8);
|
||||
__ br(__ LT, TAIL);
|
||||
}
|
||||
__ bind(SMALL_LOOP);
|
||||
compare_string_16_bytes_same(DIFF, DIFF2);
|
||||
__ subs(cnt2, cnt2, isLL ? 16 : 8);
|
||||
__ br(__ GE, SMALL_LOOP);
|
||||
__ bind(TAIL);
|
||||
__ adds(cnt2, cnt2, isLL ? 16 : 8);
|
||||
__ br(__ EQ, LAST_CHECK_AND_LENGTH_DIFF);
|
||||
__ subs(cnt2, cnt2, isLL ? 8 : 4);
|
||||
__ br(__ LE, CHECK_LAST);
|
||||
__ eor(rscratch2, tmp1, tmp2);
|
||||
__ cbnz(rscratch2, DIFF);
|
||||
__ ldr(tmp1, Address(__ post(str1, 8)));
|
||||
__ ldr(tmp2, Address(__ post(str2, 8)));
|
||||
__ sub(cnt2, cnt2, isLL ? 8 : 4);
|
||||
__ bind(CHECK_LAST);
|
||||
if (!isLL) {
|
||||
__ add(cnt2, cnt2, cnt2); // now in bytes
|
||||
}
|
||||
__ eor(rscratch2, tmp1, tmp2);
|
||||
__ cbnz(rscratch2, DIFF);
|
||||
__ ldr(rscratch1, Address(str1, cnt2));
|
||||
__ ldr(cnt1, Address(str2, cnt2));
|
||||
__ eor(rscratch2, rscratch1, cnt1);
|
||||
__ cbz(rscratch2, LENGTH_DIFF);
|
||||
// Find the first different characters in the longwords and
|
||||
// compute their difference.
|
||||
__ bind(DIFF2);
|
||||
__ rev(rscratch2, rscratch2);
|
||||
__ clz(rscratch2, rscratch2);
|
||||
__ andr(rscratch2, rscratch2, isLL ? -8 : -16);
|
||||
__ lsrv(rscratch1, rscratch1, rscratch2);
|
||||
if (isLL) {
|
||||
__ lsrv(cnt1, cnt1, rscratch2);
|
||||
__ uxtbw(rscratch1, rscratch1);
|
||||
__ uxtbw(cnt1, cnt1);
|
||||
} else {
|
||||
__ lsrv(cnt1, cnt1, rscratch2);
|
||||
__ uxthw(rscratch1, rscratch1);
|
||||
__ uxthw(cnt1, cnt1);
|
||||
}
|
||||
__ subw(result, rscratch1, cnt1);
|
||||
__ b(LENGTH_DIFF);
|
||||
__ bind(DIFF);
|
||||
__ rev(rscratch2, rscratch2);
|
||||
__ clz(rscratch2, rscratch2);
|
||||
__ andr(rscratch2, rscratch2, isLL ? -8 : -16);
|
||||
__ lsrv(tmp1, tmp1, rscratch2);
|
||||
if (isLL) {
|
||||
__ lsrv(tmp2, tmp2, rscratch2);
|
||||
__ uxtbw(tmp1, tmp1);
|
||||
__ uxtbw(tmp2, tmp2);
|
||||
} else {
|
||||
__ lsrv(tmp2, tmp2, rscratch2);
|
||||
__ uxthw(tmp1, tmp1);
|
||||
__ uxthw(tmp2, tmp2);
|
||||
}
|
||||
__ subw(result, tmp1, tmp2);
|
||||
__ b(LENGTH_DIFF);
|
||||
__ bind(LAST_CHECK_AND_LENGTH_DIFF);
|
||||
__ eor(rscratch2, tmp1, tmp2);
|
||||
__ cbnz(rscratch2, DIFF);
|
||||
__ bind(LENGTH_DIFF);
|
||||
__ ret(lr);
|
||||
return entry;
|
||||
}
|
||||
|
||||
// Generates the four long-string compare stubs, one per encoding combination,
// and stores their entry addresses in the corresponding
// StubRoutines::aarch64 fields.
void generate_compare_long_strings() {
|
||||
StubRoutines::aarch64::_compare_long_string_LL
|
||||
= generate_compare_long_string_same_encoding(true); // isLL
|
||||
StubRoutines::aarch64::_compare_long_string_UU
|
||||
= generate_compare_long_string_same_encoding(false); // !isLL, i.e. UU
|
||||
StubRoutines::aarch64::_compare_long_string_LU
|
||||
= generate_compare_long_string_different_encoding(true); // isLU
|
||||
StubRoutines::aarch64::_compare_long_string_UL
|
||||
= generate_compare_long_string_different_encoding(false); // !isLU, i.e. UL
|
||||
}
|
||||
|
||||
/**
|
||||
* Arguments:
|
||||
*
|
||||
@ -5113,6 +5424,8 @@ class StubGenerator: public StubCodeGenerator {
|
||||
StubRoutines::aarch64::_large_array_equals = generate_large_array_equals();
|
||||
}
|
||||
|
||||
generate_compare_long_strings();
|
||||
|
||||
if (UseMultiplyToLenIntrinsic) {
|
||||
StubRoutines::_multiplyToLen = generate_multiplyToLen();
|
||||
}
|
||||
|
@ -48,6 +48,10 @@ address StubRoutines::aarch64::_zero_blocks = NULL;
|
||||
address StubRoutines::aarch64::_has_negatives = NULL;
|
||||
address StubRoutines::aarch64::_has_negatives_long = NULL;
|
||||
address StubRoutines::aarch64::_large_array_equals = NULL;
|
||||
address StubRoutines::aarch64::_compare_long_string_LL = NULL;
|
||||
address StubRoutines::aarch64::_compare_long_string_UU = NULL;
|
||||
address StubRoutines::aarch64::_compare_long_string_LU = NULL;
|
||||
address StubRoutines::aarch64::_compare_long_string_UL = NULL;
|
||||
bool StubRoutines::aarch64::_completed = false;
|
||||
|
||||
/**
|
||||
|
@ -66,6 +66,10 @@ class aarch64 {
|
||||
static address _has_negatives;
|
||||
static address _has_negatives_long;
|
||||
static address _large_array_equals;
|
||||
static address _compare_long_string_LL;
|
||||
static address _compare_long_string_LU;
|
||||
static address _compare_long_string_UL;
|
||||
static address _compare_long_string_UU;
|
||||
static bool _completed;
|
||||
|
||||
public:
|
||||
@ -136,6 +140,22 @@ class aarch64 {
|
||||
return _large_array_equals;
|
||||
}
|
||||
|
||||
// Returns the stored entry address of the LL long-string compare stub
// (_compare_long_string_LL).
static address compare_long_string_LL() {
|
||||
return _compare_long_string_LL;
|
||||
}
|
||||
|
||||
// Returns the stored entry address of the LU long-string compare stub
// (_compare_long_string_LU).
static address compare_long_string_LU() {
|
||||
return _compare_long_string_LU;
|
||||
}
|
||||
|
||||
// Returns the stored entry address of the UL long-string compare stub
// (_compare_long_string_UL).
static address compare_long_string_UL() {
|
||||
return _compare_long_string_UL;
|
||||
}
|
||||
|
||||
// Returns the stored entry address of the UU long-string compare stub
// (_compare_long_string_UU).
static address compare_long_string_UU() {
|
||||
return _compare_long_string_UU;
|
||||
}
|
||||
|
||||
static bool complete() {
|
||||
return _completed;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user