8340241: RISC-V: Returns mispredicted

Reviewed-by: fyang, luhenry
Robbin Ehn 2024-10-21 13:15:52 +00:00
parent 07f550b85a
commit 66ddaaa359
18 changed files with 171 additions and 132 deletions


@@ -2892,8 +2892,9 @@ public:
   // Unconditional branch instructions
   // --------------------------
 protected:
-  // All calls and jumps must go via MASM.
+  // All calls and jumps must go via MASM. Only use x1 (aka ra) as link register for now.
   void jalr(Register Rd, Register Rs, const int32_t offset) {
+    assert(Rd != x5 && Rs != x5, "Register x5 must not be used for calls/jumps.");
     /* jalr -> c.jr/c.jalr */
     if (do_compress() && (offset == 0 && Rs != x0)) {
       if (Rd == x1) {
@@ -2908,6 +2909,7 @@ public:
   }
   void jal(Register Rd, const int32_t offset) {
+    assert(Rd != x5, "Register x5 must not be used for calls/jumps.");
     /* jal -> c.j, note c.jal is RV32C only */
     if (do_compress() &&
         Rd == x0 &&
@@ -2915,7 +2917,6 @@ public:
       c_j(offset);
       return;
     }
     _jal(Rd, offset);
   }


@@ -93,7 +93,7 @@ void RangeCheckStub::emit_code(LIR_Assembler* ce) {
     stub_id = C1StubId::throw_range_check_failed_id;
   }
   // t0 and t1 are used as args in generate_exception_throw
-  // so use ra as the tmp register for rt_call.
+  // so use x1/ra as the tmp register for rt_call.
   __ rt_call(Runtime1::entry_for(stub_id), ra);
   ce->add_call_info_here(_info);
   ce->verify_oop_map(_info);
@@ -275,7 +275,7 @@ void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {
   if (_obj->is_cpu_register()) {
     __ mv(t0, _obj->as_register());
   }
-  __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), t1);
+  __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)));
   ce->add_call_info_here(_info);
   debug_only(__ should_not_reach_here());
 }


@@ -271,8 +271,8 @@ static void generate_c2_barrier_runtime_call(MacroAssembler* masm, G1BarrierStub
     __ mv(c_rarg0, arg);
   }
   __ mv(c_rarg1, xthread);
-  __ mv(t0, runtime_path);
-  __ jalr(t0);
+  __ mv(t1, runtime_path);
+  __ jalr(t1);
 }

 void G1BarrierSetAssembler::g1_write_barrier_pre_c2(MacroAssembler* masm,


@@ -339,8 +339,8 @@ void XBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, X
     XSaveLiveRegisters save_live_registers(masm, stub);
     XSetupArguments setup_arguments(masm, stub);
-    __ mv(t0, stub->slow_path());
-    __ jalr(t0);
+    __ mv(t1, stub->slow_path());
+    __ jalr(t1);
   }

   // Stub exit


@@ -724,8 +724,8 @@ void ZBarrierSetAssembler::generate_c2_load_barrier_stub(MacroAssembler* masm, Z
   {
     SaveLiveRegisters save_live_registers(masm, stub);
     ZSetupArguments setup_arguments(masm, stub);
-    __ mv(t0, stub->slow_path());
-    __ jalr(t0);
+    __ mv(t1, stub->slow_path());
+    __ jalr(t1);
   }

   // Stub exit
@@ -758,15 +758,15 @@ void ZBarrierSetAssembler::generate_c2_store_barrier_stub(MacroAssembler* masm,
     __ la(c_rarg0, stub->ref_addr());
     if (stub->is_native()) {
-      __ la(t0, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_native_oop_field_without_healing_addr()));
+      __ la(t1, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_native_oop_field_without_healing_addr()));
     } else if (stub->is_atomic()) {
-      __ la(t0, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_oop_field_with_healing_addr()));
+      __ la(t1, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_oop_field_with_healing_addr()));
     } else if (stub->is_nokeepalive()) {
-      __ la(t0, RuntimeAddress(ZBarrierSetRuntime::no_keepalive_store_barrier_on_oop_field_without_healing_addr()));
+      __ la(t1, RuntimeAddress(ZBarrierSetRuntime::no_keepalive_store_barrier_on_oop_field_without_healing_addr()));
     } else {
-      __ la(t0, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_oop_field_without_healing_addr()));
+      __ la(t1, RuntimeAddress(ZBarrierSetRuntime::store_barrier_on_oop_field_without_healing_addr()));
     }
-    __ jalr(t0);
+    __ jalr(t1);
   }

   // Stub exit


@@ -421,13 +421,13 @@ void InterpreterMacroAssembler::jump_from_interpreted(Register method) {
     // interp_only_mode if these events CAN be enabled.
     lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset()));
     beqz(t0, run_compiled_code);
-    ld(t0, Address(method, Method::interpreter_entry_offset()));
-    jr(t0);
+    ld(t1, Address(method, Method::interpreter_entry_offset()));
+    jr(t1);
     bind(run_compiled_code);
   }
-  ld(t0, Address(method, Method::from_interpreted_offset()));
-  jr(t0);
+  ld(t1, Address(method, Method::from_interpreted_offset()));
+  jr(t1);
 }

 // The following two routines provide a hook so that an implementation


@@ -457,8 +457,8 @@ void MacroAssembler::call_VM_base(Register oop_result,
     RuntimeAddress target(StubRoutines::forward_exception_entry());
     relocate(target.rspec(), [&] {
       int32_t offset;
-      la(t0, target.target(), offset);
-      jr(t0, offset);
+      la(t1, target.target(), offset);
+      jr(t1, offset);
     });
     bind(ok);
   }
@@ -760,21 +760,21 @@ void MacroAssembler::emit_static_call_stub() {
   // Jump to the entry point of the c2i stub.
   int32_t offset = 0;
-  movptr(t0, 0, offset, t1); // lui + lui + slli + add
-  jr(t0, offset);
+  movptr(t1, 0, offset, t0); // lui + lui + slli + add
+  jr(t1, offset);
 }

 void MacroAssembler::call_VM_leaf_base(address entry_point,
                                        int number_of_arguments,
                                        Label *retaddr) {
   int32_t offset = 0;
-  push_reg(RegSet::of(t0, xmethod), sp); // push << t0 & xmethod >> to sp
-  mv(t0, entry_point, offset);
-  jalr(t0, offset);
+  push_reg(RegSet::of(t1, xmethod), sp); // push << t1 & xmethod >> to sp
+  mv(t1, entry_point, offset);
+  jalr(t1, offset);
   if (retaddr != nullptr) {
     bind(*retaddr);
   }
-  pop_reg(RegSet::of(t0, xmethod), sp); // pop << t0 & xmethod >> from sp
+  pop_reg(RegSet::of(t1, xmethod), sp); // pop << t1 & xmethod >> from sp
 }

 void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) {
@@ -941,6 +941,7 @@ void MacroAssembler::li(Register Rd, int64_t imm) {
 void MacroAssembler::load_link_jump(const address source, Register temp) {
   assert(temp != noreg && temp != x0, "expecting a register");
+  assert(temp != x5, "temp register must not be x5.");
   assert_cond(source != nullptr);
   int64_t distance = source - pc();
   assert(is_simm32(distance), "Must be");
@@ -968,7 +969,8 @@ void MacroAssembler::j(const address dest, Register temp) {
   if (is_simm21(distance) && ((distance % 2) == 0)) {
     Assembler::jal(x0, distance);
   } else {
-    assert(temp != noreg && temp != x0, "expecting a register");
+    assert(temp != noreg && temp != x0, "Expecting a register");
+    assert(temp != x1 && temp != x5, "temp register must not be x1/x5.");
     int32_t offset = 0;
     la(temp, dest, offset);
     jr(temp, offset);
@@ -1006,12 +1008,14 @@ void MacroAssembler::j(Label &lab, Register temp) {
 void MacroAssembler::jr(Register Rd, int32_t offset) {
   assert(Rd != noreg, "expecting a register");
+  assert(Rd != x1 && Rd != x5, "Rd register must not be x1/x5.");
   Assembler::jalr(x0, Rd, offset);
 }

 void MacroAssembler::call(const address dest, Register temp) {
   assert_cond(dest != nullptr);
   assert(temp != noreg, "expecting a register");
+  assert(temp != x5, "temp register must not be x5.");
   int32_t offset = 0;
   la(temp, dest, offset);
   jalr(temp, offset);
@@ -1019,10 +1023,12 @@ void MacroAssembler::call(const address dest, Register temp) {
 void MacroAssembler::jalr(Register Rs, int32_t offset) {
   assert(Rs != noreg, "expecting a register");
+  assert(Rs != x5, "Rs register must not be x5.");
   Assembler::jalr(x1, Rs, offset);
 }

 void MacroAssembler::rt_call(address dest, Register tmp) {
+  assert(tmp != x5, "tmp register must not be x5.");
   CodeBlob *cb = CodeCache::find_blob(dest);
   RuntimeAddress target(dest);
   if (cb) {
@@ -1762,7 +1768,7 @@ void MacroAssembler::pop_CPU_state(bool restore_vectors, int vector_size_in_byte
 static int patch_offset_in_jal(address branch, int64_t offset) {
   assert(Assembler::is_simm21(offset) && ((offset % 2) == 0),
-         "offset is too large to be patched in one jal instruction!\n");
+         "offset (%ld) is too large to be patched in one jal instruction!\n", offset);
   Assembler::patch(branch, 31, 31, (offset >> 20) & 0x1);   // offset[20]   ==> branch[31]
   Assembler::patch(branch, 30, 21, (offset >> 1)  & 0x3ff); // offset[10:1] ==> branch[30:21]
   Assembler::patch(branch, 20, 20, (offset >> 11) & 0x1);   // offset[11]   ==> branch[20]
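For reference, the patches above scatter the offset exactly as the RISC-V J-type immediate layout prescribes. A minimal standalone sketch (hypothetical helper, not part of the patch) that also covers the offset[19:12] field falling outside the shown hunk:

#include <cstdint>

// Scatter a pc-relative offset into the immediate fields of a J-type (jal)
// instruction word, mirroring what patch_offset_in_jal() does in place.
static uint32_t encode_jal_offset(uint32_t insn, int64_t offset) {
  insn |= static_cast<uint32_t>((offset >> 20) & 0x1)   << 31; // offset[20]    -> insn[31]
  insn |= static_cast<uint32_t>((offset >> 1)  & 0x3ff) << 21; // offset[10:1]  -> insn[30:21]
  insn |= static_cast<uint32_t>((offset >> 11) & 0x1)   << 20; // offset[11]    -> insn[20]
  insn |= static_cast<uint32_t>((offset >> 12) & 0xff)  << 12; // offset[19:12] -> insn[19:12]
  return insn;
}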
@@ -3658,6 +3664,7 @@ void MacroAssembler::far_jump(const Address &entry, Register tmp) {
 }

 void MacroAssembler::far_call(const Address &entry, Register tmp) {
+  assert(tmp != x5, "tmp register must not be x5.");
   assert(CodeCache::find_blob(entry.target()) != nullptr,
          "destination of far call not found in code cache");
   assert(entry.rspec().type() == relocInfo::external_word_type
@@ -4072,7 +4079,7 @@ void MacroAssembler::verify_secondary_supers_table(Register r_sub_klass,
                                                    Register tmp1,
                                                    Register tmp2,
                                                    Register tmp3) {
-  assert_different_registers(r_sub_klass, r_super_klass, tmp1, tmp2, tmp3, result, t0);
+  assert_different_registers(r_sub_klass, r_super_klass, tmp1, tmp2, tmp3, result, t0, t1);

   const Register
     r_array_base = tmp1, // X11
@@ -4139,8 +4146,8 @@ void MacroAssembler::get_thread(Register thread) {
                           RegSet::range(x28, x31) + ra - thread;
   push_reg(saved_regs, sp);

-  mv(ra, CAST_FROM_FN_PTR(address, Thread::current));
-  jalr(ra);
+  mv(t1, CAST_FROM_FN_PTR(address, Thread::current));
+  jalr(t1);
   if (thread != c_rarg0) {
     mv(thread, c_rarg0);
   }
@@ -4187,8 +4194,8 @@ void MacroAssembler::reserved_stack_check() {
   // We have already removed our own frame.
   // throw_delayed_StackOverflowError will think that it's been
   // called by our caller.
-  la(t0, RuntimeAddress(SharedRuntime::throw_delayed_StackOverflowError_entry()));
-  jr(t0);
+  la(t1, RuntimeAddress(SharedRuntime::throw_delayed_StackOverflowError_entry()));
+  jr(t1);
   should_not_reach_here();

   bind(no_reserved_zone_enabling);
@@ -4299,7 +4306,7 @@ address MacroAssembler::load_and_call(Address entry) {
   }
 #endif
   relocate(entry.rspec(), [&] {
-    load_link_jump(target);
+    load_link_jump(target, t1);
   });

   postcond(pc() != badAddress);
@@ -4309,7 +4316,7 @@ address MacroAssembler::load_and_call(Address entry) {
 address MacroAssembler::ic_call(address entry, jint method_index) {
   RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
   IncompressibleRegion ir(this); // relocations
-  movptr(t1, (address)Universe::non_oop_word(), t0);
+  movptr(t0, (address)Universe::non_oop_word(), t1);
   assert_cond(entry != nullptr);
   return reloc_call(Address(entry, rh));
 }
@@ -4323,9 +4330,9 @@ int MacroAssembler::ic_check_size() {
 int MacroAssembler::ic_check(int end_alignment) {
   IncompressibleRegion ir(this);
   Register receiver = j_rarg0;
-  Register data = t1;
-  Register tmp1 = t0; // t0 always scratch
+  Register data = t0;
+  Register tmp1 = t1; // scratch
   // t2 is saved on call, thus should have been saved before this check.
   // Hence we can clobber it.
   Register tmp2 = t2;
@@ -4423,8 +4430,8 @@ address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset,
   // - load the call
   // - call
   Label target;
-  ld(t0, target); // auipc + ld
-  jr(t0);         // jalr
+  ld(t1, target); // auipc + ld
+  jr(t1);         // jalr
   bind(target);
   assert(offset() - stub_start_offset == MacroAssembler::NativeShortCall::trampoline_data_offset,
          "should be");
@@ -5148,11 +5155,11 @@ const int MacroAssembler::zero_words_block_size = 8;
 // ptr:  Address of a buffer to be zeroed.
 // cnt:  Count in HeapWords.
 //
-// ptr, cnt, and t0 are clobbered.
+// ptr, cnt, t1, and t0 are clobbered.
 address MacroAssembler::zero_words(Register ptr, Register cnt) {
   assert(is_power_of_2(zero_words_block_size), "adjust this");
   assert(ptr == x28 && cnt == x29, "mismatch in register usage");
-  assert_different_registers(cnt, t0);
+  assert_different_registers(cnt, t0, t1);

   BLOCK_COMMENT("zero_words {");
@@ -5170,6 +5177,7 @@ address MacroAssembler::zero_words(Register ptr, Register cnt) {
       return nullptr;
     }
   } else {
+    // Clobbers t1
     rt_call(zero_blocks.target());
   }
 }


@@ -627,7 +627,7 @@ class MacroAssembler: public Assembler {
   void bgtz(Register Rs, const address dest);

 private:
-  void load_link_jump(const address source, Register temp = t0);
+  void load_link_jump(const address source, Register temp);
   void jump_link(const address dest, Register temp);

 public:
   // We try to follow RISC-V asm mnemonics.
@@ -635,18 +635,42 @@ class MacroAssembler: public Assembler {
   // we often need to resort to movptr, li <48imm>.
   // https://github.com/riscv-non-isa/riscv-asm-manual/blob/master/riscv-asm.md
+  //
+  // HotSpot only uses the standard calling convention, with x1/ra as the link register.
+  // The alternative calling convention using x5/t0 is not used.
+  // Using x5 as a temp causes the CPU to mispredict returns.
+  //
+  // JALR, return address stack updates:
+  // | rd is x1/x5 | rs1 is x1/x5 | rd=rs1 | RAS action
+  // | ----------- | ------------ | ------ | --------------
+  // | No          | No           | —      | None
+  // | No          | Yes          | —      | Pop
+  // | Yes         | No           | —      | Push
+  // | Yes         | Yes          | No     | Pop, then push
+  // | Yes         | Yes          | Yes    | Push
+  //
+  // JAL, return address stack updates:
+  // | rd is x1/x5 | RAS action
+  // | ----------- | ----------
+  // | Yes         | Push
+  // | No          | None
+  //
+  // JUMPs use    Rd = x0/zero and Rs = x6/t1 or an immediate
+  // CALLs use    Rd = x1/ra   and Rs = x6/t1 or an immediate (or x1/ra*)
+  // RETURNs use  Rd = x0/zero and Rs = x1/ra
+  // * x1/ra as Rs is a special case only; it should not normally be used.

   // jump: jal x0, offset
   // For long reach, uses a temp register for:
   // la + jr
-  void j(const address dest, Register temp = t0);
-  void j(const Address &adr, Register temp = t0);
-  void j(Label &l, Register temp = t0);
+  void j(const address dest, Register temp = t1);
+  void j(const Address &adr, Register temp = t1);
+  void j(Label &l, Register temp = noreg);

   // jump register: jalr x0, offset(rs)
   void jr(Register Rd, int32_t offset = 0);

   // call: la + jalr x1
-  void call(const address dest, Register temp = t0);
+  void call(const address dest, Register temp = t1);

   // jalr: jalr x1, offset(rs)
   void jalr(Register Rs, int32_t offset = 0);
@@ -654,7 +678,8 @@ class MacroAssembler: public Assembler {
   // Emit a runtime call. Only invalidates the tmp register which
   // is used to keep the entry address for jalr/movptr.
   // Uses call() for intra code cache, else movptr + jalr.
-  void rt_call(address dest, Register tmp = t0);
+  // Clobbers t1.
+  void rt_call(address dest, Register tmp = t1);

   // ret: jalr x0, 0(x1)
   inline void ret() {
@@ -1165,8 +1190,9 @@ public:
   //   - relocInfo::external_word_type
   //   - relocInfo::runtime_call_type
   //   - relocInfo::none
-  void far_call(const Address &entry, Register tmp = t0);
-  void far_jump(const Address &entry, Register tmp = t0);
+  // Clobbers t1 by default.
+  void far_call(const Address &entry, Register tmp = t1);
+  void far_jump(const Address &entry, Register tmp = t1);

   static int far_branch_size() {
     return 2 * 4; // auipc + jalr, see far_call() & far_jump()
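To make the convention concrete, here is a hedged sketch (not an excerpt from the patch) of how the rules above read at a call site; masm and dest are placeholders:

// Jumps, calls and returns after this patch, with their RAS effect.
void ras_convention_sketch(MacroAssembler* masm, address dest) {
  masm->j(dest);    // jump:   jal x0, offset (la t1 + jr t1 when out of range); RAS untouched
  masm->call(dest); // call:   la t1, dest; jalr x1, 0(t1) -> RAS push
  masm->ret();      // return: jalr x0, 0(x1)              -> RAS pop
}

With the old t0/x5 temp, a long-range jump emitted la t0, dest; jalr x0, 0(t0). Since rs1 == x5, the table above classifies that as a RAS pop: the predictor loses the caller's return address and the next real ret mispredicts. With t1/x6, neither rd nor rs1 is a link register, so the RAS stays balanced.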


@@ -109,17 +109,17 @@ void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register meth
     // compiled code in threads for which the event is enabled. Check here for
     // interp_only_mode if these events CAN be enabled.
-    __ lwu(t0, Address(xthread, JavaThread::interp_only_mode_offset()));
-    __ beqz(t0, run_compiled_code);
-    __ ld(t0, Address(method, Method::interpreter_entry_offset()));
-    __ jr(t0);
+    __ lwu(t1, Address(xthread, JavaThread::interp_only_mode_offset()));
+    __ beqz(t1, run_compiled_code);
+    __ ld(t1, Address(method, Method::interpreter_entry_offset()));
+    __ jr(t1);
     __ BIND(run_compiled_code);
   }

   const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() :
                                                      Method::from_interpreted_offset();
-  __ ld(t0,Address(method, entry_offset));
-  __ jr(t0);
+  __ ld(t1, Address(method, entry_offset));
+  __ jr(t1);

   __ bind(L_no_such_method);
   __ far_jump(RuntimeAddress(SharedRuntime::throw_AbstractMethodError_entry()));
 }


@@ -91,10 +91,10 @@ bool NativeShortCallTrampolineStub::is_at(address addr) {
   if (MacroAssembler::is_auipc_at(addr) &&
       MacroAssembler::is_ld_at(addr + instr_size) &&
       MacroAssembler::is_jalr_at(addr + 2 * instr_size) &&
-      (MacroAssembler::extract_rd(addr) == x5) &&
-      (MacroAssembler::extract_rd(addr + instr_size) == x5) &&
-      (MacroAssembler::extract_rs1(addr + instr_size) == x5) &&
-      (MacroAssembler::extract_rs1(addr + 2 * instr_size) == x5) &&
+      (MacroAssembler::extract_rd(addr) == x6) &&
+      (MacroAssembler::extract_rd(addr + instr_size) == x6) &&
+      (MacroAssembler::extract_rs1(addr + instr_size) == x6) &&
+      (MacroAssembler::extract_rs1(addr + 2 * instr_size) == x6) &&
       (Assembler::extract(Assembler::ld_instr(addr + 4), 31, 20) == trampoline_data_offset)) {
     return true;
   }
@@ -460,10 +460,10 @@ bool NativeFarCall::is_at(address addr) {
   if (MacroAssembler::is_auipc_at(addr) &&
       MacroAssembler::is_ld_at(addr + instr_size) &&
       MacroAssembler::is_jalr_at(addr + 2 * instr_size) &&
-      (MacroAssembler::extract_rd(addr) == x5) &&
-      (MacroAssembler::extract_rd(addr + instr_size) == x5) &&
-      (MacroAssembler::extract_rs1(addr + instr_size) == x5) &&
-      (MacroAssembler::extract_rs1(addr + 2 * instr_size) == x5) &&
+      (MacroAssembler::extract_rd(addr) == x6) &&
+      (MacroAssembler::extract_rd(addr + instr_size) == x6) &&
+      (MacroAssembler::extract_rs1(addr + instr_size) == x6) &&
+      (MacroAssembler::extract_rs1(addr + 2 * instr_size) == x6) &&
       (MacroAssembler::extract_rd(addr + 2 * instr_size) == x1)) {
     return true;
   }
@@ -789,8 +789,8 @@ void NativeGeneralJump::insert_unconditional(address code_pos, address entry) {
   Assembler::IncompressibleRegion ir(&a); // Fixed length: see NativeGeneralJump::get_instruction_size()
   int32_t offset = 0;
-  a.movptr(t0, entry, offset, t1); // lui, lui, slli, add
-  a.jr(t0, offset); // jalr
+  a.movptr(t1, entry, offset, t0); // lui, lui, slli, add
+  a.jr(t1, offset); // jalr

   ICache::invalidate_range(code_pos, instruction_size);
 }
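Both is_at() matchers above recognize the same auipc + ld + jalr far-call sequence; the patch simply retargets every rd/rs1 check on the scratch register from x5 (t0) to x6 (t1), keeping the pattern matchers in sync with the new code emission.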


@@ -1261,11 +1261,11 @@ int MachCallRuntimeNode::ret_addr_offset() {
   //   jal(trampoline_stub)
   // for real runtime callouts it will be 11 instructions
   // see riscv_enc_java_to_runtime
-  //   la(t1, retaddr)               -> auipc + addi
-  //   la(t0, RuntimeAddress(addr))  -> lui + addi + slli + addi + slli + addi
+  //   la(t0, retaddr)               -> auipc + addi
+  //   la(t1, RuntimeAddress(addr))  -> lui + addi + slli + addi + slli + addi
   //   addi(sp, sp, -2 * wordSize)   -> addi
-  //   sd(t1, Address(sp, wordSize)) -> sd
-  //   jalr(t0)                      -> jalr
+  //   sd(t0, Address(sp, wordSize)) -> sd
+  //   jalr(t1)                      -> jalr
   CodeBlob *cb = CodeCache::find_blob(_entry_point);
   if (cb != nullptr) {
     if (UseTrampolines) {
@@ -1822,13 +1822,13 @@ void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const
   assert_cond(st != nullptr);
   st->print_cr("# MachUEPNode");
   if (UseCompressedClassPointers) {
-    st->print_cr("\tlwu t0, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
-    st->print_cr("\tlwu t2, [t1 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
+    st->print_cr("\tlwu t1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
+    st->print_cr("\tlwu t2, [t0 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
   } else {
-    st->print_cr("\tld t0, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
-    st->print_cr("\tld t2, [t1 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
+    st->print_cr("\tld t1, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass");
+    st->print_cr("\tld t2, [t0 + CompiledICData::speculated_klass_offset()]\t# compressed klass");
   }
-  st->print_cr("\tbeq t0, t2, ic_hit");
+  st->print_cr("\tbeq t1, t2, ic_hit");
   st->print_cr("\tj, SharedRuntime::_ic_miss_stub\t # Inline cache check");
   st->print_cr("\tic_hit:");
 }
@@ -1857,8 +1857,8 @@ uint MachUEPNode::size(PhaseRegAlloc* ra_) const
 // Emit exception handler code.
 int HandlerImpl::emit_exception_handler(C2_MacroAssembler* masm)
 {
-  // auipc t0, #exception_blob_entry_point
-  // jr (offset)t0
+  // auipc t1, #exception_blob_entry_point
+  // jr (offset)t1
   // Note that the code buffer's insts_mark is always relative to insts.
   // That's why we must use the macroassembler to generate a handler.
   address base = __ start_a_stub(size_exception_handler());
@@ -2504,12 +2504,12 @@ encode %{
       __ post_call_nop();
     } else {
       Label retaddr;
-      __ la(t1, retaddr);
-      __ la(t0, RuntimeAddress(entry));
+      __ la(t0, retaddr);
+      __ la(t1, RuntimeAddress(entry));
       // Leave a breadcrumb for JavaFrameAnchor::capture_last_Java_pc()
       __ addi(sp, sp, -2 * wordSize);
-      __ sd(t1, Address(sp, wordSize));
-      __ jalr(t0);
+      __ sd(t0, Address(sp, wordSize));
+      __ jalr(t1);
       __ bind(retaddr);
       __ post_call_nop();
       __ addi(sp, sp, 2 * wordSize);
@@ -10098,11 +10098,11 @@ instruct partialSubtypeCheck(iRegP_R15 result, iRegP_R14 sub, iRegP_R10 super, i
 %}

 instruct partialSubtypeCheckConstSuper(iRegP_R14 sub, iRegP_R10 super_reg, immP super_con, iRegP_R15 result,
-                                       iRegP_R11 tmpR11, iRegP_R12 tmpR12, iRegP_R13 tmpR13, iRegP_R16 tmpR16)
+                                       iRegP_R11 tmpR11, iRegP_R12 tmpR12, iRegP_R13 tmpR13, iRegP_R16 tmpR16, rFlagsReg cr)
 %{
   predicate(UseSecondarySupersTable);
   match(Set result (PartialSubtypeCheck sub (Binary super_reg super_con)));
-  effect(TEMP tmpR11, TEMP tmpR12, TEMP tmpR13, TEMP tmpR16);
+  effect(TEMP tmpR11, TEMP tmpR12, TEMP tmpR13, TEMP tmpR16, KILL cr);

   ins_cost(7 * DEFAULT_COST); // needs to be less than competing nodes
   format %{ "partialSubtypeCheck $result, $sub, $super_reg, $super_con" %}
@@ -10379,12 +10379,12 @@ instruct stringL_indexof_char(iRegP_R11 str1, iRegI_R12 cnt1, iRegI_R13 ch,
 // clearing of an array
 instruct clearArray_reg_reg(iRegL_R29 cnt, iRegP_R28 base, iRegP_R30 tmp1,
-                            iRegP_R31 tmp2, Universe dummy)
+                            iRegP_R31 tmp2, rFlagsReg cr, Universe dummy)
 %{
   // temp registers must match the one used in StubGenerator::generate_zero_blocks()
   predicate(UseBlockZeroing || !UseRVV);
   match(Set dummy (ClearArray cnt base));
-  effect(USE_KILL cnt, USE_KILL base, TEMP tmp1, TEMP tmp2);
+  effect(USE_KILL cnt, USE_KILL base, TEMP tmp1, TEMP tmp2, KILL cr);

   ins_cost(4 * DEFAULT_COST);
   format %{ "ClearArray $cnt, $base\t#@clearArray_reg_reg" %}
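Sanity-checking the 11-instruction figure in ret_addr_offset() against the listing above: la(t0, retaddr) expands to 2 instructions (auipc + addi), la(t1, RuntimeAddress(addr)) to 6 (lui + addi + slli + addi + slli + addi), and addi + sd + jalr add 3 more, giving 2 + 6 + 3 = 11.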


@@ -351,7 +351,7 @@ void OptoRuntime::generate_exception_blob() {
   // x10: exception handler

   // We have a handler in x10 (could be deopt blob).
-  __ mv(t0, x10);
+  __ mv(t1, x10);

   // Get the exception oop
   __ ld(x10, Address(xthread, JavaThread::exception_oop_offset()));
@@ -365,11 +365,11 @@ void OptoRuntime::generate_exception_blob() {
   __ sd(zr, Address(xthread, JavaThread::exception_oop_offset()));

   // x10: exception oop
-  // t0: exception handler
+  // t1: exception handler
   // x14: exception pc
   // Jump to handler
-  __ jr(t0);
+  __ jr(t1);

   // Make sure all code is generated
   masm->flush();


@@ -468,8 +468,8 @@ static void gen_c2i_adapter(MacroAssembler *masm,
   __ mv(esp, sp); // Interp expects args on caller's expression stack

-  __ ld(t0, Address(xmethod, in_bytes(Method::interpreter_entry_offset())));
-  __ jr(t0);
+  __ ld(t1, Address(xmethod, in_bytes(Method::interpreter_entry_offset())));
+  __ jr(t1);
 }

 void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
@@ -610,8 +610,7 @@ AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm
   Label skip_fixup;

   const Register receiver = j_rarg0;
-  const Register data = t1;
-  const Register tmp = t2;  // A call-clobbered register not used for arg passing
+  const Register data = t0;

   // -------------------------------------------------------------------------
   // Generate a C2I adapter. On entry we know xmethod holds the Method* during calls
@@ -1140,8 +1139,8 @@ static void gen_continuation_yield(MacroAssembler* masm,
   Label ok;
   __ beqz(t0, ok);
   __ leave();
-  __ la(t0, RuntimeAddress(StubRoutines::forward_exception_entry()));
-  __ jr(t0);
+  __ la(t1, RuntimeAddress(StubRoutines::forward_exception_entry()));
+  __ jr(t1);
   __ bind(ok);

   __ leave();
@@ -1452,8 +1451,6 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
   // restoring them except fp. fp is the only callee save register
   // as far as the interpreter and the compiler(s) are concerned.

-  const Register ic_reg = t1;
   const Register receiver = j_rarg0;

   __ verify_oop(receiver);
@@ -1737,6 +1734,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
   __ membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
   __ sw(t0, Address(t1));

+  // Clobbers t1
   __ rt_call(native_func);

   __ bind(native_return);
@@ -2631,8 +2629,8 @@ RuntimeStub* SharedRuntime::generate_resolve_blob(SharedStubId id, address desti
   reg_saver.restore_live_registers(masm);

   // We are back to the original state on entry and ready to go.
-  __ jr(t0);
+  __ mv(t1, t0);
+  __ jr(t1);

   // Pending exception after the safepoint


@@ -3782,8 +3782,8 @@ class StubGenerator: public StubCodeGenerator {
     Label thaw_success;
     // t1 contains the size of the frames to thaw, 0 if overflow or no more frames
     __ bnez(t1, thaw_success);
-    __ la(t0, RuntimeAddress(SharedRuntime::throw_StackOverflowError_entry()));
-    __ jr(t0);
+    __ la(t1, RuntimeAddress(SharedRuntime::throw_StackOverflowError_entry()));
+    __ jr(t1);
     __ bind(thaw_success);

     // make room for the thawed frames


@@ -166,7 +166,6 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
   address fn = nullptr;
   address entry_point = nullptr;
-  Register continuation = ra;
   switch (kind) {
   case Interpreter::java_lang_math_abs:
     entry_point = __ pc();
@@ -185,83 +184,82 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
     __ fld(f10, Address(esp));
     __ mv(sp, x19_sender_sp);
     __ mv(x9, ra);
-    continuation = x9;  // The first callee-saved register
     if (StubRoutines::dsin() == nullptr) {
       fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
     } else {
       fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin());
     }
     __ call(fn);
+    __ mv(ra, x9);
     break;
   case Interpreter::java_lang_math_cos :
     entry_point = __ pc();
     __ fld(f10, Address(esp));
     __ mv(sp, x19_sender_sp);
     __ mv(x9, ra);
-    continuation = x9;  // The first callee-saved register
     if (StubRoutines::dcos() == nullptr) {
       fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos);
     } else {
       fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos());
     }
     __ call(fn);
+    __ mv(ra, x9);
     break;
   case Interpreter::java_lang_math_tan :
     entry_point = __ pc();
     __ fld(f10, Address(esp));
     __ mv(sp, x19_sender_sp);
     __ mv(x9, ra);
-    continuation = x9;  // The first callee-saved register
     if (StubRoutines::dtan() == nullptr) {
       fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan);
     } else {
       fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan());
     }
     __ call(fn);
+    __ mv(ra, x9);
     break;
   case Interpreter::java_lang_math_log :
     entry_point = __ pc();
     __ fld(f10, Address(esp));
     __ mv(sp, x19_sender_sp);
     __ mv(x9, ra);
-    continuation = x9;  // The first callee-saved register
     if (StubRoutines::dlog() == nullptr) {
       fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog);
     } else {
       fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog());
     }
     __ call(fn);
+    __ mv(ra, x9);
     break;
   case Interpreter::java_lang_math_log10 :
     entry_point = __ pc();
     __ fld(f10, Address(esp));
     __ mv(sp, x19_sender_sp);
     __ mv(x9, ra);
-    continuation = x9;  // The first callee-saved register
     if (StubRoutines::dlog10() == nullptr) {
       fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
     } else {
       fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10());
     }
     __ call(fn);
+    __ mv(ra, x9);
     break;
   case Interpreter::java_lang_math_exp :
     entry_point = __ pc();
     __ fld(f10, Address(esp));
     __ mv(sp, x19_sender_sp);
     __ mv(x9, ra);
-    continuation = x9;  // The first callee-saved register
     if (StubRoutines::dexp() == nullptr) {
       fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
     } else {
       fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp());
     }
     __ call(fn);
+    __ mv(ra, x9);
     break;
   case Interpreter::java_lang_math_pow :
     entry_point = __ pc();
     __ mv(x9, ra);
-    continuation = x9;
     __ fld(f10, Address(esp, 2 * Interpreter::stackElementSize));
     __ fld(f11, Address(esp));
     __ mv(sp, x19_sender_sp);
@@ -271,6 +269,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
       fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow());
     }
     __ call(fn);
+    __ mv(ra, x9);
     break;
   case Interpreter::java_lang_math_fmaD :
     if (UseFMA) {
@@ -296,7 +295,7 @@ address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::M
     ;
   }
   if (entry_point != nullptr) {
-    __ jr(continuation);
+    __ ret();
   }

   return entry_point;
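The shape of the change above, isolated as a hedged sketch (placeholder function, not a verbatim excerpt): ra is saved in the callee-saved x9/s1 across the runtime call and restored afterwards, so the final return can be a plain ret and the RAS push from the interpreter's original call is matched by exactly one pop.

// Sketch: RAS-balanced math entry, assuming masm and fn are provided.
void math_entry_pattern(MacroAssembler* masm, address fn) {
  masm->mv(x9, ra);  // save the return address; x9/s1 is callee-saved
  masm->call(fn);    // jalr x1, ... -> RAS push, popped by the callee's ret
  masm->mv(ra, x9);  // restore the return address clobbered by the call
  masm->ret();       // jalr x0, 0(x1) -> RAS pop matching the caller's push
}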


@@ -705,7 +705,7 @@ void TemplateTable::wide_aload() {
 }

 void TemplateTable::index_check(Register array, Register index) {
-  // destroys x11, t0
+  // destroys x11, t0, t1
   // sign extend index for use by indexed load
   // check index
   const Register length = t0;
@@ -718,8 +718,8 @@ void TemplateTable::index_check(Register array, Register index) {
   __ sign_extend(index, index, 32);
   __ bltu(index, length, ok);
   __ mv(x13, array);
-  __ mv(t0, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
-  __ jr(t0);
+  __ mv(t1, Interpreter::_throw_ArrayIndexOutOfBoundsException_entry);
+  __ jr(t1);
   __ bind(ok);
 }
@@ -1313,8 +1313,8 @@ void TemplateTable::idiv() {
   // explicitly check for div0
   Label no_div0;
   __ bnez(x10, no_div0);
-  __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
-  __ jr(t0);
+  __ mv(t1, Interpreter::_throw_ArithmeticException_entry);
+  __ jr(t1);
   __ bind(no_div0);
   __ pop_i(x11);
   // x10 <== x11 idiv x10
@@ -1326,8 +1326,8 @@ void TemplateTable::irem() {
   // explicitly check for div0
   Label no_div0;
   __ bnez(x10, no_div0);
-  __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
-  __ jr(t0);
+  __ mv(t1, Interpreter::_throw_ArithmeticException_entry);
+  __ jr(t1);
   __ bind(no_div0);
   __ pop_i(x11);
   // x10 <== x11 irem x10
@@ -1345,8 +1345,8 @@ void TemplateTable::ldiv() {
   // explicitly check for div0
   Label no_div0;
   __ bnez(x10, no_div0);
-  __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
-  __ jr(t0);
+  __ mv(t1, Interpreter::_throw_ArithmeticException_entry);
+  __ jr(t1);
   __ bind(no_div0);
   __ pop_l(x11);
   // x10 <== x11 ldiv x10
@@ -1358,8 +1358,8 @@ void TemplateTable::lrem() {
   // explicitly check for div0
   Label no_div0;
   __ bnez(x10, no_div0);
-  __ mv(t0, Interpreter::_throw_ArithmeticException_entry);
-  __ jr(t0);
+  __ mv(t1, Interpreter::_throw_ArithmeticException_entry);
+  __ jr(t1);
   __ bind(no_div0);
   __ pop_l(x11);
   // x10 <== x11 lrem x10
@@ -1768,8 +1768,8 @@ void TemplateTable::branch(bool is_jsr, bool is_wide) {
     __ andi(sp, esp, -16);

     // and begin the OSR nmethod
-    __ ld(t0, Address(x9, nmethod::osr_entry_point_offset()));
-    __ jr(t0);
+    __ ld(t1, Address(x9, nmethod::osr_entry_point_offset()));
+    __ jr(t1);
   }
 }
@@ -2171,7 +2171,7 @@ void TemplateTable::_return(TosState state) {
 void TemplateTable::resolve_cache_and_index_for_method(int byte_no,
                                                        Register Rcache,
                                                        Register index) {
-  const Register temp = x9;
+  const Register temp = x9; // s1
   assert_different_registers(Rcache, index, temp);
   assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
@@ -3962,8 +3962,8 @@ void TemplateTable::wide() {
   __ load_unsigned_byte(x9, at_bcp(1));
   __ mv(t0, (address)Interpreter::_wentry_point);
   __ shadd(t0, x9, t0, t1, 3);
-  __ ld(t0, Address(t0));
-  __ jr(t0);
+  __ ld(t1, Address(t0));
+  __ jr(t1);
 }

 // Multi arrays


@@ -267,8 +267,8 @@ address UpcallLinker::make_upcall_stub(jobject receiver, Symbol* signature,
   __ push_cont_fastpath(xthread);

-  __ ld(t0, Address(xmethod, Method::from_compiled_offset()));
-  __ jalr(t0);
+  __ ld(t1, Address(xmethod, Method::from_compiled_offset()));
+  __ jalr(t1);

   __ pop_cont_fastpath(xthread);


@@ -131,8 +131,8 @@ VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
   // xmethod: Method*
   // x12: receiver
   address ame_addr = __ pc();
-  __ ld(t0, Address(xmethod, Method::from_compiled_offset()));
-  __ jr(t0);
+  __ ld(t1, Address(xmethod, Method::from_compiled_offset()));
+  __ jr(t1);

   masm->flush();
   bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, 0);
@@ -160,6 +160,13 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
   MacroAssembler* masm = new MacroAssembler(&cb);
   assert_cond(masm != nullptr);

+  // Real entry arguments:
+  //  t0: CompiledICData
+  //  j_rarg0: Receiver
+  // Make sure the move of CompiledICData from t0 to t1 is the first thing that
+  // happens; otherwise we risk clobbering t0, as it is used as scratch.
+  __ mv(t1, t0);
+
 #if (!defined(PRODUCT) && defined(COMPILER2))
   if (CountCompiledCalls) {
     __ la(x18, ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr()));
@@ -170,8 +177,8 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
   // get receiver (need to skip return address on top of stack)
   assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0");

-  // Entry arguments:
-  //  t1: CompiledICData
+  // Arguments from this point:
+  //  t1 (moved from t0): CompiledICData
   //  j_rarg0: Receiver

   // This stub is called from compiled code which has no callee-saved registers,
@@ -220,8 +227,8 @@ VtableStub* VtableStubs::create_itable_stub(int itable_index) {
   // xmethod: Method*
   // j_rarg0: receiver
   address ame_addr = __ pc();
-  __ ld(t0, Address(xmethod, Method::from_compiled_offset()));
-  __ jr(t0);
+  __ ld(t1, Address(xmethod, Method::from_compiled_offset()));
+  __ jr(t1);

   __ bind(L_no_such_interface);
   // Handle IncompatibleClassChangeError in itable stubs.