diff --git a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
index 547e80c7e47..dc1a3d443ac 100644
--- a/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c1_MacroAssembler_riscv.cpp
@@ -70,7 +70,7 @@ int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr
   }
 
   if (LockingMode == LM_LIGHTWEIGHT) {
-    lightweight_lock(obj, hdr, temp, t1, slow_case);
+    lightweight_lock(disp_hdr, obj, hdr, temp, t1, slow_case);
   } else if (LockingMode == LM_LEGACY) {
     Label done;
     // Load object header
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
index 8322b35e205..a75bfdfc9dc 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.cpp
@@ -253,12 +253,13 @@ void C2_MacroAssembler::fast_unlock(Register objectReg, Register boxReg,
   // C2 uses the value of flag (0 vs !0) to determine the continuation.
 }
 
-void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register tmp1, Register tmp2, Register tmp3) {
+void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register box,
+                                              Register tmp1, Register tmp2, Register tmp3) {
   // Flag register, zero for success; non-zero for failure.
   Register flag = t1;
   assert(LockingMode == LM_LIGHTWEIGHT, "must be");
-  assert_different_registers(obj, tmp1, tmp2, tmp3, flag, t0);
+  assert_different_registers(obj, box, tmp1, tmp2, tmp3, flag, t0);
 
   mv(flag, 1);
 
@@ -269,6 +270,11 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register tmp1, Regis
   // Finish fast lock unsuccessfully. slow_path MUST branch to with flag != 0
   Label slow_path;
 
+  if (UseObjectMonitorTable) {
+    // Clear cache in case fast locking succeeds.
+    sd(zr, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
+  }
+
   if (DiagnoseSyncOnValueBasedClasses != 0) {
     load_klass(tmp1, obj);
     lwu(tmp1, Address(tmp1, Klass::access_flags_offset()));
@@ -277,6 +283,7 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register tmp1, Regis
   }
 
   const Register tmp1_mark = tmp1;
+  const Register tmp3_t = tmp3;
 
   { // Lightweight locking
@@ -284,7 +291,6 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register tmp1, Regis
     Label push;
 
     const Register tmp2_top = tmp2;
-    const Register tmp3_t = tmp3;
 
     // Check if lock-stack is full.
     lwu(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset()));
@@ -323,29 +329,67 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register tmp1, Regis
   { // Handle inflated monitor.
     bind(inflated);
 
+    const Register tmp1_monitor = tmp1;
+
     if (!UseObjectMonitorTable) {
-      // mark contains the tagged ObjectMonitor*.
-      const Register tmp1_tagged_monitor = tmp1_mark;
-      const uintptr_t monitor_tag = markWord::monitor_value;
-      const Register tmp2_owner_addr = tmp2;
-      const Register tmp3_owner = tmp3;
-
-      // Compute owner address.
-      la(tmp2_owner_addr, Address(tmp1_tagged_monitor, (in_bytes(ObjectMonitor::owner_offset()) - monitor_tag)));
-
-      // CAS owner (null => current thread).
-      cmpxchg(/*addr*/ tmp2_owner_addr, /*expected*/ zr, /*new*/ xthread, Assembler::int64,
-              /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ tmp3_owner);
-      beqz(tmp3_owner, locked);
-
-      // Check if recursive.
-      bne(tmp3_owner, xthread, slow_path);
-
-      // Recursive.
-      increment(Address(tmp1_tagged_monitor, in_bytes(ObjectMonitor::recursions_offset()) - monitor_tag), 1, tmp2, tmp3);
+      assert(tmp1_monitor == tmp1_mark, "should be the same here");
     } else {
-      // OMCache lookup not supported yet. Take the slowpath.
+      Label monitor_found;
+
+      // Load cache address
+      la(tmp3_t, Address(xthread, JavaThread::om_cache_oops_offset()));
+
+      const int num_unrolled = 2;
+      for (int i = 0; i < num_unrolled; i++) {
+        ld(tmp1, Address(tmp3_t));
+        beq(obj, tmp1, monitor_found);
+        add(tmp3_t, tmp3_t, in_bytes(OMCache::oop_to_oop_difference()));
+      }
+
+      Label loop;
+
+      // Search for obj in cache.
+      bind(loop);
+
+      // Check for match.
+      ld(tmp1, Address(tmp3_t));
+      beq(obj, tmp1, monitor_found);
+
+      // Search until null encountered, guaranteed _null_sentinel at end.
+      add(tmp3_t, tmp3_t, in_bytes(OMCache::oop_to_oop_difference()));
+      bnez(tmp1, loop);
+      // Cache Miss. Take the slowpath.
       j(slow_path);
+
+      bind(monitor_found);
+      ld(tmp1_monitor, Address(tmp3_t, OMCache::oop_to_monitor_difference()));
     }
+
+    const Register tmp2_owner_addr = tmp2;
+    const Register tmp3_owner = tmp3;
+
+    const ByteSize monitor_tag = in_ByteSize(UseObjectMonitorTable ? 0 : checked_cast<int>(markWord::monitor_value));
+    const Address owner_address(tmp1_monitor, ObjectMonitor::owner_offset() - monitor_tag);
+    const Address recursions_address(tmp1_monitor, ObjectMonitor::recursions_offset() - monitor_tag);
+
+    Label monitor_locked;
+
+    // Compute owner address.
+    la(tmp2_owner_addr, owner_address);
+
+    // CAS owner (null => current thread).
+    cmpxchg(/*addr*/ tmp2_owner_addr, /*expected*/ zr, /*new*/ xthread, Assembler::int64,
+            /*acquire*/ Assembler::aq, /*release*/ Assembler::relaxed, /*result*/ tmp3_owner);
+    beqz(tmp3_owner, monitor_locked);
+
+    // Check if recursive.
+    bne(tmp3_owner, xthread, slow_path);
+
+    // Recursive.
+    increment(recursions_address, 1, tmp2, tmp3);
+
+    bind(monitor_locked);
+    if (UseObjectMonitorTable) {
+      sd(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
+    }
   }
 
@@ -370,18 +414,18 @@ void C2_MacroAssembler::fast_lock_lightweight(Register obj, Register tmp1, Regis
   // C2 uses the value of flag (0 vs !0) to determine the continuation.
 }
 
-void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Register tmp2,
-                                                Register tmp3) {
+void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register box,
+                                                Register tmp1, Register tmp2, Register tmp3) {
   // Flag register, zero for success; non-zero for failure.
   Register flag = t1;
   assert(LockingMode == LM_LIGHTWEIGHT, "must be");
-  assert_different_registers(obj, tmp1, tmp2, tmp3, flag, t0);
+  assert_different_registers(obj, box, tmp1, tmp2, tmp3, flag, t0);
 
   mv(flag, 1);
 
   // Handle inflated monitor.
-  Label inflated, inflated_load_monitor;
+  Label inflated, inflated_load_mark;
   // Finish fast unlock successfully. unlocked MUST branch to with flag == 0
   Label unlocked;
   // Finish fast unlock unsuccessfully. MUST branch to with flag != 0
@@ -392,6 +436,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg
   const Register tmp3_t = tmp3;
 
   { // Lightweight unlock
+    Label push_and_slow_path;
 
     // Check if obj is top of lock-stack.
     lwu(tmp2_top, Address(xthread, JavaThread::lock_stack_top_offset()));
@@ -399,7 +444,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg
     add(tmp3_t, xthread, tmp2_top);
     ld(tmp3_t, Address(tmp3_t));
     // Top of lock stack was not obj. Must be monitor.
-    bne(obj, tmp3_t, inflated_load_monitor);
+    bne(obj, tmp3_t, inflated_load_mark);
 
     // Pop lock-stack.
     DEBUG_ONLY(add(tmp3_t, xthread, tmp2_top);)
@@ -416,8 +461,11 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg
     ld(tmp1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
 
     // Check header for monitor (0b10).
+    // Because we got here by popping (meaning we pushed in locked)
+    // there will be no monitor in the box. So we need to push back the obj
+    // so that the runtime can fix any potential anonymous owner.
     test_bit(tmp3_t, tmp1_mark, exact_log2(markWord::monitor_value));
-    bnez(tmp3_t, inflated);
+    bnez(tmp3_t, UseObjectMonitorTable ? push_and_slow_path : inflated);
 
     // Try to unlock. Transition lock bits 0b00 => 0b01
     assert(oopDesc::mark_offset_in_bytes() == 0, "required to avoid lea");
@@ -426,6 +474,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg
             /*acquire*/ Assembler::relaxed, /*release*/ Assembler::rl, /*result*/ tmp3_t);
     beq(tmp1_mark, tmp3_t, unlocked);
 
+    bind(push_and_slow_path);
     // Compare and exchange failed.
     // Restore lock-stack and handle the unlock in runtime.
     DEBUG_ONLY(add(tmp3_t, xthread, tmp2_top);)
@@ -436,7 +485,7 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg
   }
 
   { // Handle inflated monitor.
-    bind(inflated_load_monitor);
+    bind(inflated_load_mark);
     ld(tmp1_mark, Address(obj, oopDesc::mark_offset_in_bytes()));
 #ifdef ASSERT
     test_bit(tmp3_t, tmp1_mark, exact_log2(markWord::monitor_value));
@@ -458,54 +507,55 @@ void C2_MacroAssembler::fast_unlock_lightweight(Register obj, Register tmp1, Reg
     bind(check_done);
 #endif
 
+    const Register tmp1_monitor = tmp1;
+
     if (!UseObjectMonitorTable) {
-      // mark contains the tagged ObjectMonitor*.
-      const Register tmp1_monitor = tmp1_mark;
-      const uintptr_t monitor_tag = markWord::monitor_value;
-
+      assert(tmp1_monitor == tmp1_mark, "should be the same here");
       // Untag the monitor.
-      sub(tmp1_monitor, tmp1_mark, monitor_tag);
-
-      const Register tmp2_recursions = tmp2;
-      Label not_recursive;
-
-      // Check if recursive.
-      ld(tmp2_recursions, Address(tmp1_monitor, ObjectMonitor::recursions_offset()));
-      beqz(tmp2_recursions, not_recursive);
-
-      // Recursive unlock.
-      addi(tmp2_recursions, tmp2_recursions, -1);
-      sd(tmp2_recursions, Address(tmp1_monitor, ObjectMonitor::recursions_offset()));
-      j(unlocked);
-
-      bind(not_recursive);
-
-      Label release;
-      const Register tmp2_owner_addr = tmp2;
-
-      // Compute owner address.
-      la(tmp2_owner_addr, Address(tmp1_monitor, ObjectMonitor::owner_offset()));
-
-      // Check if the entry lists are empty.
-      ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset()));
-      ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::cxq_offset()));
-      orr(t0, t0, tmp3_t);
-      beqz(t0, release);
-
-      // The owner may be anonymous and we removed the last obj entry in
-      // the lock-stack. This loses the information about the owner.
-      // Write the thread to the owner field so the runtime knows the owner.
-      sd(xthread, Address(tmp2_owner_addr));
-      j(slow_path);
-
-      bind(release);
-      // Set owner to null.
-      membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
-      sd(zr, Address(tmp2_owner_addr));
+      add(tmp1_monitor, tmp1_mark, -(int)markWord::monitor_value);
     } else {
-      // OMCache lookup not supported yet. Take the slowpath.
-      j(slow_path);
+      ld(tmp1_monitor, Address(box, BasicLock::object_monitor_cache_offset_in_bytes()));
+      // No valid pointer below alignof(ObjectMonitor*). Take the slow path.
+      mv(tmp3_t, alignof(ObjectMonitor*));
+      bltu(tmp1_monitor, tmp3_t, slow_path);
     }
+
+    const Register tmp2_recursions = tmp2;
+    Label not_recursive;
+
+    // Check if recursive.
+    ld(tmp2_recursions, Address(tmp1_monitor, ObjectMonitor::recursions_offset()));
+    beqz(tmp2_recursions, not_recursive);
+
+    // Recursive unlock.
+    addi(tmp2_recursions, tmp2_recursions, -1);
+    sd(tmp2_recursions, Address(tmp1_monitor, ObjectMonitor::recursions_offset()));
+    j(unlocked);
+
+    bind(not_recursive);
+
+    Label release;
+    const Register tmp2_owner_addr = tmp2;
+
+    // Compute owner address.
+    la(tmp2_owner_addr, Address(tmp1_monitor, ObjectMonitor::owner_offset()));
+
+    // Check if the entry lists are empty.
+    ld(t0, Address(tmp1_monitor, ObjectMonitor::EntryList_offset()));
+    ld(tmp3_t, Address(tmp1_monitor, ObjectMonitor::cxq_offset()));
+    orr(t0, t0, tmp3_t);
+    beqz(t0, release);
+
+    // The owner may be anonymous and we removed the last obj entry in
+    // the lock-stack. This loses the information about the owner.
+    // Write the thread to the owner field so the runtime knows the owner.
+    sd(xthread, Address(tmp2_owner_addr));
+    j(slow_path);
+
+    bind(release);
+    // Set owner to null.
+    membar(MacroAssembler::LoadStore | MacroAssembler::StoreStore);
+    sd(zr, Address(tmp2_owner_addr));
   }
 
   bind(unlocked);
diff --git a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
index 07041fe0850..4d7f756923c 100644
--- a/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/c2_MacroAssembler_riscv.hpp
@@ -47,8 +47,8 @@
   void fast_lock(Register object, Register box, Register tmp1, Register tmp2, Register tmp3);
   void fast_unlock(Register object, Register box, Register tmp1, Register tmp2);
   // Code used by cmpFastLockLightweight and cmpFastUnlockLightweight mach instructions in .ad file.
-  void fast_lock_lightweight(Register object, Register tmp1, Register tmp2, Register tmp3);
-  void fast_unlock_lightweight(Register object, Register tmp1, Register tmp2, Register tmp3);
+  void fast_lock_lightweight(Register object, Register box, Register tmp1, Register tmp2, Register tmp3);
+  void fast_unlock_lightweight(Register object, Register box, Register tmp1, Register tmp2, Register tmp3);
 
   void string_compare(Register str1, Register str2,
                       Register cnt1, Register cnt2, Register result,
diff --git a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
index 17b75b30264..06b7b780d13 100644
--- a/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
+++ b/src/hotspot/cpu/riscv/interp_masm_riscv.cpp
@@ -756,7 +756,7 @@ void InterpreterMacroAssembler::lock_object(Register lock_reg)
     }
 
     if (LockingMode == LM_LIGHTWEIGHT) {
-      lightweight_lock(obj_reg, tmp, tmp2, tmp3, slow_case);
+      lightweight_lock(lock_reg, obj_reg, tmp, tmp2, tmp3, slow_case);
       j(count);
     } else if (LockingMode == LM_LEGACY) {
       // Load (object->mark() | 1) into swap_reg
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
index 32ccba6b0ce..c5516336ebc 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.cpp
@@ -5792,9 +5792,9 @@ void MacroAssembler::test_bit(Register Rd, Register Rs, uint32_t bit_pos) {
 // - obj: the object to be locked
 // - tmp1, tmp2, tmp3: temporary registers, will be destroyed
 // - slow: branched to if locking fails
-void MacroAssembler::lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
+void MacroAssembler::lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow) {
   assert(LockingMode == LM_LIGHTWEIGHT, "only used with new lightweight locking");
-  assert_different_registers(obj, tmp1, tmp2, tmp3, t0);
+  assert_different_registers(basic_lock, obj, tmp1, tmp2, tmp3, t0);
 
   Label push;
   const Register top = tmp1;
@@ -5805,6 +5805,11 @@ void MacroAssembler::lightweight_lock(Register obj, Register tmp1, Register tmp2
   // instruction emitted as it is part of C1's null check semantics.
   ld(mark, Address(obj, oopDesc::mark_offset_in_bytes()));
 
+  if (UseObjectMonitorTable) {
+    // Clear cache in case fast locking succeeds.
+    sd(zr, Address(basic_lock, BasicObjectLock::lock_offset() + in_ByteSize((BasicLock::object_monitor_cache_offset_in_bytes()))));
+  }
+
   // Check if the lock-stack is full.
   lwu(top, Address(xthread, JavaThread::lock_stack_top_offset()));
   mv(t, (unsigned)LockStack::end_offset());
diff --git a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
index 3c1add90367..b0404929f46 100644
--- a/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
+++ b/src/hotspot/cpu/riscv/macroAssembler_riscv.hpp
@@ -1602,7 +1602,7 @@ private:
   void store_conditional(Register dst, Register new_val, Register addr, enum operand_size size, Assembler::Aqrl release);
 
 public:
-  void lightweight_lock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
+  void lightweight_lock(Register basic_lock, Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
   void lightweight_unlock(Register obj, Register tmp1, Register tmp2, Register tmp3, Label& slow);
 
 public:
diff --git a/src/hotspot/cpu/riscv/riscv.ad b/src/hotspot/cpu/riscv/riscv.ad
index d3e2b0549e9..db010c9c6c8 100644
--- a/src/hotspot/cpu/riscv/riscv.ad
+++ b/src/hotspot/cpu/riscv/riscv.ad
@@ -10553,33 +10553,33 @@ instruct cmpFastUnlock(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iR
   ins_pipe(pipe_serial);
 %}
 
-instruct cmpFastLockLightweight(rFlagsReg cr, iRegP object, iRegP_R10 box, iRegPNoSp tmp1, iRegPNoSp tmp2)
+instruct cmpFastLockLightweight(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3)
 %{
   predicate(LockingMode == LM_LIGHTWEIGHT);
   match(Set cr (FastLock object box));
-  effect(TEMP tmp1, TEMP tmp2, USE_KILL box);
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
 
   ins_cost(10 * DEFAULT_COST);
-  format %{ "fastlock $object,$box\t! kills $box,$tmp1,$tmp2 #@cmpFastLockLightweight" %}
+  format %{ "fastlock $object,$box\t! kills $tmp1,$tmp2,$tmp3 #@cmpFastLockLightweight" %}
 
   ins_encode %{
-    __ fast_lock_lightweight($object$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register);
+    __ fast_lock_lightweight($object$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
   %}
 
   ins_pipe(pipe_serial);
 %}
 
-instruct cmpFastUnlockLightweight(rFlagsReg cr, iRegP object, iRegP_R10 box, iRegPNoSp tmp1, iRegPNoSp tmp2)
+instruct cmpFastUnlockLightweight(rFlagsReg cr, iRegP object, iRegP box, iRegPNoSp tmp1, iRegPNoSp tmp2, iRegPNoSp tmp3)
 %{
   predicate(LockingMode == LM_LIGHTWEIGHT);
   match(Set cr (FastUnlock object box));
-  effect(TEMP tmp1, TEMP tmp2, USE_KILL box);
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
 
   ins_cost(10 * DEFAULT_COST);
-  format %{ "fastunlock $object,$box\t! kills $box,$tmp1,$tmp2, #@cmpFastUnlockLightweight" %}
+  format %{ "fastunlock $object,$box\t! kills $tmp1,$tmp2,$tmp3 #@cmpFastUnlockLightweight" %}
 
   ins_encode %{
-    __ fast_unlock_lightweight($object$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register);
+    __ fast_unlock_lightweight($object$$Register, $box$$Register, $tmp1$$Register, $tmp2$$Register, $tmp3$$Register);
   %}
 
   ins_pipe(pipe_serial);
diff --git a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
index bed24e442e8..ffd904aed47 100644
--- a/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
+++ b/src/hotspot/cpu/riscv/sharedRuntime_riscv.cpp
@@ -1702,7 +1702,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
       __ bnez(swap_reg, slow_path_lock);
     } else {
       assert(LockingMode == LM_LIGHTWEIGHT, "must be");
-      __ lightweight_lock(obj_reg, swap_reg, tmp, lock_tmp, slow_path_lock);
+      __ lightweight_lock(lock_reg, obj_reg, swap_reg, tmp, lock_tmp, slow_path_lock);
     }
 
     __ bind(count);
diff --git a/src/hotspot/share/runtime/basicLock.inline.hpp b/src/hotspot/share/runtime/basicLock.inline.hpp
index fb1cee8de8f..c04c8e5b117 100644
--- a/src/hotspot/share/runtime/basicLock.inline.hpp
+++ b/src/hotspot/share/runtime/basicLock.inline.hpp
@@ -39,7 +39,7 @@ inline void BasicLock::set_displaced_header(markWord header) {
 
 inline ObjectMonitor* BasicLock::object_monitor_cache() const {
   assert(UseObjectMonitorTable, "must be");
-#if defined(X86) || defined(AARCH64)
+#if defined(X86) || defined(AARCH64) || defined(RISCV64)
   return reinterpret_cast<ObjectMonitor*>(get_metadata());
 #else
   // Other platforms do not make use of the cache yet,
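
The C2 fast-lock hunk above replaces the old unconditional slow-path jump in the UseObjectMonitorTable branch with an inline scan of the per-thread monitor cache: two unrolled probes, then a loop that stops at a null sentinel. The following C++ sketch models the lookup the emitted assembly performs; OMCacheSketch, kCapacity, and the parallel-array layout are illustrative assumptions, not HotSpot's actual OMCache definition:

    #include <cstddef>

    struct ObjectMonitor;   // opaque for this sketch
    typedef void* oop;      // simplified stand-in for HotSpot's oop

    struct OMCacheSketch {
      static const int kCapacity = 8;   // assumed cache size, illustrative only
      // One extra slot whose oop stays nullptr plays the role of _null_sentinel:
      // the scan can stop on a null oop instead of counting entries.
      oop            _oops[kCapacity + 1];
      ObjectMonitor* _monitors[kCapacity + 1];

      ObjectMonitor* lookup(oop obj) {
        // The stub unrolls the first num_unrolled == 2 probes and then loops;
        // in C++ a single loop expresses the same search.
        for (int i = 0; i <= kCapacity; i++) {
          if (_oops[i] == obj)     return _monitors[i];  // hit: monitor_found
          if (_oops[i] == nullptr) break;                // sentinel: cache miss
        }
        return nullptr;                                  // miss: take slow_path
      }
    };

The guaranteed sentinel is what lets the emitted loop end on bnez(tmp1, loop) instead of maintaining an index, keeping the inflated-lock fast path branch-light.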
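
The box register threaded through the new signatures points at the on-stack BasicLock, which the patch uses as a one-entry monitor cache: the lock stubs clear it up front (the sd(zr, ...) stores), publish the monitor once an inflated lock succeeds, and the unlock stub only trusts the cached value after the mv/bltu range check against alignof(ObjectMonitor*). A sketch of that contract, with BasicLockSketch as a hypothetical stand-in for the real BasicLock:

    #include <cstdint>

    struct ObjectMonitor { /* owner, recursions, entry lists, ... */ };

    struct BasicLockSketch {
      uintptr_t _metadata;  // caches an ObjectMonitor* when UseObjectMonitorTable

      // Lock entry: clear the cache so fast locking can never leave behind a
      // stale monitor from an earlier use of this stack slot.
      void clear_cache() { _metadata = 0; }

      // Inflated-lock success: remember the monitor for the matching unlock.
      void cache_monitor(ObjectMonitor* m) {
        _metadata = reinterpret_cast<uintptr_t>(m);
      }

      // Unlock: no valid ObjectMonitor* can be numerically smaller than its
      // alignment, so 0 (cleared) falls through to the slow path -- the same
      // test the stub performs with mv/bltu.
      ObjectMonitor* cached_monitor_or_null() const {
        if (_metadata < alignof(ObjectMonitor*)) {
          return nullptr;  // nothing cached: let the runtime sort it out
        }
        return reinterpret_cast<ObjectMonitor*>(_metadata);
      }
    };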
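
The rewritten stubs also share one owner/recursions sequence between the two monitor representations: without the table the register still holds the mark word, i.e. an ObjectMonitor* with the 0b10 monitor tag set, while with the table it holds an untagged pointer loaded from the cache. The monitor_tag/owner_address lines fold the tag into the field displacement, roughly as in this sketch (kMonitorTag is an illustrative constant standing in for markWord::monitor_value):

    #include <cstddef>
    #include <cstdint>

    const uintptr_t kMonitorTag = 2;  // assumed value of the 0b10 monitor tag

    // Mirrors the monitor_tag / owner_address computation in the stubs: the
    // same base register works tagged or untagged because the tag is
    // subtracted from the displacement instead of from the pointer.
    inline uintptr_t field_addr(uintptr_t monitor_reg, size_t field_offset,
                                bool use_object_monitor_table) {
      uintptr_t tag = use_object_monitor_table ? 0 : kMonitorTag;
      return monitor_reg + field_offset - tag;
    }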