8293007: riscv: failed to build after JDK-8290025

Reviewed-by: fyang, fjiang, shade
Yadong Wang 2022-08-30 01:17:41 +00:00 committed by Fei Yang
parent 9424d6d487
commit e016363b54
16 changed files with 257 additions and 74 deletions

View File

@@ -310,7 +310,7 @@ void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) {
// Insert nmethod entry barrier into frame.
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->nmethod_entry_barrier(this);
bs->nmethod_entry_barrier(this, NULL /* slow_path */, NULL /* continuation */, NULL /* guard */);
}
void C1_MacroAssembler::remove_frame(int framesize) {

View File

@@ -28,6 +28,7 @@
#include "asm/assembler.inline.hpp"
#include "opto/c2_MacroAssembler.hpp"
#include "opto/intrinsicnode.hpp"
#include "opto/output.hpp"
#include "opto/subnode.hpp"
#include "runtime/stubRoutines.hpp"
@@ -241,6 +242,35 @@ void C2_MacroAssembler::string_indexof_char(Register str1, Register cnt1,
typedef void (MacroAssembler::* load_chr_insn)(Register rd, const Address &adr, Register temp);
void C2_MacroAssembler::emit_entry_barrier_stub(C2EntryBarrierStub* stub) {
// make guard value 4-byte aligned so that it can be accessed by atomic instructions on riscv
int alignment_bytes = align(4);
bind(stub->slow_path());
int32_t _offset = 0;
movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), _offset);
jalr(ra, t0, _offset);
j(stub->continuation());
bind(stub->guard());
relocate(entry_guard_Relocation::spec());
assert(offset() % 4 == 0, "bad alignment");
emit_int32(0); // nmethod guard value
// make sure the stub has a fixed code size
if (alignment_bytes == 2) {
assert(UseRVC, "bad alignment");
c_nop();
} else {
assert(alignment_bytes == 0, "bad alignment");
nop();
}
}
int C2_MacroAssembler::entry_barrier_stub_size() {
return 8 * 4 + 4; // 4 bytes for alignment margin
}
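For reference, a minimal standalone sketch (not part of the patch; the instruction counts are assumptions inferred from the stub emitted above) of how the 8 * 4 + 4 budget appears to break down:

// sketch_entry_barrier_stub_size.cpp -- hypothetical, for illustration only
#include <cassert>

int main() {
  const int movptr_insns = 5;  // lui + addi + slli + addi + slli (movptr_with_offset)
  const int jalr_insns   = 1;  // call into the method_entry_barrier stub routine
  const int jump_insns   = 1;  // j continuation
  const int guard_words  = 1;  // 32-bit nmethod guard value behind the guard label
  const int align_margin = 4;  // worst-case padding so the guard stays 4-byte aligned
  assert(4 * (movptr_insns + jalr_insns + jump_insns + guard_words) + align_margin == 8 * 4 + 4);
  return 0;
}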
// Search for needle in haystack and return index or -1
// x10: result
// x11: haystack

View File

@@ -36,8 +36,8 @@
VectorRegister vrs,
bool is_latin, Label& DONE);
public:
void emit_entry_barrier_stub(C2EntryBarrierStub* stub) {}
static int entry_barrier_stub_size() { return 0; }
void emit_entry_barrier_stub(C2EntryBarrierStub* stub);
static int entry_barrier_stub_size();
void string_compare(Register str1, Register str2,
Register cnt1, Register cnt2, Register result,

View File

@@ -157,12 +157,8 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
__ j(done);
__ bind(runtime);
// save the live input values
RegSet saved = RegSet::of(pre_val);
if (tosca_live) { saved += RegSet::of(x10); }
if (obj != noreg) { saved += RegSet::of(obj); }
__ push_reg(saved, sp);
__ push_call_clobbered_registers();
if (expand_call) {
assert(pre_val != c_rarg1, "smashed arg");
@@ -171,7 +167,7 @@ void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm,
__ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread);
}
__ pop_reg(saved, sp);
__ pop_call_clobbered_registers();
__ bind(done);

View File

@@ -178,38 +178,104 @@ void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm,
__ sd(tmp1, Address(xthread, in_bytes(JavaThread::allocated_bytes_offset())));
}
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm) {
static volatile uint32_t _patching_epoch = 0;
address BarrierSetAssembler::patching_epoch_addr() {
return (address)&_patching_epoch;
}
void BarrierSetAssembler::increment_patching_epoch() {
Atomic::inc(&_patching_epoch);
}
void BarrierSetAssembler::clear_patching_epoch() {
_patching_epoch = 0;
}
void BarrierSetAssembler::nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation, Label* guard) {
BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
if (bs_nm == NULL) {
return;
}
// RISCV atomic operations require that the memory address be naturally aligned.
__ align(4);
Label local_guard;
NMethodPatchingType patching_type = nmethod_patching_type();
Label skip, guard;
Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset()));
if (slow_path == NULL) {
guard = &local_guard;
__ lwu(t0, guard);
// RISCV atomic operations require that the memory address be naturally aligned.
__ align(4);
}
// Subsequent loads of oops must occur after load of guard value.
// BarrierSetNMethod::disarm sets guard with release semantics.
__ membar(MacroAssembler::LoadLoad);
__ lwu(t1, thread_disarmed_addr);
__ beq(t0, t1, skip);
__ lwu(t0, *guard);
int32_t offset = 0;
__ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset);
__ jalr(ra, t0, offset);
__ j(skip);
switch (patching_type) {
case NMethodPatchingType::conc_data_patch:
// Subsequent loads of oops must occur after load of guard value.
// BarrierSetNMethod::disarm sets guard with release semantics.
__ membar(MacroAssembler::LoadLoad); // fall through to stw_instruction_and_data_patch
case NMethodPatchingType::stw_instruction_and_data_patch:
{
// With STW patching, no data or instructions are updated concurrently,
// so there is no need for any fencing around either data or instruction
// modification. Instruction patching is synchronized with a global
// icache_flush() by the writing hart on riscv. So here we can do a plain
// conditional branch with no fencing.
Address thread_disarmed_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset()));
__ lwu(t1, thread_disarmed_addr);
break;
}
case NMethodPatchingType::conc_instruction_and_data_patch:
{
// If we patch code we need both a code patching and a loadload
// fence. It's not super cheap, so we use a global epoch mechanism
// to hide them in a slow path.
// The high level idea of the global epoch mechanism is to detect
// when any thread has performed the required fencing, after the
// last nmethod was disarmed. This implies that the required
// fencing has been performed for all preceding nmethod disarms
// as well. Therefore, we do not need any further fencing.
__ la(t1, ExternalAddress((address)&_patching_epoch));
// Embed an artificial data dependency to order the guard load
// before the epoch load.
__ srli(ra, t0, 32);
__ orr(t1, t1, ra);
// Read the global epoch value.
__ lwu(t1, t1);
// Combine the guard value (low order) with the epoch value (high order).
__ slli(t1, t1, 32);
__ orr(t0, t0, t1);
// Compare the global values with the thread-local values
Address thread_disarmed_and_epoch_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset()));
__ ld(t1, thread_disarmed_and_epoch_addr);
break;
}
default:
ShouldNotReachHere();
}
__ bind(guard);
if (slow_path == NULL) {
Label skip_barrier;
__ beq(t0, t1, skip_barrier);
assert(__ offset() % 4 == 0, "bad alignment");
__ emit_int32(0); // nmethod guard value. Skipped over in common case.
int32_t offset = 0;
__ movptr_with_offset(t0, StubRoutines::riscv::method_entry_barrier(), offset);
__ jalr(ra, t0, offset);
__ j(skip_barrier);
__ bind(skip);
__ bind(local_guard);
assert(__ offset() % 4 == 0, "bad alignment");
__ emit_int32(0); // nmethod guard value. Skipped over in common case.
__ bind(skip_barrier);
} else {
__ beq(t0, t1, *continuation);
__ j(*slow_path);
__ bind(*continuation);
}
}
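A minimal C++ model (illustrative only; helper names are hypothetical) of the conc_instruction_and_data_patch compare above, where the slli/orr sequence places the 32-bit guard in the low half and the patching epoch in the high half of a single 64-bit word:

#include <cstdint>

// Guard in the low 32 bits, epoch in the high 32 bits (lwu zero-extends,
// so the high half starts out clear before the orr).
static inline uint64_t combine(uint32_t guard, uint32_t epoch) {
  return (uint64_t)guard | ((uint64_t)epoch << 32);
}

// The fast path is taken only when both halves match the thread-local copy
// loaded from thread_disarmed_and_epoch_addr.
static inline bool may_enter(uint32_t guard, uint32_t epoch,
                             uint64_t thread_disarmed_and_epoch) {
  return combine(guard, epoch) == thread_disarmed_and_epoch;
}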
void BarrierSetAssembler::c2i_entry_barrier(MacroAssembler* masm) {

View File

@@ -32,6 +32,12 @@
#include "memory/allocation.hpp"
#include "oops/access.hpp"
enum class NMethodPatchingType {
stw_instruction_and_data_patch,
conc_instruction_and_data_patch,
conc_data_patch
};
class BarrierSetAssembler: public CHeapObj<mtGC> {
private:
void incr_allocated_bytes(MacroAssembler* masm,
@@ -63,9 +69,14 @@ public:
virtual void barrier_stubs_init() {}
virtual void nmethod_entry_barrier(MacroAssembler* masm);
virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::stw_instruction_and_data_patch; }
virtual void nmethod_entry_barrier(MacroAssembler* masm, Label* slow_path, Label* continuation, Label* guard);
virtual void c2i_entry_barrier(MacroAssembler* masm);
virtual ~BarrierSetAssembler() {}
static address patching_epoch_addr();
static void clear_patching_epoch();
static void increment_patching_epoch();
};
#endif // CPU_RISCV_GC_SHARED_BARRIERSETASSEMBLER_RISCV_HPP

View File

@@ -26,6 +26,7 @@
#include "precompiled.hpp"
#include "code/codeCache.hpp"
#include "code/nativeInst.hpp"
#include "gc/shared/barrierSetAssembler.hpp"
#include "gc/shared/barrierSetNMethod.hpp"
#include "logging/log.hpp"
#include "memory/resourceArea.hpp"
@@ -36,21 +37,57 @@
#include "utilities/align.hpp"
#include "utilities/debug.hpp"
static int slow_path_size(nmethod* nm) {
// The slow path code is out of line with C2.
// Leave a jal to the stub in the fast path.
return nm->is_compiled_by_c2() ? 1 : 8;
}
static int entry_barrier_offset(nmethod* nm) {
BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
switch (bs_asm->nmethod_patching_type()) {
case NMethodPatchingType::stw_instruction_and_data_patch:
return -4 * (4 + slow_path_size(nm));
case NMethodPatchingType::conc_data_patch:
return -4 * (5 + slow_path_size(nm));
case NMethodPatchingType::conc_instruction_and_data_patch:
return -4 * (15 + slow_path_size(nm));
}
ShouldNotReachHere();
return 0;
}
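A small self-contained sketch (illustrative only) that simply evaluates the formulas above for both compilers; it makes no claim about the exact instruction mix behind the 4/5/15 counts:

#include <cstdio>

static int entry_barrier_offset_model(int patching_type_insns, bool is_compiled_by_c2) {
  const int slow_path = is_compiled_by_c2 ? 1 : 8;   // same formula as slow_path_size()
  return -4 * (patching_type_insns + slow_path);
}

int main() {
  // 4, 5 and 15 are the per-patching-type instruction counts used above.
  std::printf("c2        : stw=%d conc_data=%d conc_insn_and_data=%d\n",
              entry_barrier_offset_model(4, true),
              entry_barrier_offset_model(5, true),
              entry_barrier_offset_model(15, true));
  std::printf("c1/native : stw=%d conc_data=%d conc_insn_and_data=%d\n",
              entry_barrier_offset_model(4, false),
              entry_barrier_offset_model(5, false),
              entry_barrier_offset_model(15, false));
  return 0;
}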
class NativeNMethodBarrier: public NativeInstruction {
address instruction_address() const { return addr_at(0); }
int *guard_addr() {
/* auipc + lwu + fence + lwu + beq + lui + addi + slli + addi + slli + jalr + j */
return reinterpret_cast<int*>(instruction_address() + 12 * 4);
int local_guard_offset(nmethod* nm) {
// It's the last instruction
return (-entry_barrier_offset(nm)) - 4;
}
int *guard_addr(nmethod* nm) {
if (nm->is_compiled_by_c2()) {
// With c2 compiled code, the guard is out-of-line in a stub
// We find it using the RelocIterator.
RelocIterator iter(nm);
while (iter.next()) {
if (iter.type() == relocInfo::entry_guard_type) {
entry_guard_Relocation* const reloc = iter.entry_guard_reloc();
return reinterpret_cast<int*>(reloc->addr());
}
}
ShouldNotReachHere();
}
return reinterpret_cast<int*>(instruction_address() + local_guard_offset(nm));
}
public:
int get_value() {
return Atomic::load_acquire(guard_addr());
int get_value(nmethod* nm) {
return Atomic::load_acquire(guard_addr(nm));
}
void set_value(int value) {
Atomic::release_store(guard_addr(), value);
void set_value(nmethod* nm, int value) {
Atomic::release_store(guard_addr(nm), value);
}
void verify() const;
@@ -64,21 +101,12 @@ struct CheckInsn {
};
static const struct CheckInsn barrierInsn[] = {
{ 0x00000fff, 0x00000297, "auipc t0, 0 "},
{ 0x000fffff, 0x0002e283, "lwu t0, 48(t0) "},
{ 0xffffffff, 0x0aa0000f, "fence ir, ir "},
{ 0x000fffff, 0x000be303, "lwu t1, 112(xthread)"},
{ 0x01fff07f, 0x00628063, "beq t0, t1, skip "},
{ 0x00000fff, 0x000002b7, "lui t0, imm0 "},
{ 0x000fffff, 0x00028293, "addi t0, t0, imm1 "},
{ 0xffffffff, 0x00b29293, "slli t0, t0, 11 "},
{ 0x000fffff, 0x00028293, "addi t0, t0, imm2 "},
{ 0xffffffff, 0x00629293, "slli t0, t0, 6 "},
{ 0x000fffff, 0x000280e7, "jalr ra, imm3(t0) "},
{ 0x00000fff, 0x0000006f, "j skip "}
{ 0x00000fff, 0x00000297, "auipc t0, 0 "},
{ 0x000fffff, 0x0002e283, "lwu t0, guard_offset(t0) "},
/* ...... */
/* ...... */
/* guard: */
/* 32bit nmethod guard value */
/* skip: */
};
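A hedged sketch of how a mask/pattern table like this is typically consumed by verify(): each emitted instruction word must equal the expected bits once the operand fields outside the mask are ignored (struct and helper names here are hypothetical):

#include <cstddef>
#include <cstdint>

struct CheckInsnModel { uint32_t mask; uint32_t bits; const char* name; };

// Returns true when every instruction word matches its table entry under the mask.
static bool barrier_matches(const uint32_t* insns, const CheckInsnModel* table, size_t n) {
  for (size_t i = 0; i < n; i++) {
    if ((insns[i] & table[i].mask) != table[i].bits) {
      return false;  // operand bits may differ; opcode/funct bits must not
    }
  }
  return true;
}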
// The encodings must match the instructions emitted by
@@ -136,19 +164,8 @@ void BarrierSetNMethod::deoptimize(nmethod* nm, address* return_address_ptr) {
new_frame->pc = SharedRuntime::get_handle_wrong_method_stub();
}
// This is the offset of the entry barrier from where the frame is completed.
// If any code changes between the end of the verified entry where the entry
// barrier resides, and the completion of the frame, then
// NativeNMethodCmpBarrier::verify() will immediately complain when it does
// not find the expected native instruction at this offset, which needs updating.
// Note that this offset is invariant of PreserveFramePointer.
// see BarrierSetAssembler::nmethod_entry_barrier
// auipc + lwu + fence + lwu + beq + movptr_with_offset(5 instructions) + jalr + j + int32
static const int entry_barrier_offset = -4 * 13;
static NativeNMethodBarrier* native_nmethod_barrier(nmethod* nm) {
address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset;
address barrier_address = nm->code_begin() + nm->frame_complete_offset() + entry_barrier_offset(nm);
NativeNMethodBarrier* barrier = reinterpret_cast<NativeNMethodBarrier*>(barrier_address);
debug_only(barrier->verify());
return barrier;
@@ -159,14 +176,39 @@ void BarrierSetNMethod::disarm(nmethod* nm) {
return;
}
// Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier.
NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
// The patching epoch is incremented before the nmethod is disarmed. Disarming
// is performed with a release store. In the nmethod entry barrier, the values
// are read in the opposite order, such that the load of the nmethod guard
// acquires the patching epoch. This way, the guard is guaranteed to block
// entries to the nmethod, until it has safely published the requirement for
// further fencing by mutators, before they are allowed to enter.
BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
bs_asm->increment_patching_epoch();
barrier->set_value(disarmed_value());
// Disarms the nmethod guard emitted by BarrierSetAssembler::nmethod_entry_barrier.
// Symmetric "LD; FENCE IR, IR" is in the nmethod barrier.
NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
barrier->set_value(nm, disarmed_value());
}
void BarrierSetNMethod::arm(nmethod* nm, int arm_value) {
Unimplemented();
if (!supports_entry_barrier(nm)) {
return;
}
if (arm_value == disarmed_value()) {
// The patching epoch is incremented before the nmethod is disarmed. Disarming
// is performed with a release store. In the nmethod entry barrier, the values
// are read in the opposite order, such that the load of the nmethod guard
// acquires the patching epoch. This way, the guard is guaranteed to block
// entries to the nmethod, until it has safely published the requirement for
// further fencing by mutators, before they are allowed to enter.
BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
bs_asm->increment_patching_epoch();
}
NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
barrier->set_value(nm, arm_value);
}
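A minimal concurrency model (assumption-laden, illustrative only) of the protocol described in the comments above: the writer bumps the global epoch before release-storing the guard, and a reader's acquire-load of the guard therefore cannot observe the disarmed value without also being ordered after that epoch increment:

#include <atomic>
#include <cstdint>

static std::atomic<uint32_t> patching_epoch{0};   // models BarrierSetAssembler::_patching_epoch
static std::atomic<int>      nmethod_guard{1};    // 1 = armed (hypothetical encoding)

void disarm_model(int disarmed_value) {
  patching_epoch.fetch_add(1, std::memory_order_relaxed);          // increment_patching_epoch()
  nmethod_guard.store(disarmed_value, std::memory_order_release);  // set_value(nm, disarmed_value())
}

bool fast_path_model(int thread_disarmed_value) {
  // The entry barrier's guard load pairs with the release store above.
  int g = nmethod_guard.load(std::memory_order_acquire);
  return g == thread_disarmed_value;   // true => enter the nmethod without the slow path
}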
bool BarrierSetNMethod::is_armed(nmethod* nm) {
@@ -175,5 +217,5 @@ bool BarrierSetNMethod::is_armed(nmethod* nm) {
}
NativeNMethodBarrier* barrier = native_nmethod_barrier(nm);
return barrier->get_value() != disarmed_value();
return barrier->get_value(nm) != disarmed_value();
}

View File

@@ -63,6 +63,8 @@ public:
void iu_barrier(MacroAssembler* masm, Register dst, Register tmp);
virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::conc_data_patch; }
#ifdef COMPILER1
void gen_pre_barrier_stub(LIR_Assembler* ce, ShenandoahPreBarrierStub* stub);
void gen_load_reference_barrier_stub(LIR_Assembler* ce, ShenandoahLoadReferenceBarrierStub* stub);

View File

@@ -78,6 +78,8 @@ public:
Register tmp,
Label& slowpath);
virtual NMethodPatchingType nmethod_patching_type() { return NMethodPatchingType::conc_data_patch; }
#ifdef COMPILER1
void generate_c1_load_barrier_test(LIR_Assembler* ce,
LIR_Opr ref) const;

View File

@@ -88,9 +88,11 @@ static void pass_arg3(MacroAssembler* masm, Register arg) {
}
}
void MacroAssembler::align(int modulus, int extra_offset) {
int MacroAssembler::align(int modulus, int extra_offset) {
CompressibleRegion cr(this);
intptr_t before = offset();
while ((offset() + extra_offset) % modulus != 0) { nop(); }
return (int)(offset() - before);
}
void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {
@@ -1667,7 +1669,9 @@ void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) {
// nmethod entry barriers necessitate using the constant pool. They have to be
// ordered with respect to oop accesses.
if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL || !immediate) {
// Using immediate literals would necessitate fence.i.
BarrierSet* bs = BarrierSet::barrier_set();
if ((bs->barrier_set_nmethod() != NULL && bs->barrier_set_assembler()->nmethod_patching_type() == NMethodPatchingType::conc_data_patch) || !immediate) {
address dummy = address(uintptr_t(pc()) & -wordSize); // A nearby aligned address
ld_constant(dst, Address(dummy, rspec));
} else

View File

@@ -47,7 +47,7 @@ class MacroAssembler: public Assembler {
void safepoint_poll(Label& slow_path, bool at_return, bool acquire, bool in_nmethod);
// Alignment
void align(int modulus, int extra_offset = 0);
int align(int modulus, int extra_offset = 0);
// Stack frame creation/removal
// Note that SP must be updated to the right place before saving/restoring RA and FP

View File

@@ -1342,7 +1342,24 @@ void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
if (C->stub_function() == NULL) {
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
bs->nmethod_entry_barrier(&_masm);
if (BarrierSet::barrier_set()->barrier_set_nmethod() != NULL) {
// Dummy labels for just measuring the code size
Label dummy_slow_path;
Label dummy_continuation;
Label dummy_guard;
Label* slow_path = &dummy_slow_path;
Label* continuation = &dummy_continuation;
Label* guard = &dummy_guard;
if (!Compile::current()->output()->in_scratch_emit_size()) {
// Use real labels from the actual stub when not just emitting code to measure its size
C2EntryBarrierStub* stub = Compile::current()->output()->entry_barrier_table()->add_entry_barrier();
slow_path = &stub->slow_path();
continuation = &stub->continuation();
guard = &stub->guard();
}
// In the C2 code, we move the non-hot part of nmethod entry barriers out-of-line to a stub.
bs->nmethod_entry_barrier(&_masm, slow_path, continuation, guard);
}
}
if (VerifyStackAtCalls) {

View File

@@ -1325,7 +1325,7 @@ nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
assert_cond(bs != NULL);
bs->nmethod_entry_barrier(masm);
bs->nmethod_entry_barrier(masm, NULL /* slow_path */, NULL /* continuation */, NULL /* guard */);
// Frame is now completed as far as size and linkage.
int frame_complete = ((intptr_t)__ pc()) - start;

View File

@@ -2341,6 +2341,17 @@ class StubGenerator: public StubCodeGenerator {
address start = __ pc();
BarrierSetAssembler* bs_asm = BarrierSet::barrier_set()->barrier_set_assembler();
if (bs_asm->nmethod_patching_type() == NMethodPatchingType::conc_instruction_and_data_patch) {
BarrierSetNMethod* bs_nm = BarrierSet::barrier_set()->barrier_set_nmethod();
Address thread_epoch_addr(xthread, in_bytes(bs_nm->thread_disarmed_offset()) + 4);
__ la(t1, ExternalAddress(bs_asm->patching_epoch_addr()));
__ lwu(t1, t1);
__ sw(t1, thread_epoch_addr);
__ membar(__ LoadLoad);
}
__ set_last_Java_frame(sp, fp, ra, t0);
__ enter();
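A rough model (names hypothetical) of what the stub prologue added above accomplishes for conc_instruction_and_data_patch: it latches the current global patching epoch into the thread-local word next to the disarmed value, so the combined 64-bit compare in the entry barrier can succeed on the next attempt without re-entering the slow path:

#include <atomic>
#include <cstdint>

static std::atomic<uint32_t> patching_epoch{0};   // stands in for the global epoch

struct ThreadModel {
  uint32_t disarmed_value;   // at thread_disarmed_offset()
  uint32_t thread_epoch;     // at thread_disarmed_offset() + 4, as addressed above
};

void latch_epoch_model(ThreadModel& t) {
  t.thread_epoch = patching_epoch.load(std::memory_order_relaxed);  // the lwu + sw above
  std::atomic_thread_fence(std::memory_order_acquire);              // models the LoadLoad membar
}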

View File

@@ -130,9 +130,11 @@ void BarrierSetNMethod::arm_all_nmethods() {
BarrierSetNMethodArmClosure cl(_current_phase);
Threads::threads_do(&cl);
#if defined(AARCH64) || defined(RISCV)
// We clear the patching epoch when disarming nmethods, so that
// the counter won't overflow.
AARCH64_PORT_ONLY(BarrierSetAssembler::clear_patching_epoch());
BarrierSetAssembler::clear_patching_epoch();
#endif
}
int BarrierSetNMethod::nmethod_stub_entry_barrier(address* return_address_ptr) {

View File

@@ -118,7 +118,7 @@ public:
class C2EntryBarrierStub: public ResourceObj {
Label _slow_path;
Label _continuation;
Label _guard; // Used on AArch64
Label _guard; // Used on AArch64 and RISCV
public:
C2EntryBarrierStub() :