8345067: C2: enable implicit null checks for ZGC reads

Reviewed-by: aboldtch, kvn, epeter
Roberto Castañeda Lozano 2025-06-09 06:23:17 +00:00
parent 6c616c71ec
commit 91f12600d2
16 changed files with 363 additions and 54 deletions

View File

@ -3921,6 +3921,10 @@ ins_attrib ins_alignment(4); // Required alignment attribute (must
// compute_padding() function must be
// provided for the instruction
// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
ins_attrib ins_is_late_expanded_null_check_candidate(false);
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in

View File

@ -106,6 +106,13 @@ instruct zLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
match(Set dst (LoadP mem));
predicate(UseZGC && !needs_acquiring_load(n) && n->as_Load()->barrier_data() != 0);
effect(TEMP dst, KILL cr);
// The main load is a candidate to implement implicit null checks, as long as
// legitimize_address() does not require a preceding lea instruction to
// materialize the memory operand. The absence of a preceding lea instruction
// is guaranteed for immLoffset8 memory operands, because these do not lead to
// out-of-range offsets (see definition of immLoffset8). Fortunately,
// immLoffset8 memory operands are the most common ones in practice.
ins_is_late_expanded_null_check_candidate(opnd_array(1)->opcode() == INDOFFL8);
ins_cost(4 * INSN_COST);
@ -117,7 +124,11 @@ instruct zLoadP(iRegPNoSp dst, memory8 mem, rFlagsReg cr)
// Fix up any out-of-range offsets.
assert_different_registers(rscratch2, as_Register($mem$$base));
assert_different_registers(rscratch2, $dst$$Register);
int size = 8;
assert(!this->is_late_expanded_null_check_candidate() ||
!MacroAssembler::legitimize_address_requires_lea(ref_addr, size),
"an instruction that can be used for implicit null checking should emit the candidate memory access first");
ref_addr = __ legitimize_address(ref_addr, size, rscratch2);
}
__ ldr($dst$$Register, ref_addr);
z_load_barrier(masm, this, ref_addr, $dst$$Register, rscratch1);

View File

@ -129,17 +129,22 @@ class MacroAssembler: public Assembler {
a.lea(this, r);
}
// Whether materializing the given address for a LDR/STR requires an
// additional lea instruction.
static bool legitimize_address_requires_lea(const Address &a, int size) {
return a.getMode() == Address::base_plus_offset &&
!Address::offset_ok_for_immed(a.offset(), exact_log2(size));
}
/* Sometimes we get misaligned loads and stores, usually from Unsafe
accesses, and these can exceed the offset range. */
Address legitimize_address(const Address &a, int size, Register scratch) {
if (legitimize_address_requires_lea(a, size)) {
block_comment("legitimize_address {");
lea(scratch, a);
block_comment("} legitimize_address");
return Address(scratch);
}
return a;
}
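
For intuition about legitimize_address_requires_lea(): on AArch64, a LDR/STR immediate offset is encodable either as a size-scaled unsigned 12-bit value or as a signed 9-bit unscaled (LDUR/STUR-style) value; any other offset forces a preceding lea to materialize the address, which is exactly the case that disqualifies an access as an implicit null-check candidate. A minimal standalone sketch of that rule (hypothetical helper name, not HotSpot code, mirroring the check that Address::offset_ok_for_immed performs):

#include <cstdint>

// Whether an AArch64 LDR/STR of 2^shift bytes can encode this offset directly.
static bool offset_fits_immediate(int64_t offset, unsigned shift) {
  int64_t mask = (int64_t(1) << shift) - 1;
  bool scaled_ok = offset >= 0 && (offset & mask) == 0 &&
                   offset < (int64_t(1) << (12 + shift)); // UIMM12, scaled by size
  bool unscaled_ok = offset >= -256 && offset <= 255;     // SIMM9, unscaled
  return scaled_ok || unscaled_ok;
}

For an 8-byte access (shift = 3), offsets such as 32760 or -8 fit, but 40000 does not; operands with such out-of-range offsets are what the INDOFFL8 predicate in zLoadP above rules out.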

View File

@ -141,6 +141,7 @@ instruct zLoadP(iRegPdst dst, memoryAlg4 mem, flagsRegCR0 cr0)
%{
match(Set dst (LoadP mem));
effect(TEMP_DEF dst, KILL cr0);
ins_is_late_expanded_null_check_candidate(true);
ins_cost(MEMORY_REF_COST);
predicate((UseZGC && n->as_Load()->barrier_data() != 0)
@ -160,6 +161,7 @@ instruct zLoadP_acq(iRegPdst dst, memoryAlg4 mem, flagsRegCR0 cr0)
%{
match(Set dst (LoadP mem));
effect(TEMP_DEF dst, KILL cr0);
ins_is_late_expanded_null_check_candidate(true);
ins_cost(3 * MEMORY_REF_COST);
// Predicate on instruction order is implicitly present due to the predicate of the cheaper zLoadP operation

View File

@ -4036,6 +4036,10 @@ ins_attrib ins_field_cbuf_insts_offset(-1);
ins_attrib ins_field_load_ic_hi_node(0);
ins_attrib ins_field_load_ic_node(0);
// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
ins_attrib ins_is_late_expanded_null_check_candidate(false);
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct
// parsing in the ADLC because operands constitute user defined types

View File

@ -96,6 +96,7 @@ instruct zLoadP(iRegPNoSp dst, memory mem, iRegPNoSp tmp, rFlagsReg cr)
match(Set dst (LoadP mem));
predicate(UseZGC && n->as_Load()->barrier_data() != 0);
effect(TEMP dst, TEMP tmp, KILL cr);
ins_is_late_expanded_null_check_candidate(true);
ins_cost(4 * DEFAULT_COST);

View File

@ -2619,6 +2619,10 @@ ins_attrib ins_alignment(4); // Required alignment attribute (must
// compute_padding() function must be
// provided for the instruction
// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
ins_attrib ins_is_late_expanded_null_check_candidate(false);
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in

View File

@ -118,6 +118,10 @@ instruct zLoadP(rRegP dst, memory mem, rFlagsReg cr)
predicate(UseZGC && n->as_Load()->barrier_data() != 0);
match(Set dst (LoadP mem));
effect(TEMP dst, KILL cr);
// The main load is a candidate to implement implicit null checks. The
// barrier's slow path includes an identical reload, which does not need to be
// registered in the exception table because it is dominated by the main one.
ins_is_late_expanded_null_check_candidate(true);
ins_cost(125);

View File

@ -2055,6 +2055,10 @@ ins_attrib ins_alignment(1); // Required alignment attribute (must
// compute_padding() function must be
// provided for the instruction
// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
ins_attrib ins_is_late_expanded_null_check_candidate(false);
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in

View File

@ -1626,6 +1626,8 @@ void ArchDesc::declareClasses(FILE *fp) {
while (attr != nullptr) {
if (strcmp (attr->_ident, "ins_is_TrapBasedCheckNode") == 0) {
fprintf(fp, " virtual bool is_TrapBasedCheckNode() const { return %s; }\n", attr->_val);
} else if (strcmp (attr->_ident, "ins_is_late_expanded_null_check_candidate") == 0) {
fprintf(fp, " virtual bool is_late_expanded_null_check_candidate() const { return %s; }\n", attr->_val);
} else if (strcmp (attr->_ident, "ins_cost") != 0 &&
strncmp(attr->_ident, "ins_field_", 10) != 0 &&
// Must match function in node.hpp: return type bool, no prefix "ins_".

View File

@ -464,6 +464,14 @@ class PhaseCFG : public Phase {
Node* catch_cleanup_find_cloned_def(Block* use_blk, Node* def, Block* def_blk, int n_clone_idx);
void catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, int n_clone_idx);
// Ensure that n happens at b or above, i.e. at a block that dominates b.
// We expect n to be an orphan node without further inputs.
void ensure_node_is_at_block_or_above(Node* n, Block* b);
// Move node n from its current placement into the end of block b.
// Also move its outgoing Mach projections.
void move_node_and_its_projections_to_block(Node* n, Block* b);
// Detect implicit-null-check opportunities. Basically, find null checks
// with suitable memory ops nearby. Use the memory op to do the null check.
// I can generate a memory op if there is not one nearby.

View File

@ -76,6 +76,36 @@ static bool needs_explicit_null_check_for_read(Node *val) {
return true;
}
void PhaseCFG::move_node_and_its_projections_to_block(Node* n, Block* b) {
assert(!n->is_CFG(), "cannot move CFG node");
Block* old = get_block_for_node(n);
old->find_remove(n);
b->add_inst(n);
map_node_to_block(n, b);
// Check for Mach projections that also need to be moved.
for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
Node* out = n->fast_out(i);
if (!out->is_MachProj()) {
continue;
}
assert(!n->is_MachProj(), "nested projections are not allowed");
move_node_and_its_projections_to_block(out, b);
}
}
void PhaseCFG::ensure_node_is_at_block_or_above(Node* n, Block* b) {
assert(!n->is_CFG(), "cannot move CFG node");
Block* current = get_block_for_node(n);
if (current->dominates(b)) {
return; // n is already placed above b, do nothing.
}
// We only expect nodes without further inputs, like MachTemp or load Base.
assert(n->req() == 0 || (n->req() == 1 && n->in(0) == (Node*)C->root()),
"need for recursive hoisting not expected");
assert(b->dominates(current), "precondition: can only move n to b if b dominates n");
move_node_and_its_projections_to_block(n, b);
}
//------------------------------implicit_null_check----------------------------
// Detect implicit-null-check opportunities. Basically, find null checks
// with suitable memory ops nearby. Use the memory op to do the null check.
@ -160,12 +190,14 @@ void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allo
Node *m = val->out(i);
if( !m->is_Mach() ) continue;
MachNode *mach = m->as_Mach();
if (mach->barrier_data() != 0 &&
!mach->is_late_expanded_null_check_candidate()) {
// Using memory accesses with barriers to perform implicit null checks is
// only supported if these are explicitly marked as emitting a candidate
// memory access instruction at their initial address. If not marked as
// such, barrier-tagged operations might expand into one or several memory
// access instructions located at arbitrary offsets from the initial
// address, which would invalidate the implicit null exception table.
continue;
}
was_store = false;
@ -321,6 +353,14 @@ void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allo
// Ignore DecodeN val which could be hoisted to where needed.
if( is_decoden ) continue;
}
if (mach->in(j)->is_MachTemp()) {
assert(mach->in(j)->outcnt() == 1, "MachTemp nodes should not be shared");
// Ignore MachTemp inputs, they can be safely hoisted with the candidate.
// MachTemp nodes have no inputs themselves and are only used to reserve
// a scratch register for the implementation of the node (e.g. in
// late-expanded GC barriers).
continue;
}
// Block of memory-op input
Block *inb = get_block_for_node(mach->in(j));
Block *b = block; // Start from nul check
@ -388,38 +428,24 @@ void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allo
// Hoist it up to the end of the test block together with its inputs if they exist.
for (uint i = 2; i < val->req(); i++) {
// DecodeN has 2 regular inputs + optional MachTemp or load Base inputs.
// Inputs of val may already be early enough, but if not move them together with val.
ensure_node_is_at_block_or_above(val->in(i), block);
}
move_node_and_its_projections_to_block(val, block);
}
}
// Move any MachTemp inputs to the end of the test block.
for (uint i = 0; i < best->req(); i++) {
Node* n = best->in(i);
if (n == nullptr || !n->is_MachTemp()) {
continue;
}
ensure_node_is_at_block_or_above(n, block);
}
// Hoist the memory candidate up to the end of the test block.
move_node_and_its_projections_to_block(best, block);
// Move the control dependence if it is pinned to not-null block.
// Don't change it in other cases: null or dominating control.
@ -429,17 +455,6 @@ void PhaseCFG::implicit_null_check(Block* block, Node *proj, Node *val, int allo
best->set_req(0, proj->in(0)->in(0));
}
// proj==Op_True --> ne test; proj==Op_False --> eq test.
// One of two graph shapes got matched:
// (IfTrue (If (Bool NE (CmpP ptr null))))
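
For readers new to the underlying mechanism: an implicit null check removes the compare-and-branch entirely and lets the memory access itself fault when the pointer is null; the VM's signal handler then maps the faulting PC through the implicit-null exception table (built in FillExceptionTables(), below) to the NullPointerException path. A minimal POSIX-only sketch of the same trap-and-recover idea, outside HotSpot (siglongjmp stands in for the handler dispatch; not how HotSpot itself resumes):

#include <csignal>
#include <csetjmp>
#include <cstdio>

static sigjmp_buf recovery_point;

// Stand-in for HotSpot's handler looking up the faulting PC in the
// implicit-null exception table and continuing at the exception handler.
static void on_segv(int) {
  siglongjmp(recovery_point, 1);
}

int main() {
  struct sigaction sa = {};
  sa.sa_handler = on_segv;
  sigemptyset(&sa.sa_mask);
  sigaction(SIGSEGV, &sa, nullptr);

  int* volatile p = nullptr; // volatile so the compiler keeps the faulting load
  if (sigsetjmp(recovery_point, 1) == 0) {
    int v = *p; // the "implicit null check": no test, just the access
    printf("loaded %d\n", v);
  } else {
    printf("recovered: raise NullPointerException here\n");
  }
  return 0;
}

The payoff is that the common non-null case executes no test at all; only the rare null case pays the much higher cost of a signal.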

View File

@ -386,6 +386,13 @@ public:
// Returns true if this node is a check that can be implemented with a trap.
virtual bool is_TrapBasedCheckNode() const { return false; }
// Whether this node is expanded during code emission into a sequence of
// instructions and the first instruction can perform an implicit null check.
virtual bool is_late_expanded_null_check_candidate() const {
return false;
}
void set_removed() { add_flag(Flag_is_removed_by_peephole); }
bool get_removed() { return (flags() & Flag_is_removed_by_peephole) != 0; }

View File

@ -2015,8 +2015,10 @@ void PhaseOutput::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_s
// Handle implicit null exception table updates
if (n->is_MachNullCheck()) {
MachNode* access = n->in(1)->as_Mach();
assert(access->barrier_data() == 0 ||
access->is_late_expanded_null_check_candidate(),
"Implicit null checks on memory accesses with barriers are only supported on nodes explicitly marked as null-check candidates");
uint block_num = block->non_connector_successor(0)->_pre_order;
_inc_table.append(inct_starts[inct_cnt++], blk_labels[block_num].loc_pos());
continue;

View File

@ -0,0 +1,231 @@
/*
* Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License version 2 only, as
* published by the Free Software Foundation.
*
* This code is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
* version 2 for more details (a copy is included in the LICENSE file that
* accompanied this code).
*
* You should have received a copy of the GNU General Public License version
* 2 along with this work; if not, write to the Free Software Foundation,
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
* or visit www.oracle.com if you need additional information or have any
* questions.
*/
package compiler.gcbarriers;
import compiler.lib.ir_framework.*;
import java.lang.invoke.VarHandle;
import java.lang.invoke.MethodHandles;
import java.lang.ref.Reference;
import java.lang.ref.ReferenceQueue;
import java.lang.ref.SoftReference;
import java.lang.ref.WeakReference;
import jdk.test.lib.Asserts;
/**
* @test
* @summary Test that implicit null checks are generated as expected for
different GC memory accesses.
* @library /test/lib /
* @run driver compiler.gcbarriers.TestImplicitNullChecks
*/
public class TestImplicitNullChecks {
static class Outer {
Object f;
}
static class OuterWithVolatileField {
volatile Object f;
}
static final VarHandle fVarHandle;
static {
MethodHandles.Lookup l = MethodHandles.lookup();
try {
fVarHandle = l.findVarHandle(Outer.class, "f", Object.class);
} catch (Exception e) {
throw new Error(e);
}
}
public static void main(String[] args) {
TestFramework.runWithFlags("-XX:CompileCommand=inline,java.lang.ref.*::*",
"-XX:-TieredCompilation");
}
@Test
@IR(applyIfOr = {"UseZGC", "true", "UseG1GC", "true"},
counts = {IRNode.NULL_CHECK, "1"},
phase = CompilePhase.FINAL_CODE)
static Object testLoad(Outer o) {
return o.f;
}
@Test
// On aarch64, volatile loads always use indirect memory operands, which
// leads to a pattern that cannot be exploited by the current C2 analysis.
// On PPC64, volatile loads are preceded by membar_volatile instructions,
// which also inhibits the current C2 analysis.
@IR(applyIfPlatformAnd = {"aarch64", "false", "ppc", "false"},
applyIfOr = {"UseZGC", "true", "UseG1GC", "true"},
counts = {IRNode.NULL_CHECK, "1"},
phase = CompilePhase.FINAL_CODE)
static Object testLoadVolatile(OuterWithVolatileField o) {
return o.f;
}
@Run(test = {"testLoad",
"testLoadVolatile"},
mode = RunMode.STANDALONE)
static void runLoadTests() {
{
Outer o = new Outer();
// Trigger compilation with implicit null check.
for (int i = 0; i < 10_000; i++) {
testLoad(o);
}
// Trigger null pointer exception.
o = null;
boolean nullPointerException = false;
try {
testLoad(o);
} catch (NullPointerException e) { nullPointerException = true; }
Asserts.assertTrue(nullPointerException);
}
{
OuterWithVolatileField o = new OuterWithVolatileField();
// Trigger compilation with implicit null check.
for (int i = 0; i < 10_000; i++) {
testLoadVolatile(o);
}
// Trigger null pointer exception.
o = null;
boolean nullPointerException = false;
try {
testLoadVolatile(o);
} catch (NullPointerException e) { nullPointerException = true; }
Asserts.assertTrue(nullPointerException);
}
}
@Test
// G1 and ZGC stores cannot currently be used to implement implicit null
// checks, because they expand into multiple memory access instructions that
// are not necessarily located at the initial instruction start address.
@IR(applyIfOr = {"UseZGC", "true", "UseG1GC", "true"},
failOn = IRNode.NULL_CHECK,
phase = CompilePhase.FINAL_CODE)
static void testStore(Outer o, Object o1) {
o.f = o1;
}
@Run(test = {"testStore"})
static void runStoreTests() {
{
Outer o = new Outer();
Object o1 = new Object();
testStore(o, o1);
}
}
@Test
// G1 and ZGC compare-and-exchange operations cannot currently be used to
// implement implicit null checks, because they expand into multiple memory
// access instructions that are not necessarily located at the initial
// instruction start address. The same holds for testCompareAndSwap and
// testGetAndSet below.
@IR(applyIfOr = {"UseZGC", "true", "UseG1GC", "true"},
failOn = IRNode.NULL_CHECK,
phase = CompilePhase.FINAL_CODE)
static Object testCompareAndExchange(Outer o, Object oldVal, Object newVal) {
return fVarHandle.compareAndExchange(o, oldVal, newVal);
}
@Test
@IR(applyIfOr = {"UseZGC", "true", "UseG1GC", "true"},
failOn = IRNode.NULL_CHECK,
phase = CompilePhase.FINAL_CODE)
static boolean testCompareAndSwap(Outer o, Object oldVal, Object newVal) {
return fVarHandle.compareAndSet(o, oldVal, newVal);
}
@Test
@IR(applyIfOr = {"UseZGC", "true", "UseG1GC", "true"},
failOn = IRNode.NULL_CHECK,
phase = CompilePhase.FINAL_CODE)
static Object testGetAndSet(Outer o, Object newVal) {
return fVarHandle.getAndSet(o, newVal);
}
@Run(test = {"testCompareAndExchange",
"testCompareAndSwap",
"testGetAndSet"})
static void runAtomicTests() {
{
Outer o = new Outer();
Object oldVal = new Object();
Object newVal = new Object();
testCompareAndExchange(o, oldVal, newVal);
}
{
Outer o = new Outer();
Object oldVal = new Object();
Object newVal = new Object();
testCompareAndSwap(o, oldVal, newVal);
}
{
Outer o = new Outer();
Object oldVal = new Object();
Object newVal = new Object();
testGetAndSet(o, newVal);
}
}
@Test
// G1 reference loads use indirect memory operands, which leads to a pattern
// that cannot be exploited by the current C2 analysis. The same holds for
// testLoadWeakReference.
@IR(applyIf = {"UseZGC", "true"},
counts = {IRNode.NULL_CHECK, "1"},
phase = CompilePhase.FINAL_CODE)
static Object testLoadSoftReference(SoftReference<Object> ref) {
return ref.get();
}
@Test
@IR(applyIf = {"UseZGC", "true"},
counts = {IRNode.NULL_CHECK, "1"},
phase = CompilePhase.FINAL_CODE)
static Object testLoadWeakReference(WeakReference<Object> ref) {
return ref.get();
}
@Run(test = {"testLoadSoftReference",
"testLoadWeakReference"})
static void runReferenceTests() {
{
Object o1 = new Object();
SoftReference<Object> sref = new SoftReference<Object>(o1);
Object o2 = testLoadSoftReference(sref);
}
{
Object o1 = new Object();
WeakReference<Object> wref = new WeakReference<Object>(o1);
Object o2 = testLoadWeakReference(wref);
}
}
}

View File

@ -1499,6 +1499,11 @@ public class IRNode {
trapNodes(NULL_ASSERT_TRAP, "null_assert");
}
public static final String NULL_CHECK = PREFIX + "NULL_CHECK" + POSTFIX;
static {
machOnlyNameRegex(NULL_CHECK, "NullCheck");
}
public static final String NULL_CHECK_TRAP = PREFIX + "NULL_CHECK_TRAP" + POSTFIX;
static {
trapNodes(NULL_CHECK_TRAP, "null_check");