8229499: Node budget assert in fuzzed test
Reviewed-by: thartmann, neliasso
This commit is contained in:
parent
c1865c4ad3
commit
8275b17b07
@ -671,26 +671,6 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
|
|||||||
loop->record_for_igvn();
|
loop->record_for_igvn();
|
||||||
}
|
}
|
||||||
|
|
||||||
// The Estimated Loop Unroll Size: UnrollFactor * (106% * BodySize + BC) + CC,
|
|
||||||
// where BC and CC are (totally) ad-hoc/magic "body" and "clone" constants,
|
|
||||||
// respectively, used to ensure that node usage estimates made are on the safe
|
|
||||||
// side, for the most part. This is a simplified version of the loop clone
|
|
||||||
// size calculation in est_loop_clone_sz(), defined for unroll factors larger
|
|
||||||
// than one (>1), performing an overflow check and returning 'UINT_MAX' in
|
|
||||||
// case of an overflow.
|
|
||||||
static uint est_loop_unroll_sz(uint factor, uint size) {
|
|
||||||
precond(0 < factor);
|
|
||||||
|
|
||||||
uint const bc = 5;
|
|
||||||
uint const cc = 7;
|
|
||||||
uint const sz = size + (size + 15) / 16;
|
|
||||||
uint estimate = factor * (sz + bc) + cc;
|
|
||||||
|
|
||||||
return (estimate - cc) / factor == sz + bc ? estimate : UINT_MAX;
|
|
||||||
}
|
|
||||||
|
|
||||||
#define EMPTY_LOOP_SIZE 7 // Number of nodes in an empty loop.
|
|
||||||
|
|
||||||
//------------------------------policy_maximally_unroll------------------------
|
//------------------------------policy_maximally_unroll------------------------
|
||||||
// Calculate the exact loop trip-count and return TRUE if loop can be fully,
|
// Calculate the exact loop trip-count and return TRUE if loop can be fully,
|
||||||
// i.e. maximally, unrolled, otherwise return FALSE. When TRUE, the estimated
|
// i.e. maximally, unrolled, otherwise return FALSE. When TRUE, the estimated
|
||||||
@ -699,48 +679,42 @@ bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop *phase) const {
|
|||||||
CountedLoopNode* cl = _head->as_CountedLoop();
|
CountedLoopNode* cl = _head->as_CountedLoop();
|
||||||
assert(cl->is_normal_loop(), "");
|
assert(cl->is_normal_loop(), "");
|
||||||
if (!cl->is_valid_counted_loop()) {
|
if (!cl->is_valid_counted_loop()) {
|
||||||
return false; // Malformed counted loop
|
return false; // Malformed counted loop.
|
||||||
}
|
}
|
||||||
if (!cl->has_exact_trip_count()) {
|
if (!cl->has_exact_trip_count()) {
|
||||||
// Trip count is not exact.
|
return false; // Trip count is not exact.
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
uint trip_count = cl->trip_count();
|
uint trip_count = cl->trip_count();
|
||||||
// Note, max_juint is used to indicate unknown trip count.
|
// Note, max_juint is used to indicate unknown trip count.
|
||||||
assert(trip_count > 1, "one iteration loop should be optimized out already");
|
assert(trip_count > 1, "one iteration loop should be optimized out already");
|
||||||
assert(trip_count < max_juint, "exact trip_count should be less than max_uint.");
|
assert(trip_count < max_juint, "exact trip_count should be less than max_juint.");
|
||||||
|
|
||||||
// If nodes are depleted, some transform has miscalculated its needs.
|
// If nodes are depleted, some transform has miscalculated its needs.
|
||||||
assert(!phase->exceeding_node_budget(), "sanity");
|
assert(!phase->exceeding_node_budget(), "sanity");
|
||||||
|
|
||||||
// Real policy: if we maximally unroll, does it get too big?
|
// Allow the unrolled body to get larger than the standard loop size limit.
|
||||||
// Allow the unrolled mess to get larger than standard loop
|
|
||||||
// size. After all, it will no longer be a loop.
|
|
||||||
uint body_size = _body.size();
|
|
||||||
uint unroll_limit = (uint)LoopUnrollLimit * 4;
|
uint unroll_limit = (uint)LoopUnrollLimit * 4;
|
||||||
assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
|
assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
|
||||||
if (trip_count > unroll_limit || body_size > unroll_limit) {
|
if (trip_count > unroll_limit || _body.size() > unroll_limit) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Take into account that after unroll conjoined heads and tails will fold,
|
uint new_body_size = est_loop_unroll_sz(trip_count);
|
||||||
// otherwise policy_unroll() may allow more unrolling than max unrolling.
|
|
||||||
uint new_body_size = est_loop_unroll_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
|
|
||||||
|
|
||||||
if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
|
if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fully unroll a loop with few iterations regardless next conditions since
|
// Fully unroll a loop with few iterations, regardless of other conditions,
|
||||||
// following loop optimizations will split such loop anyway (pre-main-post).
|
// since the following (general) loop optimizations will split such a loop in
|
||||||
|
// any case (into pre-main-post).
|
||||||
if (trip_count <= 3) {
|
if (trip_count <= 3) {
|
||||||
return phase->may_require_nodes(new_body_size);
|
return phase->may_require_nodes(new_body_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (new_body_size > unroll_limit ||
|
// Reject if unrolling will result in too much node construction.
|
||||||
// Unrolling can result in a large amount of node construction
|
if (new_body_size > unroll_limit || phase->exceeding_node_budget(new_body_size)) {
|
||||||
phase->exceeding_node_budget(new_body_size)) {
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2471,6 +2471,39 @@ uint IdealLoopTree::est_loop_clone_sz(uint factor) const {
|
|||||||
|
|
||||||
assert((estimate - cc) / factor == sz + bc, "overflow");
|
assert((estimate - cc) / factor == sz + bc, "overflow");
|
||||||
|
|
||||||
|
return estimate + est_loop_flow_merge_sz();
|
||||||
|
}
|
||||||
|
|
||||||
|
// The Estimated Loop (full-) Unroll Size:
|
||||||
|
// UnrollFactor * (~106% * BodySize) + CC + FanOutTerm,
|
||||||
|
// where CC is a (totally) ad-hoc/magic "clone" constant, used to ensure that
|
||||||
|
// node usage estimates made are on the safe side, for the most part. This is
|
||||||
|
// a "light" version of the loop clone size calculation (above), based on the
|
||||||
|
// assumption that most of the loop-construct overhead will be unraveled when
|
||||||
|
// (fully) unrolled. Defined for unroll factors greater than or equal to one (>=1),
|
||||||
|
// including an overflow check and returning UINT_MAX in case of an overflow.
|
||||||
|
uint IdealLoopTree::est_loop_unroll_sz(uint factor) const {
|
||||||
|
|
||||||
|
precond(factor > 0);
|
||||||
|
|
||||||
|
// Take into account that after unroll conjoined heads and tails will fold.
|
||||||
|
uint const b0 = _body.size() - EMPTY_LOOP_SIZE;
|
||||||
|
uint const cc = 7;
|
||||||
|
uint const sz = b0 + (b0 + 15) / 16;
|
||||||
|
uint estimate = factor * sz + cc;
|
||||||
|
|
||||||
|
if ((estimate - cc) / factor != sz) {
|
||||||
|
return UINT_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
return estimate + est_loop_flow_merge_sz();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Estimate the growth effect (in nodes) of merging control and data flow when
|
||||||
|
// cloning a loop body, based on the amount of control and data flow reaching
|
||||||
|
// outside of the (current) loop body.
|
||||||
|
uint IdealLoopTree::est_loop_flow_merge_sz() const {
|
||||||
|
|
||||||
uint ctrl_edge_out_cnt = 0;
|
uint ctrl_edge_out_cnt = 0;
|
||||||
uint data_edge_out_cnt = 0;
|
uint data_edge_out_cnt = 0;
|
||||||
|
|
||||||
@ -2494,23 +2527,20 @@ uint IdealLoopTree::est_loop_clone_sz(uint factor) const {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Add data and control count (x2.0) to estimate iff both are > 0. This is
|
// Use data and control count (x2.0) in estimate iff both are > 0. This is
|
||||||
// a rather pessimistic estimate for the most part, in particular for some
|
// a rather pessimistic estimate for the most part, in particular for some
|
||||||
// complex loops, but still not enough to capture all loops.
|
// complex loops, but still not enough to capture all loops.
|
||||||
if (ctrl_edge_out_cnt > 0 && data_edge_out_cnt > 0) {
|
if (ctrl_edge_out_cnt > 0 && data_edge_out_cnt > 0) {
|
||||||
estimate += 2 * (ctrl_edge_out_cnt + data_edge_out_cnt);
|
return 2 * (ctrl_edge_out_cnt + data_edge_out_cnt);
|
||||||
}
|
}
|
||||||
|
return 0;
|
||||||
return estimate;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef PRODUCT
|
#ifndef PRODUCT
|
||||||
//------------------------------dump_head--------------------------------------
|
//------------------------------dump_head--------------------------------------
|
||||||
// Dump 1 liner for loop header info
|
// Dump 1 liner for loop header info
|
||||||
void IdealLoopTree::dump_head() const {
|
void IdealLoopTree::dump_head() const {
|
||||||
for (uint i = 0; i < _nest; i++) {
|
tty->sp(2 * _nest);
|
||||||
tty->print(" ");
|
|
||||||
}
|
|
||||||
tty->print("Loop: N%d/N%d ", _head->_idx, _tail->_idx);
|
tty->print("Loop: N%d/N%d ", _head->_idx, _tail->_idx);
|
||||||
if (_irreducible) tty->print(" IRREDUCIBLE");
|
if (_irreducible) tty->print(" IRREDUCIBLE");
|
||||||
Node* entry = _head->is_Loop() ? _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl) : _head->in(LoopNode::EntryControl);
|
Node* entry = _head->is_Loop() ? _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl) : _head->in(LoopNode::EntryControl);
|
||||||
@ -4521,6 +4551,7 @@ void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list )
|
|||||||
Node* n = rpo_list[j-1];
|
Node* n = rpo_list[j-1];
|
||||||
if (!_nodes[n->_idx]) // Skip dead nodes
|
if (!_nodes[n->_idx]) // Skip dead nodes
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
if (get_loop(n) != loop) { // Wrong loop nest
|
if (get_loop(n) != loop) { // Wrong loop nest
|
||||||
if (get_loop(n)->_head == n && // Found nested loop?
|
if (get_loop(n)->_head == n && // Found nested loop?
|
||||||
get_loop(n)->_parent == loop)
|
get_loop(n)->_parent == loop)
|
||||||
@ -4529,8 +4560,7 @@ void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list )
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Dump controlling node
|
// Dump controlling node
|
||||||
for( uint x = 0; x < loop->_nest; x++ )
|
tty->sp(2 * loop->_nest);
|
||||||
tty->print(" ");
|
|
||||||
tty->print("C");
|
tty->print("C");
|
||||||
if (n == C->root()) {
|
if (n == C->root()) {
|
||||||
n->dump();
|
n->dump();
|
||||||
@ -4561,9 +4591,7 @@ void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list )
|
|||||||
tty->print_cr("*** BROKEN CTRL ACCESSOR! _nodes[k] is %p, ctrl is %p",
|
tty->print_cr("*** BROKEN CTRL ACCESSOR! _nodes[k] is %p, ctrl is %p",
|
||||||
_nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL);
|
_nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL);
|
||||||
}
|
}
|
||||||
for( uint j = 0; j < loop->_nest; j++ )
|
tty->sp(2 * loop->_nest + 1);
|
||||||
tty->print(" ");
|
|
||||||
tty->print(" ");
|
|
||||||
m->dump();
|
m->dump();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -4596,7 +4624,7 @@ void PhaseIdealLoop::rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node
|
|||||||
|
|
||||||
|
|
||||||
//=============================================================================
|
//=============================================================================
|
||||||
//------------------------------LoopTreeIterator-----------------------------------
|
//------------------------------LoopTreeIterator-------------------------------
|
||||||
|
|
||||||
// Advance to next loop tree using a preorder, left-to-right traversal.
|
// Advance to next loop tree using a preorder, left-to-right traversal.
|
||||||
void LoopTreeIterator::next() {
|
void LoopTreeIterator::next() {
|
||||||
|
@ -623,6 +623,8 @@ public:
|
|||||||
|
|
||||||
// Estimate the number of nodes required when cloning a loop (body).
|
// Estimate the number of nodes required when cloning a loop (body).
|
||||||
uint est_loop_clone_sz(uint factor) const;
|
uint est_loop_clone_sz(uint factor) const;
|
||||||
|
// Estimate the number of nodes required when unrolling a loop (body).
|
||||||
|
uint est_loop_unroll_sz(uint factor) const;
|
||||||
|
|
||||||
// Compute loop trip count if possible
|
// Compute loop trip count if possible
|
||||||
void compute_trip_count(PhaseIdealLoop* phase);
|
void compute_trip_count(PhaseIdealLoop* phase);
|
||||||
@ -659,6 +661,11 @@ public:
|
|||||||
void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const;
|
void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
private:
|
||||||
|
enum { EMPTY_LOOP_SIZE = 7 }; // Number of nodes in an empty loop.
|
||||||
|
|
||||||
|
// Estimate the number of nodes resulting from control and data flow merge.
|
||||||
|
uint est_loop_flow_merge_sz() const;
|
||||||
};
|
};
|
||||||
|
|
||||||
// -----------------------------PhaseIdealLoop---------------------------------
|
// -----------------------------PhaseIdealLoop---------------------------------
|
||||||
@ -1017,9 +1024,9 @@ public:
|
|||||||
bool _has_irreducible_loops;
|
bool _has_irreducible_loops;
|
||||||
|
|
||||||
// Per-Node transform
|
// Per-Node transform
|
||||||
virtual Node *transform( Node *a_node ) { return 0; }
|
virtual Node* transform(Node* n) { return 0; }
|
||||||
|
|
||||||
bool is_counted_loop(Node* x, IdealLoopTree*& loop);
|
bool is_counted_loop(Node* n, IdealLoopTree* &loop);
|
||||||
IdealLoopTree* create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
|
IdealLoopTree* create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
|
||||||
IdealLoopTree* loop, float cl_prob, float le_fcnt,
|
IdealLoopTree* loop, float cl_prob, float le_fcnt,
|
||||||
Node*& entry_control, Node*& iffalse);
|
Node*& entry_control, Node*& iffalse);
|
||||||
@ -1319,7 +1326,7 @@ public:
|
|||||||
// same block. Split thru the Region.
|
// same block. Split thru the Region.
|
||||||
void do_split_if( Node *iff );
|
void do_split_if( Node *iff );
|
||||||
|
|
||||||
// Conversion of fill/copy patterns into intrisic versions
|
// Conversion of fill/copy patterns into intrinsic versions
|
||||||
bool do_intrinsify_fill();
|
bool do_intrinsify_fill();
|
||||||
bool intrinsify_fill(IdealLoopTree* lpt);
|
bool intrinsify_fill(IdealLoopTree* lpt);
|
||||||
bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
|
bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
|
||||||
@ -1439,6 +1446,7 @@ public:
|
|||||||
static int _loop_invokes; // Count of PhaseIdealLoop invokes
|
static int _loop_invokes; // Count of PhaseIdealLoop invokes
|
||||||
static int _loop_work; // Sum of PhaseIdealLoop x _unique
|
static int _loop_work; // Sum of PhaseIdealLoop x _unique
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void rpo(Node* start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list) const;
|
void rpo(Node* start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list) const;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -0,0 +1,76 @@
|
|||||||
|
/*
|
||||||
|
* Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
|
||||||
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
|
*
|
||||||
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
|
* under the terms of the GNU General Public License version 2 only, as
|
||||||
|
* published by the Free Software Foundation.
|
||||||
|
*
|
||||||
|
* This code is distributed in the hope that it will be useful, but WITHOUT
|
||||||
|
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||||
|
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||||
|
* version 2 for more details (a copy is included in the LICENSE file that
|
||||||
|
* accompanied this code).
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License version
|
||||||
|
* 2 along with this work; if not, write to the Free Software Foundation,
|
||||||
|
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||||
|
*
|
||||||
|
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
|
||||||
|
* or visit www.oracle.com if you need additional information or have any
|
||||||
|
* questions.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* @test
|
||||||
|
* @bug 8229499
|
||||||
|
* @summary Node estimate for loop unrolling is not correct/sufficient:
|
||||||
|
* assert(delta <= 2 * required) failed: Bad node estimate ...
|
||||||
|
*
|
||||||
|
* @requires !vm.graal.enabled
|
||||||
|
*
|
||||||
|
* @run main/othervm -XX:-TieredCompilation -XX:-BackgroundCompilation
|
||||||
|
* LoopUnrollBadNodeBudget
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class LoopUnrollBadNodeBudget {
|
||||||
|
|
||||||
|
int a;
|
||||||
|
long b;
|
||||||
|
int c;
|
||||||
|
int d(long e, short f, int g) {
|
||||||
|
int h, j = 2, k, l[][] = new int[a][];
|
||||||
|
for (h = 8; h < 58; ++h)
|
||||||
|
for (k = 1; 7 > k; ++k)
|
||||||
|
switch (h % 9 * 5 + 43) {
|
||||||
|
case 70:
|
||||||
|
case 65:
|
||||||
|
case 86:
|
||||||
|
case 81:
|
||||||
|
case 62:
|
||||||
|
case 69:
|
||||||
|
case 74:
|
||||||
|
g = j;
|
||||||
|
}
|
||||||
|
long m = u(l);
|
||||||
|
return (int)m;
|
||||||
|
}
|
||||||
|
void n(int p, int o) { d(b, (short)0, p); }
|
||||||
|
void r(String[] q) {
|
||||||
|
int i = 4;
|
||||||
|
n(i, c);
|
||||||
|
}
|
||||||
|
long u(int[][] a) {
|
||||||
|
long sum = 0;
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
public static void main(String[] t) {
|
||||||
|
try {
|
||||||
|
LoopUnrollBadNodeBudget s = new LoopUnrollBadNodeBudget();
|
||||||
|
for (int i = 5000; i > 0; i--)
|
||||||
|
s.r(t);
|
||||||
|
} catch (Exception ex) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user