8229499: Node budget assert in fuzzed test

Reviewed-by: thartmann, neliasso
2019-09-12 11:44:51 +02:00 · 2019-09-12 11:44:51 +02:00 · 8275b17b07
commit 8275b17b07
parent c1865c4ad3
4 changed files with 171 additions and 85 deletions
--- a/src/hotspot/share/opto/loopTransform.cpp
+++ b/src/hotspot/share/opto/loopTransform.cpp
@ -671,26 +671,6 @@ void PhaseIdealLoop::do_peeling(IdealLoopTree *loop, Node_List &old_new) {
  loop->record_for_igvn();
 }
 // The Estimated Loop Unroll Size: UnrollFactor * (106% * BodySize + BC) + CC,
 // where BC and CC are (totally) ad-hoc/magic "body" and "clone" constants,
 // respectively, used to keep the node usage estimate on the safe side. The
 // "106%" term is BodySize plus BodySize / 16, rounded up.
 //
 //   factor - the unroll factor; must be greater than zero (see precond).
 //   size   - the loop body size, in nodes.
 //
 // Returns the estimate, or 'UINT_MAX' if the estimate does not fit in a uint.
 // Every step that could wrap is checked before it is evaluated: a check made
 // only on the product after the fact would miss a wrap caused by adding BC or
 // CC, or by the 106% scaling, and would hand back a small, wrapped estimate.
 static uint est_loop_unroll_sz(uint factor, uint size) {
  precond(0 < factor);
  uint const bc = 5;
  uint const cc = 7;
  // ceil(size / 16), written so that it cannot wrap for any 'size'.
  uint const pad = size / 16 + (size % 16 != 0 ? 1 : 0);
  // Would size + pad + bc exceed UINT_MAX?
  if (size > UINT_MAX - bc - pad) {
    return UINT_MAX;
  }
  uint const per_copy = size + pad + bc;
  // Would factor * per_copy + cc exceed UINT_MAX?
  if (per_copy > (UINT_MAX - cc) / factor) {
    return UINT_MAX;
  }
  return factor * per_copy + cc;
 }
 #define EMPTY_LOOP_SIZE 7   // Number of nodes in an empty loop.
 //------------------------------policy_maximally_unroll------------------------
 // Calculate the exact  loop trip-count and return TRUE if loop can be fully,
 // i.e. maximally, unrolled, otherwise return FALSE. When TRUE, the estimated
@ -699,48 +679,42 @@ bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop *phase) const {
  CountedLoopNode* cl = _head->as_CountedLoop();
  assert(cl->is_normal_loop(), "");
  if (!cl->is_valid_counted_loop()) {
-    return false; // Malformed counted loop
+    return false;   // Malformed counted loop.
  }
  if (!cl->has_exact_trip_count()) {
-    // Trip count is not exact.
+    return false;   // Trip count is not exact.
    return false;
  }
  uint trip_count = cl->trip_count();
  // Note, max_juint is used to indicate unknown trip count.
  assert(trip_count > 1, "one iteration loop should be optimized out already");
-  assert(trip_count < max_juint, "exact trip_count should be less than max_uint.");
+  assert(trip_count < max_juint, "exact trip_count should be less than max_juint.");
  // If nodes are depleted, some transform has miscalculated its needs.
  assert(!phase->exceeding_node_budget(), "sanity");
-  // Real policy: if we maximally unroll, does it get too big?
+  // Allow the unrolled body to get larger than the standard loop size limit.
  // Allow the unrolled mess to get larger than standard loop
  // size.  After all, it will no longer be a loop.
  uint body_size    = _body.size();
  uint unroll_limit = (uint)LoopUnrollLimit * 4;
  assert((intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
-  if (trip_count > unroll_limit || body_size > unroll_limit) {
+  if (trip_count > unroll_limit || _body.size() > unroll_limit) {
    return false;
  }
-  // Take into account that after unroll conjoined heads and tails will fold,
+  uint new_body_size = est_loop_unroll_sz(trip_count);
  // otherwise policy_unroll() may allow more unrolling than max unrolling.
  uint new_body_size = est_loop_unroll_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
  if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
    return false;
  }
-  // Fully unroll a loop with few iterations regardless next conditions since
+  // Fully unroll a loop with few iterations, regardless of other conditions,
-  // following loop optimizations will split such loop anyway (pre-main-post).
+  // since the following (general) loop optimizations will split such a loop in
  // any case (into pre-main-post).
  if (trip_count <= 3) {
    return phase->may_require_nodes(new_body_size);
  }
-  if (new_body_size > unroll_limit ||
+  // Reject if unrolling will result in too much node construction.
-      // Unrolling can result in a large amount of node construction
+  if (new_body_size > unroll_limit || phase->exceeding_node_budget(new_body_size)) {
      phase->exceeding_node_budget(new_body_size)) {
    return false;
  }
--- a/src/hotspot/share/opto/loopnode.cpp
+++ b/src/hotspot/share/opto/loopnode.cpp
@ -2471,6 +2471,39 @@ uint IdealLoopTree::est_loop_clone_sz(uint factor) const {
  assert((estimate - cc) / factor == sz + bc, "overflow");
  return estimate + est_loop_flow_merge_sz();
 }
 // The Estimated Loop (full-) Unroll Size:
 //   UnrollFactor * (~106% * BodySize) + CC + FanOutTerm,
 // where CC is a (totally) ad-hoc/magic "clone" constant, used to keep the node
 // usage estimate on the safe side, and FanOutTerm is the value returned by
 // est_loop_flow_merge_sz(). BodySize is the size of '_body' less
 // EMPTY_LOOP_SIZE, and the "~106%" term is BodySize plus BodySize / 16,
 // rounded up. This is a "light" version of the loop clone size calculation,
 // based on the assumption that most of the loop-construct overhead will be
 // unraveled when the loop is (fully) unrolled.
 //
 //   factor - the unroll factor; must be greater than or equal to one.
 //
 // Returns the estimate, or UINT_MAX if an overflow is detected.
 uint IdealLoopTree::est_loop_unroll_sz(uint factor) const {
  precond(factor > 0);
  // Take into account that after unroll conjoined heads and tails will fold.
  // NOTE(review): b0 wraps if _body.size() < EMPTY_LOOP_SIZE, and the check
  // below only detects a wrap in the multiplication, not in 'sz' or in the
  // '+ cc' step. Presumably callers only get here for loops with at least
  // EMPTY_LOOP_SIZE nodes and moderate sizes - confirm.
  uint const b0 = _body.size() - EMPTY_LOOP_SIZE;
  uint const cc = 7;
  uint const sz = b0 + (b0 + 15) / 16;
  uint estimate = factor * sz + cc;
  if ((estimate - cc) / factor != sz) {
    return UINT_MAX;
  }
  // The fan-out term is added after the overflow check above, so the sum has
  // to be guarded as well; otherwise a wrapped (small) value could be returned
  // instead of UINT_MAX.
  uint const merge = est_loop_flow_merge_sz();
  if (merge > UINT_MAX - estimate) {
    return UINT_MAX;
  }
  return estimate + merge;
 }
 // Estimate the growth effect (in nodes) of merging control and data flow when
 // cloning a loop body, based on the amount of  control and data flow reaching
 // outside of the (current) loop body.
 uint IdealLoopTree::est_loop_flow_merge_sz() const {
  uint ctrl_edge_out_cnt = 0;
  uint data_edge_out_cnt = 0;
@ -2494,23 +2527,20 @@ uint IdealLoopTree::est_loop_clone_sz(uint factor) const {
      }
    }
  }
-  // Add data and control count (x2.0) to estimate iff both are > 0. This is
+  // Use data and control count (x2.0) in estimate iff both are > 0. This is
  // a rather pessimistic estimate for the most part, in particular for some
  // complex loops, but still not enough to capture all loops.
  if (ctrl_edge_out_cnt > 0 && data_edge_out_cnt > 0) {
-    estimate += 2 * (ctrl_edge_out_cnt + data_edge_out_cnt);
+    return 2 * (ctrl_edge_out_cnt + data_edge_out_cnt);
  }
-
+  return 0;
  return estimate;
 }
 #ifndef PRODUCT
 //------------------------------dump_head--------------------------------------
 // Dump 1 liner for loop header info
 void IdealLoopTree::dump_head() const {
-  for (uint i = 0; i < _nest; i++) {
+  tty->sp(2 * _nest);
    tty->print("  ");
  }
  tty->print("Loop: N%d/N%d ", _head->_idx, _tail->_idx);
  if (_irreducible) tty->print(" IRREDUCIBLE");
  Node* entry = _head->is_Loop() ? _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl) : _head->in(LoopNode::EntryControl);
@ -4521,6 +4551,7 @@ void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list )
    Node* n = rpo_list[j-1];
    if (!_nodes[n->_idx])      // Skip dead nodes
      continue;
    if (get_loop(n) != loop) { // Wrong loop nest
      if (get_loop(n)->_head == n &&    // Found nested loop?
          get_loop(n)->_parent == loop)
@ -4529,8 +4560,7 @@ void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list )
    }
    // Dump controlling node
-    for( uint x = 0; x < loop->_nest; x++ )
+    tty->sp(2 * loop->_nest);
      tty->print("  ");
    tty->print("C");
    if (n == C->root()) {
      n->dump();
@ -4561,9 +4591,7 @@ void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list )
            tty->print_cr("*** BROKEN CTRL ACCESSOR!  _nodes[k] is %p, ctrl is %p",
                          _nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL);
          }
-          for( uint j = 0; j < loop->_nest; j++ )
+          tty->sp(2 * loop->_nest + 1);
            tty->print("  ");
          tty->print(" ");
          m->dump();
        }
      }
@ -4596,7 +4624,7 @@ void PhaseIdealLoop::rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node
 //=============================================================================
-//------------------------------LoopTreeIterator-----------------------------------
+//------------------------------LoopTreeIterator-------------------------------
 // Advance to next loop tree using a preorder, left-to-right traversal.
 void LoopTreeIterator::next() {
--- a/src/hotspot/share/opto/loopnode.hpp
+++ b/src/hotspot/share/opto/loopnode.hpp
@ -623,6 +623,8 @@ public:
  // Estimate the number of nodes required when cloning a loop (body).
  uint est_loop_clone_sz(uint factor) const;
  // Estimate the number of nodes required when unrolling a loop (body) by the
  // given factor. The factor must be greater than zero. Returns UINT_MAX when
  // the estimate overflows.
  uint est_loop_unroll_sz(uint factor) const;
  // Compute loop trip count if possible
  void compute_trip_count(PhaseIdealLoop* phase);
@ -659,6 +661,11 @@ public:
  void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const;
 #endif
 private:
  enum { EMPTY_LOOP_SIZE = 7 }; // Number of nodes in an empty loop.
  // Estimate the number of nodes resulting from control and data flow merge,
  // i.e. the growth caused by control and data flow reaching outside of the
  // loop body when that body is cloned. Used as an additive term by the loop
  // size estimates.
  uint est_loop_flow_merge_sz() const;
 };
 // -----------------------------PhaseIdealLoop---------------------------------
@ -1017,9 +1024,9 @@ public:
  bool _has_irreducible_loops;
  // Per-Node transform
-  virtual Node *transform( Node *a_node ) { return 0; }
+  virtual Node* transform(Node* n) { return 0; }
-  bool is_counted_loop(Node* x, IdealLoopTree*& loop);
+  bool is_counted_loop(Node* n, IdealLoopTree* &loop);
  IdealLoopTree* create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
                                               IdealLoopTree* loop, float cl_prob, float le_fcnt,
                                               Node*& entry_control, Node*& iffalse);
@ -1319,7 +1326,7 @@ public:
  // same block.  Split thru the Region.
  void do_split_if( Node *iff );
-  // Conversion of fill/copy patterns into intrisic versions
+  // Conversion of fill/copy patterns into intrinsic versions
  bool do_intrinsify_fill();
  bool intrinsify_fill(IdealLoopTree* lpt);
  bool match_fill_loop(IdealLoopTree* lpt, Node*& store, Node*& store_value,
@ -1439,6 +1446,7 @@ public:
  static int _loop_invokes;     // Count of PhaseIdealLoop invokes
  static int _loop_work;        // Sum of PhaseIdealLoop x _unique
 #endif
  void rpo(Node* start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list) const;
 };
--- a/test/hotspot/jtreg/compiler/loopopts/LoopUnrollBadNodeBudget.java
+++ b/test/hotspot/jtreg/compiler/loopopts/LoopUnrollBadNodeBudget.java
@ -0,0 +1,76 @@
 /*
 * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
 /*
 * @test
 * @bug 8229499
 * @summary Node estimate for loop unrolling is not correct/sufficient:
 *          assert(delta <= 2 * required) failed: Bad node estimate ...
 *
 * @requires !vm.graal.enabled
 *
 * @run main/othervm -XX:-TieredCompilation -XX:-BackgroundCompilation
 *                   LoopUnrollBadNodeBudget
 *
 */
 // Reproducer for the bug named in the @bug/@summary tags above. The shape of
 // the code (nested loops around a switch) is what matters to the compiler, so
 // the statements below should not be simplified or reordered, even where they
 // look redundant.
 public class LoopUnrollBadNodeBudget {
    // Never assigned, so it keeps the default value 0; used as the outer array
    // length in d().
    int a;
    // Never assigned; passed to d() as its first argument.
    long b;
    // Never assigned; passed to n() as its (unused) second argument.
    int c;
    // Runs a 50-iteration outer loop (h = 8..57) around a 6-iteration inner
    // loop (k = 1..6) whose body is a switch. Always returns 0, the value
    // produced by u().
    int d(long e, short f, int g) {
        int h, j = 2, k, l[][] = new int[a][];
        for (h = 8; h < 58; ++h)
            for (k = 1; 7 > k; ++k)
                // The selector is 43 + 5 * (h % 9), i.e. one of 43, 48, ..., 83.
                // None of the case labels below is in that set, so the
                // assignment is never executed at run time.
                switch (h % 9 * 5 + 43) {
                    case 70:
                    case 65:
                    case 86:
                    case 81:
                    case 62:
                    case 69:
                    case 74:
                        g = j;
                }
        long m = u(l);
        return (int)m;
    }
    // Forwards to d(); the result of d() and the parameter 'o' are ignored.
    void n(int p, int o) { d(b, (short)0, p); }
    // Calls n() with the constant 4 and the field c; 'q' is unused.
    void r(String[] q) {
        int i = 4;
        n(i, c);
    }
    // Ignores its argument and returns 0.
    long u(int[][] a) {
        long sum = 0;
        return sum;
    }
    // Invokes r() 5000 times on a single instance.
    // NOTE(review): the repetition is presumably there to get the methods
    // JIT-compiled under the flags given in the @run tag - confirm.
    public static void main(String[] t) {
        try {
            LoopUnrollBadNodeBudget s = new LoopUnrollBadNodeBudget();
            for (int i = 5000; i > 0; i--)
                s.r(t);
        } catch (Exception ex) {
            // Any Exception is silently ignored.
        }
    }
 }