diff --git a/src/hotspot/share/opto/c2_globals.hpp b/src/hotspot/share/opto/c2_globals.hpp index fd55f2fd666..227817612ce 100644 --- a/src/hotspot/share/opto/c2_globals.hpp +++ b/src/hotspot/share/opto/c2_globals.hpp @@ -678,10 +678,12 @@ "Print progress during Iterative Global Value Numbering") \ \ develop(uint, VerifyIterativeGVN, 0, \ - "Verify Iterative Global Value Numbering" \ - "=XY, with Y: verify Def-Use modifications during IGVN" \ - " X: verify that type(n) == n->Value() after IGVN" \ - "X and Y in 0=off; 1=on") \ + "Verify Iterative Global Value Numbering =DCBA, with:" \ + " D: verify Node::Identity did not miss opportunities" \ + " C: verify Node::Ideal did not miss opportunities" \ + " B: verify that type(n) == n->Value() after IGVN" \ + " A: verify Def-Use modifications during IGVN" \ + "Each can be 0=off or 1=on") \ constraint(VerifyIterativeGVNConstraintFunc, AtParse) \ \ develop(bool, TraceCISCSpill, false, \ diff --git a/src/hotspot/share/opto/phaseX.cpp b/src/hotspot/share/opto/phaseX.cpp index 953afb0e830..1ba8e145e7d 100644 --- a/src/hotspot/share/opto/phaseX.cpp +++ b/src/hotspot/share/opto/phaseX.cpp @@ -1072,7 +1072,11 @@ void PhaseIterGVN::optimize() { #ifdef ASSERT void PhaseIterGVN::verify_optimize() { - if (is_verify_Value()) { + assert(_worklist.size() == 0, "igvn worklist must be empty before verify"); + + if (is_verify_Value() || + is_verify_Ideal() || + is_verify_Identity()) { ResourceMark rm; Unique_Node_List worklist; bool failure = false; @@ -1080,7 +1084,10 @@ void PhaseIterGVN::verify_optimize() { worklist.push(C->root()); for (uint j = 0; j < worklist.size(); ++j) { Node* n = worklist.at(j); - failure |= verify_node_value(n); + if (is_verify_Value()) { failure |= verify_Value_for(n); } + if (is_verify_Ideal()) { failure |= verify_Ideal_for(n, false); } + if (is_verify_Ideal()) { failure |= verify_Ideal_for(n, true); } + if (is_verify_Identity()) { failure |= verify_Identity_for(n); } // traverse all inputs and outputs for (uint i = 0; i < n->req(); i++) { if (n->in(i) != nullptr) { @@ -1097,6 +1104,27 @@ void PhaseIterGVN::verify_optimize() { // in the verification code above if that is not possible for some reason (like Load nodes). assert(!failure, "Missed optimization opportunity in PhaseIterGVN"); } + + verify_empty_worklist(nullptr); +} + +void PhaseIterGVN::verify_empty_worklist(Node* node) { + // Verify that the igvn worklist is empty. If no optimization happened, then + // nothing needs to be on the worklist. + if (_worklist.size() == 0) { return; } + + stringStream ss; // Print as a block without tty lock. + for (uint j = 0; j < _worklist.size(); j++) { + Node* n = _worklist.at(j); + ss.print("igvn.worklist[%d] ", j); + n->dump("\n", false, &ss); + } + if (_worklist.size() != 0 && node != nullptr) { + ss.print_cr("Previously optimized:"); + node->dump("\n", false, &ss); + } + tty->print_cr("%s", ss.as_string()); + assert(false, "igvn worklist must still be empty after verify"); } // Check that type(n) == n->Value(), return true if we have a failure. @@ -1104,7 +1132,7 @@ void PhaseIterGVN::verify_optimize() { // (1) Integer "widen" changes, but the range is the same. // (2) LoadNode performs deep traversals. Load is not notified for changes far away. // (3) CmpPNode performs deep traversals if it compares oopptr. CmpP is not notified for changes far away. -bool PhaseIterGVN::verify_node_value(Node* n) { +bool PhaseIterGVN::verify_Value_for(Node* n) { // If we assert inside type(n), because the type is still a null, then maybe // the node never went through gvn.transform, which would be a bug. const Type* told = type(n); @@ -1149,15 +1177,873 @@ bool PhaseIterGVN::verify_node_value(Node* n) { // after loop-opts, so that should take care of many of these cases. return false; } - tty->cr(); - tty->print_cr("Missed Value optimization:"); - n->dump_bfs(1, nullptr, ""); - tty->print_cr("Current type:"); - told->dump_on(tty); - tty->cr(); - tty->print_cr("Optimized type:"); - tnew->dump_on(tty); - tty->cr(); + + stringStream ss; // Print as a block without tty lock. + ss.cr(); + ss.print_cr("Missed Value optimization:"); + n->dump_bfs(1, nullptr, "", &ss); + ss.print_cr("Current type:"); + told->dump_on(&ss); + ss.cr(); + ss.print_cr("Optimized type:"); + tnew->dump_on(&ss); + ss.cr(); + tty->print_cr("%s", ss.as_string()); + return true; +} + +// Check that all Ideal optimizations that could be done were done. +// Returns true if it found missed optimization opportunities and +// false otherwise (no missed optimization, or skipped verification). +bool PhaseIterGVN::verify_Ideal_for(Node* n, bool can_reshape) { + // First, we check a list of exceptions, where we skip verification, + // because there are known cases where Ideal can optimize after IGVN. + // Some may be expected and cannot be fixed, and others should be fixed. + switch (n->Opcode()) { + // RangeCheckNode::Ideal looks up the chain for about 999 nodes + // (see "Range-Check scan limit"). So, it is possible that something + // is optimized in that input subgraph, and the RangeCheck was not + // added to the worklist because it would be too expensive to walk + // down the graph for 1000 nodes and put all on the worklist. + // + // Found with: + // java -XX:VerifyIterativeGVN=0100 -Xbatch --version + case Op_RangeCheck: + return false; + + // IfNode::Ideal does: + // Node* prev_dom = search_identical(dist, igvn); + // which means we seach up the CFG, traversing at most up to a distance. + // If anything happens rather far away from the If, we may not put the If + // back on the worklist. + // + // Found with: + // java -XX:VerifyIterativeGVN=0100 -Xcomp --version + case Op_If: + return false; + + // IfNode::simple_subsuming + // Looks for dominating test that subsumes the current test. + // Notification could be difficult because of larger distance. + // + // Found with: + // runtime/exceptionMsgs/ArrayIndexOutOfBoundsException/ArrayIndexOutOfBoundsExceptionTest.java#id1 + // -XX:VerifyIterativeGVN=1110 + case Op_CountedLoopEnd: + return false; + + // LongCountedLoopEndNode::Ideal + // Probably same issue as above. + // + // Found with: + // compiler/predicates/assertion/TestAssertionPredicates.java#NoLoopPredicationXbatch + // -XX:StressLongCountedLoop=2000000 -XX:+IgnoreUnrecognizedVMOptions -XX:VerifyIterativeGVN=1110 + case Op_LongCountedLoopEnd: + return false; + + // RegionNode::Ideal does "Skip around the useless IF diamond". + // 245 IfTrue === 244 + // 258 If === 245 257 + // 259 IfTrue === 258 [[ 263 ]] + // 260 IfFalse === 258 [[ 263 ]] + // 263 Region === 263 260 259 [[ 263 268 ]] + // to + // 245 IfTrue === 244 + // 263 Region === 263 245 _ [[ 263 268 ]] + // + // "Useless" means that there is no code in either branch of the If. + // I found a case where this was not done yet during IGVN. + // Why does the Region not get added to IGVN worklist when the If diamond becomes useless? + // + // Found with: + // java -XX:VerifyIterativeGVN=0100 -Xcomp --version + case Op_Region: + return false; + + // In AddNode::Ideal, we call "commute", which swaps the inputs so + // that smaller idx are first. Tracking it back, it led me to + // PhaseIdealLoop::remix_address_expressions which swapped the edges. + // + // Example: + // Before PhaseIdealLoop::remix_address_expressions + // 154 AddI === _ 12 144 + // After PhaseIdealLoop::remix_address_expressions + // 154 AddI === _ 144 12 + // After AddNode::Ideal + // 154 AddI === _ 12 144 + // + // I suspect that the node should be added to the IGVN worklist after + // PhaseIdealLoop::remix_address_expressions + // + // This is the only case I looked at, there may be others. Found like this: + // java -XX:VerifyIterativeGVN=0100 -Xbatch --version + // + // The following hit the same logic in PhaseIdealLoop::remix_address_expressions. + // + // Note: currently all of these fail also for other reasons, for example + // because of "commute" doing the reordering with the phi below. Once + // that is resolved, we can come back to this issue here. + // + // case Op_AddD: + // case Op_AddI: + // case Op_AddL: + // case Op_AddF: + // case Op_MulI: + // case Op_MulL: + // case Op_MulF: + // case Op_MulD: + // if (n->in(1)->_idx > n->in(2)->_idx) { + // // Expect "commute" to revert this case. + // return false; + // } + // break; // keep verifying + + // AddFNode::Ideal calls "commute", which can reorder the inputs for this: + // Check for tight loop increments: Loop-phi of Add of loop-phi + // It wants to take the phi into in(1): + // 471 Phi === 435 38 390 + // 390 AddF === _ 471 391 + // + // Other Associative operators are also affected equally. + // + // Investigate why this does not happen earlier during IGVN. + // + // Found with: + // test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java + // -XX:VerifyIterativeGVN=1110 + case Op_AddD: + //case Op_AddI: // Also affected for other reasons, see case further down. + //case Op_AddL: // Also affected for other reasons, see case further down. + case Op_AddF: + case Op_MulI: + case Op_MulL: + case Op_MulF: + case Op_MulD: + case Op_MinF: + case Op_MinD: + case Op_MaxF: + case Op_MaxD: + // XorINode::Ideal + // Found with: + // compiler/intrinsics/chacha/TestChaCha20.java + // -XX:VerifyIterativeGVN=1110 + case Op_XorI: + case Op_XorL: + // It seems we may have similar issues with the HF cases. + // Found with aarch64: + // compiler/vectorization/TestFloat16VectorOperations.java + // -XX:VerifyIterativeGVN=1110 + case Op_AddHF: + case Op_MulHF: + case Op_MaxHF: + case Op_MinHF: + return false; + + // In MulNode::Ideal the edges can be swapped to help value numbering: + // + // // We are OK if right is a constant, or right is a load and + // // left is a non-constant. + // if( !(t2->singleton() || + // (in(2)->is_Load() && !(t1->singleton() || in(1)->is_Load())) ) ) { + // if( t1->singleton() || // Left input is a constant? + // // Otherwise, sort inputs (commutativity) to help value numbering. + // (in(1)->_idx > in(2)->_idx) ) { + // swap_edges(1, 2); + // + // Why was this not done earlier during IGVN? + // + // Found with: + // test/hotspot/jtreg/gc/stress/gcbasher/TestGCBasherWithG1.java + // -XX:VerifyIterativeGVN=1110 + case Op_AndI: + // Same for AndL. + // Found with: + // compiler/intrinsics/bigInteger/MontgomeryMultiplyTest.java + // -XX:VerifyIterativeGVN=1110 + case Op_AndL: + return false; + + // SubLNode::Ideal does transform like: + // Convert "c1 - (y+c0)" into "(c1-c0) - y" + // + // In IGVN before verification: + // 8423 ConvI2L === _ 3519 [[ 8424 ]] #long:-2 + // 8422 ConvI2L === _ 8399 [[ 8424 ]] #long:3..256:www + // 8424 AddL === _ 8422 8423 [[ 8383 ]] !orig=[8382] + // 8016 ConL === 0 [[ 8383 ]] #long:0 + // 8383 SubL === _ 8016 8424 [[ 8156 ]] !orig=[8154] + // + // And then in verification: + // 8338 ConL === 0 [[ 8339 8424 ]] #long:-2 <----- Was constant folded. + // 8422 ConvI2L === _ 8399 [[ 8424 ]] #long:3..256:www + // 8424 AddL === _ 8422 8338 [[ 8383 ]] !orig=[8382] + // 8016 ConL === 0 [[ 8383 ]] #long:0 + // 8383 SubL === _ 8016 8424 [[ 8156 ]] !orig=[8154] + // + // So the form changed from: + // c1 - (y + [8423 ConvI2L]) + // to + // c1 - (y + -2) + // but the SubL was not added to the IGVN worklist. Investigate why. + // There could be other issues too. + // + // There seems to be a related AddL IGVN optimization that triggers + // the same SubL optimization, so investigate that too. + // + // Found with: + // java -XX:VerifyIterativeGVN=0100 -Xcomp --version + case Op_SubL: + return false; + + // SubINode::Ideal does + // Convert "x - (y+c0)" into "(x-y) - c0" AND + // Convert "c1 - (y+c0)" into "(c1-c0) - y" + // + // Investigate why this does not yet happen during IGVN. + // + // Found with: + // test/hotspot/jtreg/compiler/c2/IVTest.java + // -XX:VerifyIterativeGVN=1110 + case Op_SubI: + return false; + + // AddNode::IdealIL does transform like: + // Convert x + (con - y) into "(x - y) + con" + // + // In IGVN before verification: + // 8382 ConvI2L + // 8381 ConvI2L === _ 791 [[ 8383 ]] #long:0 + // 8383 SubL === _ 8381 8382 + // 8168 ConvI2L + // 8156 AddL === _ 8168 8383 [[ 8158 ]] + // + // And then in verification: + // 8424 AddL + // 8016 ConL === 0 [[ 8383 ]] #long:0 <--- Was constant folded. + // 8383 SubL === _ 8016 8424 + // 8168 ConvI2L + // 8156 AddL === _ 8168 8383 [[ 8158 ]] + // + // So the form changed from: + // x + (ConvI2L(0) - [8382 ConvI2L]) + // to + // x + (0 - [8424 AddL]) + // but the AddL was not added to the IGVN worklist. Investigate why. + // There could be other issues, too. For example with "commute", see above. + // + // Found with: + // java -XX:VerifyIterativeGVN=0100 -Xcomp --version + case Op_AddL: + return false; + + // SubTypeCheckNode::Ideal calls SubTypeCheckNode::verify_helper, which does + // Node* cmp = phase->transform(new CmpPNode(subklass, in(SuperKlass))); + // record_for_cleanup(cmp, phase); + // This verification code in the Ideal code creates new nodes, and checks + // if they fold in unexpected ways. This means some nodes are created and + // added to the worklist, even if the SubTypeCheck is not optimized. This + // goes agains the assumption of the verification here, which assumes that + // if the node is not optimized, then no new nodes should be created, and + // also no nodes should be added to the worklist. + // I see two options: + // 1) forbid what verify_helper does, because for each Ideal call it + // uses memory and that is suboptimal. But it is not clear how that + // verification can be done otherwise. + // 2) Special case the verification here. Probably the new nodes that + // were just created are dead, i.e. they are not connected down to + // root. We could verify that, and remove those nodes from the graph + // by setting all their inputs to nullptr. And of course we would + // have to remove those nodes from the worklist. + // Maybe there are other options too, I did not dig much deeper yet. + // + // Found with: + // java -XX:VerifyIterativeGVN=0100 -Xbatch --version + case Op_SubTypeCheck: + return false; + + // LoopLimitNode::Ideal when stride is constant power-of-2, we can do a lowering + // to other nodes: Conv, Add, Sub, Mul, And ... + // + // 107 ConI === 0 [[ ... ]] #int:2 + // 84 LoadRange === _ 7 83 + // 50 ConI === 0 [[ ... ]] #int:0 + // 549 LoopLimit === _ 50 84 107 + // + // I stepped backward, to see how the node was generated, and I found that it was + // created in PhaseIdealLoop::exact_limit and not changed since. It is added to the + // IGVN worklist. I quickly checked when it goes into LoopLimitNode::Ideal after + // that, and it seems we want to skip lowering it until after loop-opts, but never + // add call record_for_post_loop_opts_igvn. This would be an easy fix, but there + // could be other issues too. + // + // Fond with: + // java -XX:VerifyIterativeGVN=0100 -Xcomp --version + case Op_LoopLimit: + return false; + + // PhiNode::Ideal calls split_flow_path, which tries to do this: + // "This optimization tries to find two or more inputs of phi with the same constant + // value. It then splits them into a separate Phi, and according Region." + // + // Example: + // 130 DecodeN === _ 129 + // 50 ConP === 0 [[ 18 91 99 18 ]] #null + // 18 Phi === 14 50 130 50 [[ 133 ]] #java/lang/Object * Oop:java/lang/Object * + // + // turns into: + // + // 50 ConP === 0 [[ 99 91 18 ]] #null + // 130 DecodeN === _ 129 [[ 18 ]] + // 18 Phi === 14 130 50 [[ 133 ]] #java/lang/Object * Oop:java/lang/Object * + // + // We would have to investigate why this optimization does not happen during IGVN. + // There could also be other issues - I did not investigate further yet. + // + // Found with: + // java -XX:VerifyIterativeGVN=0100 -Xcomp --version + case Op_Phi: + return false; + + // MemBarNode::Ideal does "Eliminate volatile MemBars for scalar replaced objects". + // For examle "The allocated object does not escape". + // + // It seems the difference to earlier calls to MemBarNode::Ideal, is that there + // alloc->as_Allocate()->does_not_escape_thread() returned false, but in verification + // it returned true. Why does the MemBarStoreStore not get added to the IGVN + // worklist when this change happens? + // + // Found with: + // java -XX:VerifyIterativeGVN=0100 -Xcomp --version + case Op_MemBarStoreStore: + return false; + + // ConvI2LNode::Ideal converts + // 648 AddI === _ 583 645 [[ 661 ]] + // 661 ConvI2L === _ 648 [[ 664 ]] #long:0..maxint-1:www + // into + // 772 ConvI2L === _ 645 [[ 773 ]] #long:-120..maxint-61:www + // 771 ConvI2L === _ 583 [[ 773 ]] #long:60..120:www + // 773 AddL === _ 771 772 [[ ]] + // + // We have to investigate why this does not happen during IGVN in this case. + // There could also be other issues - I did not investigate further yet. + // + // Found with: + // java -XX:VerifyIterativeGVN=0100 -Xcomp --version + case Op_ConvI2L: + return false; + + // AddNode::IdealIL can do this transform (and similar other ones): + // Convert "a*b+a*c into a*(b+c) + // The example had AddI(MulI(a, b), MulI(a, c)). Why did this not happen + // during IGVN? There was a mutation for one of the MulI, and only + // after that the pattern was as needed for the optimization. The MulI + // was added to the IGVN worklist, but not the AddI. This probably + // can be fixed by adding the correct pattern in add_users_of_use_to_worklist. + // + // Found with: + // test/hotspot/jtreg/compiler/loopopts/superword/ReductionPerf.java + // -XX:VerifyIterativeGVN=1110 + case Op_AddI: + return false; + + // ArrayCopyNode::Ideal + // calls ArrayCopyNode::prepare_array_copy + // calls Compile::conv_I2X_index -> is called with sizetype = intcon(0), I think that + // is not expected, and we create a range int:0..-1 + // calls Compile::constrained_convI2L -> creates ConvI2L(intcon(1), int:0..-1) + // note: the type is already empty! + // calls PhaseIterGVN::transform + // calls PhaseIterGVN::transform_old + // calls PhaseIterGVN::subsume_node -> subsume ConvI2L with TOP + // calls Unique_Node_List::push -> pushes TOP to worklist + // + // Once we get back to ArrayCopyNode::prepare_array_copy, we get back TOP, and + // return false. This means we eventually return nullptr from ArrayCopyNode::Ideal. + // + // Question: is it ok to push anything to the worklist during ::Ideal, if we will + // return nullptr, indicating nothing happened? + // Is it smart to do transform in Compile::constrained_convI2L, and then + // check for TOP in calls ArrayCopyNode::prepare_array_copy? + // Should we just allow TOP to land on the worklist, as an exception? + // + // Found with: + // compiler/arraycopy/TestArrayCopyAsLoadsStores.java + // -XX:VerifyIterativeGVN=1110 + case Op_ArrayCopy: + return false; + + // CastLLNode::Ideal + // calls ConstraintCastNode::optimize_integer_cast -> pushes CastLL through SubL + // + // Could be a notification issue, where updates inputs of CastLL do not notify + // down through SubL to CastLL. + // + // Found With: + // compiler/c2/TestMergeStoresMemorySegment.java#byte-array + // -XX:VerifyIterativeGVN=1110 + case Op_CastLL: + return false; + + // Similar case happens to CastII + // + // Found With: + // compiler/c2/TestScalarReplacementMaxLiveNodes.java + // -XX:VerifyIterativeGVN=1110 + case Op_CastII: + return false; + + // MaxLNode::Ideal + // calls AddNode::Ideal + // calls commute -> decides to swap edges + // + // Another notification issue, because we check inputs of inputs? + // MaxL -> Phi -> Loop + // MaxL -> Phi -> MaxL + // + // Found with: + // compiler/c2/irTests/TestIfMinMax.java + // -XX:VerifyIterativeGVN=1110 + case Op_MaxL: + case Op_MinL: + return false; + + // OrINode::Ideal + // calls AddNode::Ideal + // calls commute -> left is Load, right not -> commute. + // + // Not sure why notification does not work here, seems like + // the depth is only 1, so it should work. Needs investigation. + // + // Found with: + // compiler/codegen/TestCharVect2.java#id0 + // -XX:VerifyIterativeGVN=1110 + case Op_OrI: + case Op_OrL: + return false; + + // Bool -> constant folded to 1. + // Issue with notification? + // + // Found with: + // compiler/c2/irTests/TestVectorizationMismatchedAccess.java + // -XX:VerifyIterativeGVN=1110 + case Op_Bool: + return false; + + // LShiftLNode::Ideal + // Looks at pattern: "(x + x) << c0", converts it to "x << (c0 + 1)" + // Probably a notification issue. + // + // Found with: + // compiler/conversions/TestMoveConvI2LOrCastIIThruAddIs.java + // -ea -esa -XX:CompileThreshold=100 -XX:+UnlockExperimentalVMOptions -server -XX:-TieredCompilation -XX:+IgnoreUnrecognizedVMOptions -XX:VerifyIterativeGVN=1110 + case Op_LShiftL: + return false; + + // LShiftINode::Ideal + // pattern: ((x + con1) << con2) -> x << con2 + con1 << con2 + // Could be issue with notification of inputs of inputs + // + // Side-note: should cases like these not be shared between + // LShiftI and LShiftL? + // + // Found with: + // compiler/escapeAnalysis/Test6689060.java + // -XX:+IgnoreUnrecognizedVMOptions -XX:VerifyIterativeGVN=1110 -ea -esa -XX:CompileThreshold=100 -XX:+UnlockExperimentalVMOptions -server -XX:-TieredCompilation -XX:+IgnoreUnrecognizedVMOptions -XX:VerifyIterativeGVN=1110 + case Op_LShiftI: + return false; + + // AddPNode::Ideal seems to do set_req without removing lock first. + // Found with various vector tests tier1-tier3. + case Op_AddP: + return false; + + // StrIndexOfNode::Ideal + // Found in tier1-3. + case Op_StrIndexOf: + case Op_StrIndexOfChar: + return false; + + // StrEqualsNode::Identity + // + // Found (linux x64 only?) with: + // serviceability/sa/ClhsdbThreadContext.java + // -XX:+UnlockExperimentalVMOptions -XX:LockingMode=1 -XX:+IgnoreUnrecognizedVMOptions -XX:VerifyIterativeGVN=1110 + case Op_StrEquals: + return false; + + // AryEqNode::Ideal + // Not investigated. Reshapes itself and adds lots of nodes to the worklist. + // + // Found with: + // vmTestbase/vm/mlvm/meth/stress/compiler/i2c_c2i/Test.java + // -XX:+UnlockDiagnosticVMOptions -XX:-TieredCompilation -XX:+StressUnstableIfTraps -XX:+IgnoreUnrecognizedVMOptions -XX:VerifyIterativeGVN=1110 + case Op_AryEq: + return false; + + // MergeMemNode::Ideal + // Found in tier1-3. Did not investigate further yet. + case Op_MergeMem: + return false; + + // URShiftINode::Ideal + // Found in tier1-3. Did not investigate further yet. + case Op_URShiftI: + return false; + + // CMoveINode::Ideal + // Found in tier1-3. Did not investigate further yet. + case Op_CMoveI: + return false; + + // CmpPNode::Ideal calls isa_const_java_mirror + // and generates new constant nodes, even if no progress is made. + // We can probably rewrite this so that only types are generated. + // It seems that object types are not hashed, we could investigate + // if that is an option as well. + // + // Found with: + // java -XX:VerifyIterativeGVN=1110 -Xcomp --version + case Op_CmpP: + return false; + + // MinINode::Ideal + // Did not investigate, but there are some patterns that might + // need more notification. + case Op_MinI: + case Op_MaxI: // preemptively removed it as well. + return false; + } + + if (n->is_Load()) { + // LoadNode::Ideal uses tries to find an earlier memory state, and + // checks can_see_stored_value for it. + // + // Investigate why this was not already done during IGVN. + // A similar issue happens with Identity. + // + // There seem to be other cases where loads go up some steps, like + // LoadNode::Ideal going up 10x steps to find dominating load. + // + // Found with: + // test/hotspot/jtreg/compiler/arraycopy/TestCloneAccess.java + // -XX:VerifyIterativeGVN=1110 + return false; + } + + if (n->is_Store()) { + // StoreNode::Ideal can do this: + // // Capture an unaliased, unconditional, simple store into an initializer. + // // Or, if it is independent of the allocation, hoist it above the allocation. + // That replaces the Store with a MergeMem. + // + // We have to investigate why this does not happen during IGVN in this case. + // There could also be other issues - I did not investigate further yet. + // + // Found with: + // java -XX:VerifyIterativeGVN=0100 -Xcomp --version + return false; + } + + if (n->is_Vector()) { + // VectorNode::Ideal swaps edges, but only for ops + // that are deemed commutable. But swap_edges + // requires the hash to be invariant when the edges + // are swapped, which is not implemented for these + // vector nodes. This seems not to create any trouble + // usually, but we can also get graphs where in the + // end the nodes are not all commuted, so there is + // definitively an issue here. + // + // Probably we have two options: kill the hash, or + // properly make the hash commutation friendly. + // + // Found with: + // compiler/vectorapi/TestMaskedMacroLogicVector.java + // -XX:+IgnoreUnrecognizedVMOptions -XX:VerifyIterativeGVN=1110 -XX:+UseParallelGC -XX:+UseNUMA + return false; + } + + if (n->is_Region()) { + // LoopNode::Ideal calls RegionNode::Ideal. + // CountedLoopNode::Ideal calls RegionNode::Ideal too. + // But I got an issue because RegionNode::optimize_trichotomy + // then modifies another node, and pushes nodes to the worklist + // Not sure if this is ok, modifying another node like that. + // Maybe it is, then we need to look into what to do with + // the nodes that are now on the worklist, maybe just clear + // them out again. But maybe modifying other nodes like that + // is also bad design. In the end, we return nullptr for + // the current CountedLoop. But the extra nodes on the worklist + // trip the asserts later on. + // + // Found with: + // compiler/eliminateAutobox/TestShortBoxing.java + // -ea -esa -XX:CompileThreshold=100 -XX:+UnlockExperimentalVMOptions -server -XX:-TieredCompilation -XX:+IgnoreUnrecognizedVMOptions -XX:VerifyIterativeGVN=1110 + return false; + } + + if (n->is_CallJava()) { + // CallStaticJavaNode::Ideal + // Led to a crash: + // assert((is_CallStaticJava() && cg->is_mh_late_inline()) || (is_CallDynamicJava() && cg->is_virtual_late_inline())) failed: mismatch + // + // Did not investigate yet, could be a bug. + // Or maybe it does not expect to be called during verification. + // + // Found with: + // test/jdk/jdk/incubator/vector/VectorRuns.java + // -XX:VerifyIterativeGVN=1110 + + // CallDynamicJavaNode::Ideal, and I think also for CallStaticJavaNode::Ideal + // and possibly their subclasses. + // During late inlining it can call CallJavaNode::register_for_late_inline + // That means we do more rounds of late inlining, but might fail. + // Then we do IGVN again, and register the node again for late inlining. + // This creates an endless cycle. Everytime we try late inlining, we + // are also creating more nodes, especially SafePoint and MergeMem. + // These nodes are immediately rejected when the inlining fails in the + // do_late_inline_check, but they still grow the memory, until we hit + // the MemLimit and crash. + // The assumption here seems that CallDynamicJavaNode::Ideal does not get + // called repeatedly, and eventually we terminate. I fear this is not + // a great assumption to make. We should investigate more. + // + // Found with: + // compiler/loopopts/superword/TestDependencyOffsets.java#vanilla-U + // -XX:+IgnoreUnrecognizedVMOptions -XX:VerifyIterativeGVN=1110 + return false; + } + + // The number of nodes shoud not increase. + uint old_unique = C->unique(); + + Node* i = n->Ideal(this, can_reshape); + // If there was no new Idealization, we are probably happy. + if (i == nullptr) { + if (old_unique < C->unique()) { + stringStream ss; // Print as a block without tty lock. + ss.cr(); + ss.print_cr("Ideal optimization did not make progress but created new unused nodes."); + ss.print_cr(" old_unique = %d, unique = %d", old_unique, C->unique()); + n->dump_bfs(1, nullptr, "", &ss); + tty->print_cr("%s", ss.as_string()); + return true; + } + + verify_empty_worklist(n); + + // Everything is good. + return false; + } + + // We just saw a new Idealization which was not done during IGVN. + stringStream ss; // Print as a block without tty lock. + ss.cr(); + ss.print_cr("Missed Ideal optimization (can_reshape=%s):", can_reshape ? "true": "false"); + if (i == n) { + ss.print_cr("The node was reshaped by Ideal."); + } else { + ss.print_cr("The node was replaced by Ideal."); + ss.print_cr("Old node:"); + n->dump_bfs(1, nullptr, "", &ss); + } + ss.print_cr("The result after Ideal:"); + i->dump_bfs(1, nullptr, "", &ss); + tty->print_cr("%s", ss.as_string()); + return true; +} + +// Check that all Identity optimizations that could be done were done. +// Returns true if it found missed optimization opportunities and +// false otherwise (no missed optimization, or skipped verification). +bool PhaseIterGVN::verify_Identity_for(Node* n) { + // First, we check a list of exceptions, where we skip verification, + // because there are known cases where Ideal can optimize after IGVN. + // Some may be expected and cannot be fixed, and others should be fixed. + switch (n->Opcode()) { + // SafePointNode::Identity can remove SafePoints, but wants to wait until + // after loopopts: + // // Transforming long counted loops requires a safepoint node. Do not + // // eliminate a safepoint until loop opts are over. + // if (in(0)->is_Proj() && !phase->C->major_progress()) { + // + // I think the check for major_progress does delay it until after loopopts + // but it does not ensure that the node is on the IGVN worklist after + // loopopts. I think we should try to instead check for + // phase->C->post_loop_opts_phase() and call record_for_post_loop_opts_igvn. + // + // Found with: + // java -XX:VerifyIterativeGVN=1000 -Xcomp --version + case Op_SafePoint: + return false; + + // MergeMemNode::Identity replaces the MergeMem with its base_memory if it + // does not record any other memory splits. + // + // I did not deeply investigate, but it looks like MergeMemNode::Identity + // never got called during IGVN for this node, investigate why. + // + // Found with: + // java -XX:VerifyIterativeGVN=1000 -Xcomp --version + case Op_MergeMem: + return false; + + // ConstraintCastNode::Identity finds casts that are the same, except that + // the control is "higher up", i.e. dominates. The call goes via + // ConstraintCastNode::dominating_cast to PhaseGVN::is_dominator_helper, + // which traverses up to 100 idom steps. If anything gets optimized somewhere + // away from the cast, but within 100 idom steps, the cast may not be + // put on the IGVN worklist any more. + // + // Found with: + // java -XX:VerifyIterativeGVN=1000 -Xcomp --version + case Op_CastPP: + case Op_CastII: + case Op_CastLL: + return false; + + // Same issue for CheckCastPP, uses ConstraintCastNode::Identity and + // checks dominator, which may be changed, but too far up for notification + // to work. + // + // Found with: + // compiler/c2/irTests/TestSkeletonPredicates.java + // -XX:VerifyIterativeGVN=1110 + case Op_CheckCastPP: + return false; + + // In SubNode::Identity, we do: + // Convert "(X+Y) - Y" into X and "(X+Y) - X" into Y + // In the example, the AddI had an input replaced, the AddI is + // added to the IGVN worklist, but the SubI is one link further + // down and is not added. I checked add_users_of_use_to_worklist + // where I would expect the SubI would be added, and I cannot + // find the pattern, only this one: + // If changed AddI/SubI inputs, check CmpU for range check optimization. + // + // Fix this "notification" issue and check if there are any other + // issues. + // + // Found with: + // java -XX:VerifyIterativeGVN=1000 -Xcomp --version + case Op_SubI: + case Op_SubL: + return false; + + // PhiNode::Identity checks for patterns like: + // r = (x != con) ? x : con; + // that can be constant folded to "x". + // + // Call goes through PhiNode::is_cmove_id and CMoveNode::is_cmove_id. + // I suspect there was some earlier change to one of the inputs, but + // not all relevant outputs were put on the IGVN worklist. + // + // Found with: + // test/hotspot/jtreg/gc/stress/gcbasher/TestGCBasherWithG1.java + // -XX:VerifyIterativeGVN=1110 + case Op_Phi: + return false; + + // ConvI2LNode::Identity does + // convert I2L(L2I(x)) => x + // + // Investigate why this did not already happen during IGVN. + // + // Found with: + // compiler/loopopts/superword/TestDependencyOffsets.java#vanilla-A + // -XX:VerifyIterativeGVN=1110 + case Op_ConvI2L: + return false; + + // MaxNode::find_identity_operation + // Finds patterns like Max(A, Max(A, B)) -> Max(A, B) + // This can be a 2-hop search, so maybe notification is not + // good enough. + // + // Found with: + // compiler/codegen/TestBooleanVect.java + // -XX:VerifyIterativeGVN=1110 + case Op_MaxL: + case Op_MinL: + case Op_MaxI: + case Op_MinI: + case Op_MaxF: + case Op_MinF: + case Op_MaxHF: + case Op_MinHF: + case Op_MaxD: + case Op_MinD: + return false; + + + // AddINode::Identity + // Converts (x-y)+y to x + // Could be issue with notification + // + // Turns out AddL does the same. + // + // Found with: + // compiler/c2/Test6792161.java + // -ea -esa -XX:CompileThreshold=100 -XX:+UnlockExperimentalVMOptions -server -XX:-TieredCompilation -XX:+IgnoreUnrecognizedVMOptions -XX:VerifyIterativeGVN=1110 + case Op_AddI: + case Op_AddL: + return false; + + // AbsINode::Identity + // Not investigated yet. + case Op_AbsI: + return false; + } + + if (n->is_Load()) { + // LoadNode::Identity tries to look for an earlier store value via + // can_see_stored_value. I found an example where this led to + // an Allocation, where we could assume the value was still zero. + // So the LoadN can be replaced with a zerocon. + // + // Investigate why this was not already done during IGVN. + // A similar issue happens with Ideal. + // + // Found with: + // java -XX:VerifyIterativeGVN=1000 -Xcomp --version + return false; + } + + if (n->is_Store()) { + // StoreNode::Identity + // Not investigated, but found missing optimization for StoreI. + // Looks like a StoreI is replaced with an InitializeNode. + // + // Found with: + // applications/ctw/modules/java_base_2.java + // -ea -esa -XX:CompileThreshold=100 -XX:+UnlockExperimentalVMOptions -server -XX:-TieredCompilation -Djava.awt.headless=true -XX:+IgnoreUnrecognizedVMOptions -XX:VerifyIterativeGVN=1110 + return false; + } + + if (n->is_Vector()) { + // Found with tier1-3. Not investigated yet. + // The observed issue was with AndVNode::Identity + return false; + } + + Node* i = n->Identity(this); + // If we cannot find any other Identity, we are happy. + if (i == n) { + verify_empty_worklist(n); + return false; + } + + // The verification just found a new Identity that was not found during IGVN. + stringStream ss; // Print as a block without tty lock. + ss.cr(); + ss.print_cr("Missed Identity optimization:"); + ss.print_cr("Old node:"); + n->dump_bfs(1, nullptr, "", &ss); + ss.print_cr("New node:"); + i->dump_bfs(1, nullptr, "", &ss); + tty->print_cr("%s", ss.as_string()); return true; } #endif @@ -1890,12 +2776,12 @@ void PhaseCCP::analyze() { #ifdef ASSERT // For every node n on verify list, check if type(n) == n->Value() -// We have a list of exceptions, see comments in verify_node_value. +// We have a list of exceptions, see comments in verify_Value_for. void PhaseCCP::verify_analyze(Unique_Node_List& worklist_verify) { bool failure = false; while (worklist_verify.size()) { Node* n = worklist_verify.pop(); - failure |= verify_node_value(n); + failure |= verify_Value_for(n); } // If we get this assert, check why the reported nodes were not processed again in CCP. // We should either make sure that these nodes are properly added back to the CCP worklist diff --git a/src/hotspot/share/opto/phaseX.hpp b/src/hotspot/share/opto/phaseX.hpp index c2a0f0dbb77..648e911e783 100644 --- a/src/hotspot/share/opto/phaseX.hpp +++ b/src/hotspot/share/opto/phaseX.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -494,7 +494,10 @@ public: void optimize(); #ifdef ASSERT void verify_optimize(); - bool verify_node_value(Node* n); + bool verify_Value_for(Node* n); + bool verify_Ideal_for(Node* n, bool can_reshape); + bool verify_Identity_for(Node* n); + void verify_empty_worklist(Node* n); #endif #ifndef PRODUCT @@ -593,6 +596,14 @@ public: // '-XX:VerifyIterativeGVN=10' return ((VerifyIterativeGVN % 100) / 10) == 1; } + static bool is_verify_Ideal() { + // '-XX:VerifyIterativeGVN=100' + return ((VerifyIterativeGVN % 1000) / 100) == 1; + } + static bool is_verify_Identity() { + // '-XX:VerifyIterativeGVN=1000' + return ((VerifyIterativeGVN % 10000) / 1000) == 1; + } protected: // Sub-quadratic implementation of '-XX:VerifyIterativeGVN=1' (Use-Def verification). julong _verify_counter; diff --git a/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp b/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp index d9972b21ea1..18aa4a56d71 100644 --- a/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp +++ b/src/hotspot/share/runtime/flags/jvmFlagConstraintsCompiler.cpp @@ -299,8 +299,9 @@ JVMFlag::Error TypeProfileLevelConstraintFunc(uint value, bool verbose) { } JVMFlag::Error VerifyIterativeGVNConstraintFunc(uint value, bool verbose) { + const int max_modes = 4; uint original_value = value; - for (int i = 0; i < 2; i++) { + for (int i = 0; i < max_modes; i++) { if (value % 10 > 1) { JVMFlag::printError(verbose, "Invalid value (" UINT32_FORMAT ") " @@ -312,7 +313,7 @@ JVMFlag::Error VerifyIterativeGVNConstraintFunc(uint value, bool verbose) { if (value != 0) { JVMFlag::printError(verbose, "Invalid value (" UINT32_FORMAT ") " - "for VerifyIterativeGVN: maximal 2 digits\n", original_value); + "for VerifyIterativeGVN: maximal %d digits\n", original_value, max_modes); return JVMFlag::VIOLATES_CONSTRAINT; } return JVMFlag::SUCCESS; diff --git a/test/hotspot/jtreg/compiler/c2/TestVerifyIterativeGVN.java b/test/hotspot/jtreg/compiler/c2/TestVerifyIterativeGVN.java index f3f589f8fb1..83f3540226f 100644 --- a/test/hotspot/jtreg/compiler/c2/TestVerifyIterativeGVN.java +++ b/test/hotspot/jtreg/compiler/c2/TestVerifyIterativeGVN.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, 2023, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2020, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -25,9 +25,9 @@ * @test * @bug 8238756 * @requires vm.debug == true & vm.flavor == "server" - * @summary Run with -Xcomp to test -XX:VerifyIterativeGVN=11 in debug builds. + * @summary Run with -Xcomp to test -XX:VerifyIterativeGVN=1111 in debug builds. * - * @run main/othervm/timeout=300 -Xbatch -Xcomp -XX:VerifyIterativeGVN=11 compiler.c2.TestVerifyIterativeGVN + * @run main/othervm/timeout=300 -Xcomp -XX:VerifyIterativeGVN=1111 compiler.c2.TestVerifyIterativeGVN */ package compiler.c2;