Teach the function cloner (and thus the inliner) to simplify PHINodes aggressively.

There are lots of dire warnings about this being expensive that seem to predate switching to the TrackingVH-based value remapper that is automatically updated on RAUW. This makes it easy to not just prune single-entry PHIs, but to fully simplify PHIs, and to recursively simplify the newly inlined code to propagate PHINode simplifications.

This introduces a bit of a thorny problem though. We may end up simplifying a branch condition to a constant when we fold PHINodes, and we would like to nuke any dead blocks resulting from this so that time isn't wasted continually analyzing them, but this isn't easy. Deleting basic blocks *after* they are fully cloned and mapped into the new function currently requires manually updating the value map. The last piece of the simplification-during-inlining puzzle will require either switching to WeakVH mappings or some other piece of refactoring. I've left a FIXME in the testcase about this.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153410 91177308-0d34-0410-b5e6-96231b3b80d8
2025-04-06 09:44:39 +00:00 · 2012-03-25 10:34:54 +00:00 · 2012-03-25 10:34:54 +00:00 · f8c8a9cbb4
commit f8c8a9cbb4
parent 0417d7dca0
2 changed files with 51 additions and 15 deletions
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -498,13 +498,19 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
        ++OldI;
      }
    }
-    // NOTE: We cannot eliminate single entry phi nodes here, because of
-    // VMap.  Single entry phi nodes can have multiple VMap entries
-    // pointing at them.  Thus, deleting one would require scanning the VMap
-    // to update any entries in it that would require that.  This would be
-    // really slow.
  }
-  
+
+  // Make a second pass over the PHINodes now that all of them have been
+  // remapped into the new function, simplifying the PHINode and performing any
+  // recursive simplifications exposed. This will transparently update the
+  // TrackingVH in the VMap. Notably, we rely on that so that if we coalesce
+  // two PHINodes, the iteration over the old PHIs remains valid, and the
+  // mapping will just map us to the new node (which may not even be a PHI
+  // node).
+  for (unsigned Idx = 0, Size = PHIToResolve.size(); Idx != Size; ++Idx)
+    if (PHINode *PN = dyn_cast<PHINode>(VMap[PHIToResolve[Idx]]))
+      recursivelySimplifyInstruction(PN, TD);
+
  // Now that the inlined function body has been fully constructed, go through
  // and zap unconditional fall-through branches.  This happen all the time when
  // specializing code: code specialization turns conditional branches into
@@ -514,15 +520,15 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
    BranchInst *BI = dyn_cast<BranchInst>(I->getTerminator());
    if (!BI || BI->isConditional()) { ++I; continue; }
    
-    // Note that we can't eliminate uncond branches if the destination has
-    // single-entry PHI nodes.  Eliminating the single-entry phi nodes would
-    // require scanning the VMap to update any entries that point to the phi
-    // node.
    BasicBlock *Dest = BI->getSuccessor(0);
-    if (!Dest->getSinglePredecessor() || isa<PHINode>(Dest->begin())) {
+    if (!Dest->getSinglePredecessor()) {
      ++I; continue;
    }
-    
+
+    // We shouldn't be able to get single-entry PHI nodes here, as instsimplify
+    // above should have zapped all of them..
+    assert(!isa<PHINode>(Dest->begin()));
+
    // We know all single-entry PHI nodes in the inlined function have been
    // removed, so we just need to splice the blocks.
    BI->eraseFromParent();
--- a/test/Transforms/Inline/inline_cleanup.ll
+++ b/test/Transforms/Inline/inline_cleanup.ll
@@ -74,7 +74,7 @@ entry:

 declare void @f(i32 %x)

-define void @inner2(i32 %x, i32 %y, i32 %z) {
+define void @inner2(i32 %x, i32 %y, i32 %z, i1 %b) {
 entry:
  %cmp1 = icmp ne i32 %x, 0
  br i1 %cmp1, label %then1, label %end1
@@ -102,17 +102,47 @@ then3:
  br label %end3

 end3:
+  br i1 %b, label %end3.1, label %end3.2
+
+end3.1:
+  %x3.1 = or i32 %x, 10
+  br label %end3.3
+
+end3.2:
+  %x3.2 = or i32 %x, 10
+  br label %end3.3
+
+end3.3:
+  %x3.3 = phi i32 [ %x3.1, %end3.1 ], [ %x3.2, %end3.2 ]
+  %cmp4 = icmp slt i32 %x3.3, 1
+  br i1 %cmp4, label %then4, label %end4
+
+then4:
+  call void @f(i32 %x3.3)
+  br label %end4
+
+end4:
  ret void
 }

-define void @outer2(i32 %z) {
+define void @outer2(i32 %z, i1 %b) {
 ; Ensure that after inlining, none of the blocks with a call to @f actually
 ; make it through inlining.
 ; CHECK: define void @outer2
 ; CHECK-NOT: call
+;
+; FIXME: Currently, we aren't smart enough to delete the last dead basic block.
+; However, we do make the condition a constant. Check that at least until we can
+; start removing the block itself.
+; CHECK: br i1 false, label %[[LABEL:[a-z0-9_.]+]],
+; CHECK-NOT: call
+; CHECK: [[LABEL]]:
+; CHECK-NEXT: call void @f(i32 10)
+; CHECK-NOT: call
+;
 ; CHECK: ret void

 entry:
-  call void @inner2(i32 0, i32 -1, i32 %z)
+  call void @inner2(i32 0, i32 -1, i32 %z, i1 %b)
  ret void
 }