Teach jump threading to duplicate small blocks when the branch

condition is a xor with a phi node.  This eliminates nonsense
like this from 176.gcc in several places:

 LBB166_84:
        testl   %eax, %eax
-       setne   %al
-       xorb    %cl, %al
-       notb    %al
-       testb   $1, %al
-       je      LBB166_85
+       je      LBB166_69
+       jmp     LBB166_85

This is rdar://7391699



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@93221 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2010-01-12 02:07:17 +00:00
parent 68c3def126
commit 2249a0b1bd
2 changed files with 141 additions and 14 deletions

View File

@ -89,7 +89,7 @@ namespace {
bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs, bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB); BasicBlock *SuccBB);
bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
BasicBlock *PredBB); const SmallVectorImpl<BasicBlock *> &PredBBs);
typedef SmallVectorImpl<std::pair<ConstantInt*, typedef SmallVectorImpl<std::pair<ConstantInt*,
BasicBlock*> > PredValueInfo; BasicBlock*> > PredValueInfo;
@ -103,6 +103,7 @@ namespace {
bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB); bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessBranchOnPHI(PHINode *PN); bool ProcessBranchOnPHI(PHINode *PN);
bool ProcessBranchOnXOR(BinaryOperator *BO);
bool SimplifyPartiallyRedundantLoad(LoadInst *LI); bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
}; };
@ -603,6 +604,13 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator())) if (PN->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
return ProcessBranchOnPHI(PN); return ProcessBranchOnPHI(PN);
// If this is an otherwise-unfoldable branch on a XOR, see if we can simplify.
if (CondInst->getOpcode() == Instruction::Xor &&
CondInst->getParent() == BB && isa<BranchInst>(BB->getTerminator()))
return ProcessBranchOnXOR(cast<BinaryOperator>(CondInst));
// TODO: If we have: "br (X > 0)" and we have a predecessor where we know // TODO: If we have: "br (X > 0)" and we have a predecessor where we know
// "(X == 4)", thread through this block. // "(X == 4)", thread through this block.
@ -1073,6 +1081,11 @@ bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) { bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
BasicBlock *BB = PN->getParent(); BasicBlock *BB = PN->getParent();
// TODO: We could make use of this to do it once for blocks with common PHI
// values.
SmallVector<BasicBlock*, 1> PredBBs;
PredBBs.resize(1);
// If any of the predecessor blocks end in an unconditional branch, we can // If any of the predecessor blocks end in an unconditional branch, we can
// *duplicate* the conditional branch into that block in order to further // *duplicate* the conditional branch into that block in order to further
// encourage jump threading and to eliminate cases where we have branch on a // encourage jump threading and to eliminate cases where we have branch on a
@ -1080,15 +1093,96 @@ bool JumpThreading::ProcessBranchOnPHI(PHINode *PN) {
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
BasicBlock *PredBB = PN->getIncomingBlock(i); BasicBlock *PredBB = PN->getIncomingBlock(i);
if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator())) if (BranchInst *PredBr = dyn_cast<BranchInst>(PredBB->getTerminator()))
if (PredBr->isUnconditional() && if (PredBr->isUnconditional()) {
PredBBs[0] = PredBB;
// Try to duplicate BB into PredBB. // Try to duplicate BB into PredBB.
DuplicateCondBranchOnPHIIntoPred(BB, PredBB)) if (DuplicateCondBranchOnPHIIntoPred(BB, PredBBs))
return true; return true;
} }
}
return false; return false;
} }
/// ProcessBranchOnXOR - We have an otherwise unthreadable conditional branch on
/// a xor instruction in the current block. See if there are any
/// simplifications we can do based on inputs to the xor.
///
bool JumpThreading::ProcessBranchOnXOR(BinaryOperator *BO) {
BasicBlock *BB = BO->getParent();
// If either the LHS or RHS of the xor is a constant, don't do this
// optimization.
if (isa<ConstantInt>(BO->getOperand(0)) ||
isa<ConstantInt>(BO->getOperand(1)))
return false;
// If we have a xor as the branch input to this block, and we know that the
// LHS or RHS of the xor in any predecessor is true/false, then we can clone
// the condition into the predecessor and fix that value to true, saving some
// logical ops on that path and encouraging other paths to simplify.
//
// This copies something like this:
//
// BB:
// %X = phi i1 [1], [%X']
// %Y = icmp eq i32 %A, %B
// %Z = xor i1 %X, %Y
// br i1 %Z, ...
//
// Into:
// BB':
// %Y = icmp ne i32 %A, %B
// br i1 %Z, ...
SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> XorOpValues;
bool isLHS = true;
if (!ComputeValueKnownInPredecessors(BO->getOperand(0), BB, XorOpValues)) {
assert(XorOpValues.empty());
if (!ComputeValueKnownInPredecessors(BO->getOperand(1), BB, XorOpValues))
return false;
isLHS = false;
}
assert(!XorOpValues.empty() &&
"ComputeValueKnownInPredecessors returned true with no values");
// Scan the information to see which is most popular: true or false. The
// predecessors can be of the set true, false, or undef.
unsigned NumTrue = 0, NumFalse = 0;
for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
if (!XorOpValues[i].first) continue; // Ignore undefs for the count.
if (XorOpValues[i].first->isZero())
++NumFalse;
else
++NumTrue;
}
// Determine which value to split on, true, false, or undef if neither.
ConstantInt *SplitVal = 0;
if (NumTrue > NumFalse)
SplitVal = ConstantInt::getTrue(BB->getContext());
else if (NumTrue != 0 || NumFalse != 0)
SplitVal = ConstantInt::getFalse(BB->getContext());
// Collect all of the blocks that this can be folded into so that we can
// factor this once and clone it once.
SmallVector<BasicBlock*, 8> BlocksToFoldInto;
for (unsigned i = 0, e = XorOpValues.size(); i != e; ++i) {
if (XorOpValues[i].first != SplitVal && XorOpValues[i].first != 0) continue;
BlocksToFoldInto.push_back(XorOpValues[i].second);
}
// Try to duplicate BB into PredBB.
if (DuplicateCondBranchOnPHIIntoPred(BB, BlocksToFoldInto)) {
// errs() << "CLONE XOR COND: " << *BB << "Into PRED: " << *BlocksToFoldInto[0];
return true;
}
return false;
}
/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new /// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block. If it has PHI nodes, add entries for /// predecessor to the PHIBB block. If it has PHI nodes, add entries for
/// NewPred using the entries from OldPred (suitably mapped). /// NewPred using the entries from OldPred (suitably mapped).
@ -1277,13 +1371,15 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB,
/// improves the odds that the branch will be on an analyzable instruction like /// improves the odds that the branch will be on an analyzable instruction like
/// a compare. /// a compare.
bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB, bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
BasicBlock *PredBB) { const SmallVectorImpl<BasicBlock *> &PredBBs) {
assert(!PredBBs.empty() && "Can't handle an empty set");
// If BB is a loop header, then duplicating this block outside the loop would // If BB is a loop header, then duplicating this block outside the loop would
// cause us to transform this into an irreducible loop, don't do this. // cause us to transform this into an irreducible loop, don't do this.
// See the comments above FindLoopHeaders for justifications and caveats. // See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) { if (LoopHeaders.count(BB)) {
DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName() DEBUG(dbgs() << " Not duplicating loop header '" << BB->getName()
<< "' into predecessor block '" << PredBB->getName() << "' into predecessor block '" << PredBBs[0]->getName()
<< "' - it might create an irreducible loop!\n"); << "' - it might create an irreducible loop!\n");
return false; return false;
} }
@ -1295,12 +1391,32 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
return false; return false;
} }
// And finally, do it! Start by factoring the predecessors is needed.
BasicBlock *PredBB;
if (PredBBs.size() == 1)
PredBB = PredBBs[0];
else {
DEBUG(dbgs() << " Factoring out " << PredBBs.size()
<< " common predecessors.\n");
PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
".thr_comm", this);
}
// Okay, we decided to do this! Clone all the instructions in BB onto the end // Okay, we decided to do this! Clone all the instructions in BB onto the end
// of PredBB. // of PredBB.
DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '" DEBUG(dbgs() << " Duplicating block '" << BB->getName() << "' into end of '"
<< PredBB->getName() << "' to eliminate branch on phi. Cost: " << PredBB->getName() << "' to eliminate branch on phi. Cost: "
<< DuplicationCost << " block is:" << *BB << "\n"); << DuplicationCost << " block is:" << *BB << "\n");
// Unless PredBB ends with an unconditional branch, split the edge so that we
// can just clone the bits from BB into the end of the new PredBB.
BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
if (!OldPredBranch->isUnconditional()) {
PredBB = SplitEdge(PredBB, BB, this);
OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
}
// We are going to have to map operands from the original BB block into the // We are going to have to map operands from the original BB block into the
// PredBB block. Evaluate PHI nodes in BB. // PredBB block. Evaluate PHI nodes in BB.
DenseMap<Instruction*, Value*> ValueMapping; DenseMap<Instruction*, Value*> ValueMapping;
@ -1309,8 +1425,6 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI) for (; PHINode *PN = dyn_cast<PHINode>(BI); ++BI)
ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB); ValueMapping[PN] = PN->getIncomingValueForBlock(PredBB);
BranchInst *OldPredBranch = cast<BranchInst>(PredBB->getTerminator());
// Clone the non-phi instructions of BB into PredBB, keeping track of the // Clone the non-phi instructions of BB into PredBB, keeping track of the
// mapping and using it to remap operands in the cloned instructions. // mapping and using it to remap operands in the cloned instructions.
for (; BI != BB->end(); ++BI) { for (; BI != BB->end(); ++BI) {

View File

@ -383,11 +383,11 @@ return:
} }
;;; Duplicate condition to avoid xor of cond. ;; Duplicate condition to avoid xor of cond.
;;; TODO: Make this happen. ;; rdar://7391699
define i32 @testXX(i1 %cond, i1 %cond2) { define i32 @test13(i1 %cond, i1 %cond2) {
Entry: Entry:
; CHECK: @testXX ; CHECK: @test13
%v1 = call i32 @f1() %v1 = call i32 @f1()
br i1 %cond, label %Merge, label %F1 br i1 %cond, label %Merge, label %F1
@ -396,7 +396,8 @@ F1:
Merge: Merge:
%B = phi i1 [true, %Entry], [%cond2, %F1] %B = phi i1 [true, %Entry], [%cond2, %F1]
%M = icmp eq i32 %v1, 192 %C = phi i32 [192, %Entry], [%v1, %F1]
%M = icmp eq i32 %C, 192
%N = xor i1 %B, %M %N = xor i1 %B, %M
br i1 %N, label %T2, label %F2 br i1 %N, label %T2, label %F2
@ -405,6 +406,18 @@ T2:
F2: F2:
ret i32 %v1 ret i32 %v1
;; FIXME: CONSTANT FOLD on clone and when phi gets eliminated.
; CHECK: Entry.Merge_crit_edge:
; CHECK-NEXT: %M1 = icmp eq i32 192, 192
; CHECK-NEXT: %N2 = xor i1 true, %M1
; CHECK-NEXT: br i1 %N2, label %T2, label %F2
; CHECK: Merge:
; CHECK-NEXT: %M = icmp eq i32 %v1, 192
; CHECK-NEXT: %N = xor i1 %cond2, %M
; CHECK-NEXT: br i1 %N, label %T2, label %F2
} }