diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 7f35244f196..dbb0be410a8 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -222,6 +222,21 @@ public: return true; } + /// \brief Return if the target supports combining a + /// chain like: + /// \code + /// %andResult = and %val1, #imm-with-one-bit-set; + /// %icmpResult = icmp %andResult, 0 + /// br i1 %icmpResult, label %dest1, label %dest2 + /// \endcode + /// into a single machine instruction of a form like: + /// \code + /// brOnBitSet %register, #bitNumber, dest + /// \endcode + bool isMaskAndBranchFoldingLegal() const { + return MaskAndBranchFoldingIsLegal; + } + /// Return the ValueType of the result of SETCC operations. Also used to /// obtain the target's preferred type for the condition operand of SELECT and /// BRCOND nodes. In the case of BRCOND the argument passed is MVT::Other @@ -1724,6 +1739,10 @@ protected: /// the branch is usually predicted right. bool PredictableSelectIsExpensive; + /// MaskAndBranchFoldingIsLegal - Indicates if the target supports folding + /// a mask of a single bit, a compare, and a branch into a single instruction. + bool MaskAndBranchFoldingIsLegal; + protected: /// Return true if the value types that can be represented by the specified /// register class are all legal. diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index a3961e80031..72d6ac56362 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -60,6 +60,7 @@ STATISTIC(NumExtUses, "Number of uses of [s|z]ext instructions optimized"); STATISTIC(NumRetsDup, "Number of return instructions duplicated"); STATISTIC(NumDbgValueMoved, "Number of debug value instructions moved"); STATISTIC(NumSelectsExpanded, "Number of selects turned into branches"); +STATISTIC(NumAndCmpsMoved, "Number of and/cmp's pushed into branches"); static cl::opt DisableBranchOpts( "disable-cgp-branch-opts", cl::Hidden, cl::init(false), @@ -69,6 +70,10 @@ static cl::opt DisableSelectToBranch( "disable-cgp-select2branch", cl::Hidden, cl::init(false), cl::desc("Disable select to branch conversion.")); +static cl::opt EnableAndCmpSinking( + "enable-andcmp-sinking", cl::Hidden, cl::init(true), + cl::desc("Enable sinkinig and/cmp into branches.")); + namespace { typedef SmallPtrSet SetOfInstrs; typedef DenseMap InstrToOrigTy; @@ -135,6 +140,7 @@ typedef DenseMap InstrToOrigTy; bool OptimizeShuffleVectorInst(ShuffleVectorInst *SI); bool DupRetToEnableTailCallOpts(BasicBlock *BB); bool PlaceDbgValues(Function &F); + bool sinkAndCmp(Function &F); }; } @@ -190,6 +196,13 @@ bool CodeGenPrepare::runOnFunction(Function &F) { // find a node corresponding to the value. EverMadeChange |= PlaceDbgValues(F); + // If there is a mask, compare against zero, and branch that can be combined + // into a single target instruction, push the mask and compare into branch + // users. Do this before OptimizeBlock -> OptimizeInst -> + // OptimizeCmpExpression, which perturbs the pattern being searched for. + if (!DisableBranchOpts) + EverMadeChange |= sinkAndCmp(F); + bool MadeChange = true; while (MadeChange) { MadeChange = false; @@ -2926,3 +2939,70 @@ bool CodeGenPrepare::PlaceDbgValues(Function &F) { } return MadeChange; } + +// If there is a sequence that branches based on comparing a single bit +// against zero that can be combined into a single instruction, and the +// target supports folding these into a single instruction, sink the +// mask and compare into the branch uses. Do this before OptimizeBlock -> +// OptimizeInst -> OptimizeCmpExpression, which perturbs the pattern being +// searched for. +bool CodeGenPrepare::sinkAndCmp(Function &F) { + if (!EnableAndCmpSinking) + return false; + if (!TLI || !TLI->isMaskAndBranchFoldingLegal()) + return false; + bool MadeChange = false; + for (Function::iterator I = F.begin(), E = F.end(); I != E; ) { + BasicBlock *BB = I++; + + // Does this BB end with the following? + // %andVal = and %val, #single-bit-set + // %icmpVal = icmp %andResult, 0 + // br i1 %cmpVal label %dest1, label %dest2" + BranchInst *Brcc = dyn_cast(BB->getTerminator()); + if (!Brcc || !Brcc->isConditional()) + continue; + ICmpInst *Cmp = dyn_cast(Brcc->getOperand(0)); + if (!Cmp || Cmp->getParent() != BB) + continue; + ConstantInt *Zero = dyn_cast(Cmp->getOperand(1)); + if (!Zero || !Zero->isZero()) + continue; + Instruction *And = dyn_cast(Cmp->getOperand(0)); + if (!And || And->getOpcode() != Instruction::And || And->getParent() != BB) + continue; + ConstantInt* Mask = dyn_cast(And->getOperand(1)); + if (!Mask || !Mask->getUniqueInteger().isPowerOf2()) + continue; + DEBUG(dbgs() << "found and; icmp ?,0; brcc\n"); DEBUG(BB->dump()); + + // Push the "and; icmp" for any users that are conditional branches. + // Since there can only be one branch use per BB, we don't need to keep + // track of which BBs we insert into. + for (Value::use_iterator UI = Cmp->use_begin(), E = Cmp->use_end(); + UI != E; ) { + Use &TheUse = *UI; + // Find brcc use. + BranchInst *BrccUser = dyn_cast(*UI); + ++UI; + if (!BrccUser || !BrccUser->isConditional()) + continue; + BasicBlock *UserBB = BrccUser->getParent(); + if (UserBB == BB) continue; + DEBUG(dbgs() << "found Brcc use\n"); + + // Sink the "and; icmp" to use. + MadeChange = true; + BinaryOperator *NewAnd = + BinaryOperator::CreateAnd(And->getOperand(0), And->getOperand(1), "", + BrccUser); + CmpInst *NewCmp = + CmpInst::Create(Cmp->getOpcode(), Cmp->getPredicate(), NewAnd, Zero, + "", BrccUser); + TheUse = NewCmp; + ++NumAndCmpsMoved; + DEBUG(BrccUser->getParent()->dump()); + } + } + return MadeChange; +} diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index f1b79691913..d0c02002c3d 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -679,6 +679,7 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, Pow2DivIsCheap = false; JumpIsExpensive = false; PredictableSelectIsExpensive = false; + MaskAndBranchFoldingIsLegal = false; StackPointerRegisterToSaveRestore = 0; ExceptionPointerRegister = 0; ExceptionSelectorRegister = 0;