InstCombine: Be more aggressive optimizing 'udiv' instrs with 'select' denoms

Real world code sometimes has the denominator of a 'udiv' be a
'select'.  LLVM can handle such cases but only when the 'select'
operands are symmetric in structure (both select operands are a constant
power of two or a left shift, etc.).  This falls apart if we are dealt a
'udiv' where the code is not symmetric or if the select operands lead us
to more select instructions.

Instead, we should treat the LHS and each select operand as a distinct
divide operation and try to optimize them independently.  If we can
simplify each operation, then we can replace the 'udiv' with, say, a
'lshr' that has a new select with a bunch of new operands for the
select.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@185257 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
David Majnemer 2013-06-29 08:40:07 +00:00
parent d4a9ebc734
commit f723e5d1c2
2 changed files with 127 additions and 56 deletions

View File

@ -705,6 +705,80 @@ static Value *dyn_castZExtVal(Value *V, Type *Ty) {
return 0;
}
const unsigned MaxDepth = 6;
/// \brief Recursively visits the possible right hand operands of a udiv
/// instruction, seeing through select instructions, to determine if we can
/// replace the udiv with something simpler.  If we find that an operand is not
/// able to simplify the udiv, we abort the entire transformation.
///
/// \param Op0       The dividend of the udiv being simplified.
/// \param Op1       The candidate divisor: the udiv's own right hand operand
///                  at the top level, or one arm of a select when recursing.
/// \param I         The original udiv; only its 'exact' flag and result type
///                  are consulted here.
/// \param NewInstrs Receives every intermediate instruction created along the
///                  way, in the order in which they must be inserted.
/// \param Depth     Number of select levels already looked through.
///
/// \returns A new instruction computing Op0 udiv Op1 that depends upon the
/// entries of NewInstrs, or 0 if no simplification was found.  None of the
/// instructions created here are inserted into a basic block.  On failure,
/// instructions created for an already-visited select arm are left in
/// NewInstrs, so the caller must dispose of them.
static Instruction *visitUDivOperand(Value *Op0, Value *Op1,
                                     const BinaryOperator &I,
                                     SmallVectorImpl<Instruction *> &NewInstrs,
                                     unsigned Depth = 0) {
  {
    // X udiv 2^C -> X >> C
    // Check to see if this is an unsigned division with an exact power of 2,
    // if so, convert to a right shift.
    const APInt *C;
    if (match(Op1, m_Power2(C))) {
      BinaryOperator *LShr = BinaryOperator::CreateLShr(
          Op0, ConstantInt::get(Op0->getType(), C->logBase2()));
      if (I.isExact())
        LShr->setIsExact();
      return LShr;
    }
  }

  if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
    // X udiv C, where C >= signbit
    // The quotient can only be 0 (when X < C) or 1 (otherwise).
    if (C->getValue().isNegative()) {
      ICmpInst *IC = new ICmpInst(ICmpInst::ICMP_ULT, Op0, C);
      NewInstrs.push_back(IC);
      return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
                                ConstantInt::get(I.getType(), 1));
    }
  }

  // X udiv (C1 << N), where C1 is "1<<C2"  -->  X >> (N+C2)
  {
    const APInt *CI;
    Value *N;
    if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) ||
        match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) {
      // Fold the constant's log2 into the shift amount unless it is zero.
      if (*CI != 1) {
        N = BinaryOperator::CreateAdd(
            N, ConstantInt::get(N->getType(), CI->logBase2()));
        NewInstrs.push_back(cast<Instruction>(N));
      }
      // If the shl was zero-extended, widen the shift amount the same way so
      // that it matches the type of Op0.
      if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1)) {
        N = new ZExtInst(N, Z->getDestTy());
        NewInstrs.push_back(cast<Instruction>(N));
      }
      BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N);
      if (I.isExact())
        LShr->setIsExact();
      return LShr;
    }
  }

  // The remaining tests are all recursive, so bail out if we hit the limit.
  if (Depth == MaxDepth)
    return 0;

  // Both arms of a select must simplify for the select itself to simplify.
  // Depth + 1 is passed down explicitly: relying on the default argument
  // would restart the count at every level and make MaxDepth ineffective.
  if (SelectInst *SI = dyn_cast<SelectInst>(Op1))
    if (Instruction *LHS = visitUDivOperand(Op0, SI->getOperand(1), I,
                                            NewInstrs, Depth + 1)) {
      NewInstrs.push_back(LHS);
      if (Instruction *RHS = visitUDivOperand(Op0, SI->getOperand(2), I,
                                              NewInstrs, Depth + 1)) {
        NewInstrs.push_back(RHS);
        return SelectInst::Create(SI->getCondition(), LHS, RHS);
      }
    }

  return 0;
}
Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
@ -715,30 +789,6 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
if (Instruction *Common = commonIDivTransforms(I))
return Common;
{
// X udiv 2^C -> X >> C
// Check to see if this is an unsigned division with an exact power of 2,
// if so, convert to a right shift.
const APInt *C;
if (match(Op1, m_Power2(C))) {
BinaryOperator *LShr =
BinaryOperator::CreateLShr(Op0,
ConstantInt::get(Op0->getType(),
C->logBase2()));
if (I.isExact()) LShr->setIsExact();
return LShr;
}
}
if (ConstantInt *C = dyn_cast<ConstantInt>(Op1)) {
// X udiv C, where C >= signbit
if (C->getValue().isNegative()) {
Value *IC = Builder->CreateICmpULT(Op0, C);
return SelectInst::Create(IC, Constant::getNullValue(I.getType()),
ConstantInt::get(I.getType(), 1));
}
}
// (x lshr C1) udiv C2 --> x udiv (C2 << C1)
if (ConstantInt *C2 = dyn_cast<ConstantInt>(Op1)) {
Value *X;
@ -749,38 +799,6 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
}
}
// X udiv (C1 << N), where C1 is "1<<C2" --> X >> (N+C2)
{ const APInt *CI; Value *N;
if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) ||
match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) {
if (*CI != 1)
N = Builder->CreateAdd(N,
ConstantInt::get(N->getType(), CI->logBase2()));
if (ZExtInst *Z = dyn_cast<ZExtInst>(Op1))
N = Builder->CreateZExt(N, Z->getDestTy());
if (I.isExact())
return BinaryOperator::CreateExactLShr(Op0, N);
return BinaryOperator::CreateLShr(Op0, N);
}
}
// udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2)
// where C1&C2 are powers of two.
{ Value *Cond; const APInt *C1, *C2;
if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) {
// Construct the "on true" case of the select
Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t",
I.isExact());
// Construct the "on false" case of the select
Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f",
I.isExact());
// construct the select instruction and return it.
return SelectInst::Create(Cond, TSI, FSI);
}
}
// (zext A) udiv (zext B) --> zext (A udiv B)
if (ZExtInst *ZOp0 = dyn_cast<ZExtInst>(Op0))
if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy()))
@ -788,6 +806,21 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) {
I.isExact()),
I.getType());
// (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...))))
SmallVector<Instruction *, 4> NewInstrs;
Instruction *RetI = visitUDivOperand(Op0, Op1, I, NewInstrs);
for (unsigned i = 0, e = NewInstrs.size(); i != e; i++)
// If we managed to replace the UDiv completely, insert the new intermediate
// instructions before where the UDiv was.
// If we couldn't, we must clean up after ourselves by deleting the new
// instructions.
if (RetI)
NewInstrs[i]->insertBefore(&I);
else
delete NewInstrs[i];
if (RetI)
return RetI;
return 0;
}

View File

@ -35,3 +35,41 @@ define i64 @t3(i64 %x, i32 %y) nounwind {
%3 = udiv i64 %x, %2
ret i64 %3
}
; The divisor is a select between the constant 32 and (1 << %y), chosen by
; comparing (1 << %y) against 32.  The expected output has no udiv: %x is
; shifted right by a select between 5 and %y, chosen by comparing %y against 5.
define i32 @t4(i32 %x, i32 %y) nounwind {
; CHECK: t4
; CHECK-NOT: udiv
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %y, 5
; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 5, i32 %y
; CHECK-NEXT: [[SHR:%.*]] = lshr i32 %x, [[SEL]]
; CHECK-NEXT: ret i32 [[SHR]]
%1 = shl i32 1, %y
%2 = icmp ult i32 %1, 32
%3 = select i1 %2, i32 32, i32 %1
%4 = udiv i32 %x, %3
ret i32 %4
}
; The divisor is a nested select: the outer select picks between an inner
; select of the constants 32 and 64 and the value (1 << %V).  The expected
; output has no udiv: %V is shifted right by a matching pair of selects whose
; leaves are the shift amounts 5, 6 and %V.
define i32 @t5(i1 %x, i1 %y, i32 %V) nounwind {
; CHECK: t5
; CHECK-NOT: udiv
; CHECK-NEXT: [[SEL1:%.*]] = select i1 %x, i32 5, i32 6
; CHECK-NEXT: [[SEL2:%.*]] = select i1 %y, i32 [[SEL1]], i32 %V
; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 %V, [[SEL2]]
; CHECK-NEXT: ret i32 [[LSHR]]
%1 = shl i32 1, %V
%2 = select i1 %x, i32 32, i32 64
%3 = select i1 %y, i32 %2, i32 %1
%4 = udiv i32 %V, %3
ret i32 %4
}
; The divisor is a select between the constant 1 and %x itself, chosen by
; whether %x is zero.  The directives below require the compare of %x against
; zero to come first in the output and forbid a division of %z directly by %x
; after it.
define i32 @t6(i32 %x, i32 %z) nounwind{
; CHECK: t6
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %x, 0
; CHECK-NOT: udiv i32 %z, %x
%x_is_zero = icmp eq i32 %x, 0
%divisor = select i1 %x_is_zero, i32 1, i32 %x
%y = udiv i32 %z, %divisor
ret i32 %y
}