diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index bc5d699e538..d0d4f41d3b4 100644 --- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -705,80 +705,6 @@ static Value *dyn_castZExtVal(Value *V, Type *Ty) { return 0; } -const unsigned MaxDepth = 6; - -// \brief Recursively visits the possible right hand operands of a udiv -// instruction, seeing through select instructions, to determine if we can -// replace the udiv with something simpler. If we find that an operand is not -// able to simplify the udiv, we abort the entire transformation. -// -// Inserts any intermediate instructions used for the simplification into -// NewInstrs and returns a new instruction that depends upon them. -static Instruction *visitUDivOperand(Value *Op0, Value *Op1, - const BinaryOperator &I, - SmallVectorImpl &NewInstrs, - unsigned Depth = 0) { - { - // X udiv 2^C -> X >> C - // Check to see if this is an unsigned division with an exact power of 2, - // if so, convert to a right shift. - const APInt *C; - if (match(Op1, m_Power2(C))) { - BinaryOperator *LShr = BinaryOperator::CreateLShr( - Op0, ConstantInt::get(Op0->getType(), C->logBase2())); - if (I.isExact()) LShr->setIsExact(); - return LShr; - } - } - - if (ConstantInt *C = dyn_cast(Op1)) { - // X udiv C, where C >= signbit - if (C->getValue().isNegative()) { - ICmpInst *IC = new ICmpInst(ICmpInst::ICMP_ULT, Op0, C); - NewInstrs.push_back(IC); - - return SelectInst::Create(IC, Constant::getNullValue(I.getType()), - ConstantInt::get(I.getType(), 1)); - } - } - - // X udiv (C1 << N), where C1 is "1< X >> (N+C2) - { const APInt *CI; Value *N; - if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) || - match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) { - if (*CI != 1) { - N = BinaryOperator::CreateAdd( - N, ConstantInt::get(N->getType(), CI->logBase2())); - NewInstrs.push_back(cast(N)); - } - if (ZExtInst *Z = dyn_cast(Op1)) { - N = new ZExtInst(N, Z->getDestTy()); - NewInstrs.push_back(cast(N)); - } - BinaryOperator *LShr = BinaryOperator::CreateLShr(Op0, N); - if (I.isExact()) LShr->setIsExact(); - return LShr; - } - } - - // The remaining tests are all recursive, so bail out if we hit the limit. - if (Depth++ == MaxDepth) - return 0; - - if (SelectInst *SI = dyn_cast(Op1)) - if (Instruction *LHS = - visitUDivOperand(Op0, SI->getOperand(1), I, NewInstrs)) { - NewInstrs.push_back(LHS); - if (Instruction *RHS = - visitUDivOperand(Op0, SI->getOperand(2), I, NewInstrs)) { - NewInstrs.push_back(RHS); - return SelectInst::Create(SI->getCondition(), LHS, RHS); - } - } - - return 0; -} - Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); @@ -789,6 +715,30 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { if (Instruction *Common = commonIDivTransforms(I)) return Common; + { + // X udiv 2^C -> X >> C + // Check to see if this is an unsigned division with an exact power of 2, + // if so, convert to a right shift. + const APInt *C; + if (match(Op1, m_Power2(C))) { + BinaryOperator *LShr = + BinaryOperator::CreateLShr(Op0, + ConstantInt::get(Op0->getType(), + C->logBase2())); + if (I.isExact()) LShr->setIsExact(); + return LShr; + } + } + + if (ConstantInt *C = dyn_cast(Op1)) { + // X udiv C, where C >= signbit + if (C->getValue().isNegative()) { + Value *IC = Builder->CreateICmpULT(Op0, C); + return SelectInst::Create(IC, Constant::getNullValue(I.getType()), + ConstantInt::get(I.getType(), 1)); + } + } + // (x lshr C1) udiv C2 --> x udiv (C2 << C1) if (ConstantInt *C2 = dyn_cast(Op1)) { Value *X; @@ -799,6 +749,38 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { } } + // X udiv (C1 << N), where C1 is "1< X >> (N+C2) + { const APInt *CI; Value *N; + if (match(Op1, m_Shl(m_Power2(CI), m_Value(N))) || + match(Op1, m_ZExt(m_Shl(m_Power2(CI), m_Value(N))))) { + if (*CI != 1) + N = Builder->CreateAdd(N, + ConstantInt::get(N->getType(), CI->logBase2())); + if (ZExtInst *Z = dyn_cast(Op1)) + N = Builder->CreateZExt(N, Z->getDestTy()); + if (I.isExact()) + return BinaryOperator::CreateExactLShr(Op0, N); + return BinaryOperator::CreateLShr(Op0, N); + } + } + + // udiv X, (Select Cond, C1, C2) --> Select Cond, (shr X, C1), (shr X, C2) + // where C1&C2 are powers of two. + { Value *Cond; const APInt *C1, *C2; + if (match(Op1, m_Select(m_Value(Cond), m_Power2(C1), m_Power2(C2)))) { + // Construct the "on true" case of the select + Value *TSI = Builder->CreateLShr(Op0, C1->logBase2(), Op1->getName()+".t", + I.isExact()); + + // Construct the "on false" case of the select + Value *FSI = Builder->CreateLShr(Op0, C2->logBase2(), Op1->getName()+".f", + I.isExact()); + + // construct the select instruction and return it. + return SelectInst::Create(Cond, TSI, FSI); + } + } + // (zext A) udiv (zext B) --> zext (A udiv B) if (ZExtInst *ZOp0 = dyn_cast(Op0)) if (Value *ZOp1 = dyn_castZExtVal(Op1, ZOp0->getSrcTy())) @@ -806,21 +788,6 @@ Instruction *InstCombiner::visitUDiv(BinaryOperator &I) { I.isExact()), I.getType()); - // (LHS udiv (select (select (...)))) -> (LHS >> (select (select (...)))) - SmallVector NewInstrs; - Instruction *RetI = visitUDivOperand(Op0, Op1, I, NewInstrs); - for (unsigned i = 0, e = NewInstrs.size(); i != e; i++) - // If we managed to replace the UDiv completely, insert the new intermediate - // instructions before where the UDiv was. - // If we couldn't, we must clean up after ourselves by deleting the new - // instructions. - if (RetI) - NewInstrs[i]->insertBefore(&I); - else - delete NewInstrs[i]; - if (RetI) - return RetI; - return 0; } diff --git a/test/Transforms/InstCombine/div-shift-crash.ll b/test/Transforms/InstCombine/div-shift-crash.ll new file mode 100644 index 00000000000..a6197245640 --- /dev/null +++ b/test/Transforms/InstCombine/div-shift-crash.ll @@ -0,0 +1,101 @@ +; RUN: opt -instcombine < %s +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.S0.0.1.2.3.4.13.22.31.44.48.53.54.55.56.58.59.60.66.68.70.74.77.106.107.108.109.110.113.117.118.128.129 = type <{ i64 }> + +; Function Attrs: nounwind +define void @main() #0 { +entry: + %l_819.i.i = alloca %struct.S0.0.1.2.3.4.13.22.31.44.48.53.54.55.56.58.59.60.66.68.70.74.77.106.107.108.109.110.113.117.118.128.129, align 8 + br i1 undef, label %land.lhs.true, label %for.cond.i + +land.lhs.true: ; preds = %entry + br label %for.cond.i + +for.cond.i: ; preds = %land.lhs.true, %entry + %0 = getelementptr inbounds %struct.S0.0.1.2.3.4.13.22.31.44.48.53.54.55.56.58.59.60.66.68.70.74.77.106.107.108.109.110.113.117.118.128.129* %l_819.i.i, i64 0, i32 0 + br label %for.cond.i6.i.i + +for.cond.i6.i.i: ; preds = %for.body.i8.i.i, %for.cond.i + br i1 undef, label %for.body.i8.i.i, label %lbl_707.i.i.i + +for.body.i8.i.i: ; preds = %for.cond.i6.i.i + br label %for.cond.i6.i.i + +lbl_707.i.i.i: ; preds = %for.cond.i6.i.i + br i1 undef, label %lor.rhs.i.i.i, label %lor.end.i.i.i + +lor.rhs.i.i.i: ; preds = %lbl_707.i.i.i + br label %lor.end.i.i.i + +lor.end.i.i.i: ; preds = %lor.rhs.i.i.i, %lbl_707.i.i.i + br label %for.cond1.i.i.i.i + +for.cond1.i.i.i.i: ; preds = %for.body4.i.i.i.i, %lor.end.i.i.i + br i1 undef, label %for.body4.i.i.i.i, label %func_39.exit.i.i + +for.body4.i.i.i.i: ; preds = %for.cond1.i.i.i.i + br label %for.cond1.i.i.i.i + +func_39.exit.i.i: ; preds = %for.cond1.i.i.i.i + %l_8191.sroa.0.0.copyload.i.i = load i64* %0, align 1 + br label %for.cond1.i.i.i + +for.cond1.i.i.i: ; preds = %safe_div_func_uint32_t_u_u.exit.i.i.i, %func_39.exit.i.i + br i1 undef, label %for.cond7.i.i.i, label %func_11.exit.i + +for.cond7.i.i.i: ; preds = %for.end30.i.i.i, %for.cond1.i.i.i + %storemerge.i.i.i = phi i32 [ %sub.i.i.i, %for.end30.i.i.i ], [ 4, %for.cond1.i.i.i ] + br i1 undef, label %for.cond22.i.i.i, label %for.end32.i.i.i + +for.cond22.i.i.i: ; preds = %for.body25.i.i.i, %for.cond7.i.i.i + br i1 undef, label %for.body25.i.i.i, label %for.end30.i.i.i + +for.body25.i.i.i: ; preds = %for.cond22.i.i.i + br label %for.cond22.i.i.i + +for.end30.i.i.i: ; preds = %for.cond22.i.i.i + %sub.i.i.i = add nsw i32 0, -1 + br label %for.cond7.i.i.i + +for.end32.i.i.i: ; preds = %for.cond7.i.i.i + %conv33.i.i.i = trunc i64 %l_8191.sroa.0.0.copyload.i.i to i32 + %xor.i.i.i.i = xor i32 %storemerge.i.i.i, -701565022 + %sub.i.i.i.i = sub nsw i32 0, %storemerge.i.i.i + %xor3.i.i.i.i = xor i32 %sub.i.i.i.i, %storemerge.i.i.i + %and4.i.i.i.i = and i32 %xor.i.i.i.i, %xor3.i.i.i.i + %cmp.i.i.i.i = icmp slt i32 %and4.i.i.i.i, 0 + %sub5.i.i.i.i = sub nsw i32 -701565022, %storemerge.i.i.i + %.sub5.i.i.i.i = select i1 %cmp.i.i.i.i, i32 -701565022, i32 %sub5.i.i.i.i + br i1 undef, label %safe_div_func_uint32_t_u_u.exit.i.i.i, label %cond.false.i.i.i.i + +cond.false.i.i.i.i: ; preds = %for.end32.i.i.i + %div.i.i.i.i = udiv i32 %conv33.i.i.i, %.sub5.i.i.i.i + br label %safe_div_func_uint32_t_u_u.exit.i.i.i + +safe_div_func_uint32_t_u_u.exit.i.i.i: ; preds = %cond.false.i.i.i.i, %for.end32.i.i.i + %cond.i.i.i.i = phi i32 [ %div.i.i.i.i, %cond.false.i.i.i.i ], [ %conv33.i.i.i, %for.end32.i.i.i ] + %cmp35.i.i.i = icmp ne i32 %cond.i.i.i.i, -7 + br label %for.cond1.i.i.i + +func_11.exit.i: ; preds = %for.cond1.i.i.i + br i1 undef, label %for.body, label %for.end + +for.body: ; preds = %func_11.exit.i + unreachable + +for.end: ; preds = %func_11.exit.i + br label %for.cond15 + +for.cond15: ; preds = %for.cond19, %for.end + br i1 undef, label %for.cond19, label %for.end45 + +for.cond19: ; preds = %for.cond15 + br label %for.cond15 + +for.end45: ; preds = %for.cond15 + unreachable +} + +attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" } diff --git a/test/Transforms/InstCombine/div-shift.ll b/test/Transforms/InstCombine/div-shift.ll index 46d0f9afd70..6be5cc4ca11 100644 --- a/test/Transforms/InstCombine/div-shift.ll +++ b/test/Transforms/InstCombine/div-shift.ll @@ -1,4 +1,5 @@ ; RUN: opt < %s -instcombine -S | FileCheck %s +; XFAIL: * define i32 @t1(i16 zeroext %x, i32 %y) nounwind { entry: