diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 904c27e89d8..01e00caa3b2 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -130,6 +130,76 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
   }
 }
 
+/// ComputeMaskedBitsMul - Compute the known-zero/known-one bits of the product
+/// Op0 * Op1 and return them in KnownZero/KnownOne, restricted to Mask.
+/// KnownZero2/KnownOne2 are scratch storage that receive the known bits of
+/// Op0.  NSW must only be true when the multiplication is known not to
+/// overflow in the signed sense; it is then used to infer the sign bit.
+static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW,
+                                 const APInt &Mask,
+                                 APInt &KnownZero, APInt &KnownOne,
+                                 APInt &KnownZero2, APInt &KnownOne2,
+                                 const TargetData *TD, unsigned Depth) {
+  unsigned BitWidth = Mask.getBitWidth();
+  APInt Mask2 = APInt::getAllOnesValue(BitWidth);
+  ComputeMaskedBits(Op1, Mask2, KnownZero, KnownOne, TD, Depth+1);
+  ComputeMaskedBits(Op0, Mask2, KnownZero2, KnownOne2, TD, Depth+1);
+  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+  assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+  bool isKnownNegative = false;
+  bool isKnownNonNegative = false;
+  // If the multiplication is known not to overflow, compute the sign bit.
+  if (Mask.isNegative() && NSW) {
+    if (Op0 == Op1) {
+      // The product of a number with itself is non-negative.
+      isKnownNonNegative = true;
+    } else {
+      bool isKnownNonNegativeOp1 = KnownZero.isNegative();
+      bool isKnownNonNegativeOp0 = KnownZero2.isNegative();
+      bool isKnownNegativeOp1 = KnownOne.isNegative();
+      bool isKnownNegativeOp0 = KnownOne2.isNegative();
+      // The product of two numbers with the same sign is non-negative.
+      isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
+        (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
+      // The product of a negative number and a non-negative number is either
+      // negative or zero.
+      if (!isKnownNonNegative)
+        isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
+                           isKnownNonZero(Op0, TD, Depth)) ||
+                          (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
+                           isKnownNonZero(Op1, TD, Depth));
+    }
+  }
+
+  // If low bits are zero in either operand, output low known-0 bits.
+  // Also compute a conservative estimate for high known-0 bits.
+  // More trickiness is possible, but this is sufficient for the
+  // interesting case of alignment computation.
+  KnownOne.clearAllBits();
+  unsigned TrailZ = KnownZero.countTrailingOnes() +
+                    KnownZero2.countTrailingOnes();
+  unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() +
+                             KnownZero2.countLeadingOnes(),
+                             BitWidth) - BitWidth;
+
+  TrailZ = std::min(TrailZ, BitWidth);
+  LeadZ = std::min(LeadZ, BitWidth);
+  KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+              APInt::getHighBitsSet(BitWidth, LeadZ);
+  KnownZero &= Mask;
+
+  // Only make use of no-wrap flags if we failed to compute the sign bit
+  // directly.  This matters if the multiplication always overflows, in
+  // which case we prefer to follow the result of the direct computation,
+  // though as the program is invoking undefined behaviour we can choose
+  // whatever we like here.
+  if (isKnownNonNegative && !KnownOne.isNegative())
+    KnownZero.setBit(BitWidth - 1);
+  else if (isKnownNegative && !KnownZero.isNegative())
+    KnownOne.setBit(BitWidth - 1);
+}
+
 /// ComputeMaskedBits - Determine which of the bits specified in Mask are
 /// known to be either zero or one and return them in the KnownZero/KnownOne
 /// bit sets.  This code only analyzes bits in Mask, in order to short-circuit
@@ -294,68 +364,11 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     return;
   }
   case Instruction::Mul: {
-    APInt Mask2 = APInt::getAllOnesValue(BitWidth);
-    ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1);
-    ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
-                      Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
-    bool isKnownNegative = false;
-    bool isKnownNonNegative = false;
-    // If the multiplication is known not to overflow, compute the sign bit.
-    if (Mask.isNegative() &&
-        cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap()) {
-      Value *Op1 = I->getOperand(1), *Op2 = I->getOperand(0);
-      if (Op1 == Op2) {
-        // The product of a number with itself is non-negative.
-        isKnownNonNegative = true;
-      } else {
-        bool isKnownNonNegative1 = KnownZero.isNegative();
-        bool isKnownNonNegative2 = KnownZero2.isNegative();
-        bool isKnownNegative1 = KnownOne.isNegative();
-        bool isKnownNegative2 = KnownOne2.isNegative();
-        // The product of two numbers with the same sign is non-negative.
-        isKnownNonNegative = (isKnownNegative1 && isKnownNegative2) ||
-          (isKnownNonNegative1 && isKnownNonNegative2);
-        // The product of a negative number and a non-negative number is either
-        // negative or zero.
-        if (!isKnownNonNegative)
-          isKnownNegative = (isKnownNegative1 && isKnownNonNegative2 &&
-                             isKnownNonZero(Op2, TD, Depth)) ||
-                            (isKnownNegative2 && isKnownNonNegative1 &&
-                             isKnownNonZero(Op1, TD, Depth));
-      }
-    }
-
-    // If low bits are zero in either operand, output low known-0 bits.
-    // Also compute a conserative estimate for high known-0 bits.
-    // More trickiness is possible, but this is sufficient for the
-    // interesting case of alignment computation.
-    KnownOne.clearAllBits();
-    unsigned TrailZ = KnownZero.countTrailingOnes() +
-                      KnownZero2.countTrailingOnes();
-    unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() +
-                               KnownZero2.countLeadingOnes(),
-                               BitWidth) - BitWidth;
-
-    TrailZ = std::min(TrailZ, BitWidth);
-    LeadZ = std::min(LeadZ, BitWidth);
-    KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
-                APInt::getHighBitsSet(BitWidth, LeadZ);
-    KnownZero &= Mask;
-
-    // Only make use of no-wrap flags if we failed to compute the sign bit
-    // directly.  This matters if the multiplication always overflows, in
-    // which case we prefer to follow the result of the direct computation,
-    // though as the program is invoking undefined behaviour we can choose
-    // whatever we like here.
-    if (isKnownNonNegative && !KnownOne.isNegative())
-      KnownZero.setBit(BitWidth - 1);
-    else if (isKnownNegative && !KnownZero.isNegative())
-      KnownOne.setBit(BitWidth - 1);
-
-    return;
+    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+    ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW,
+                         Mask, KnownZero, KnownOne, KnownZero2, KnownOne2,
+                         TD, Depth);
+    break;
   }
   case Instruction::UDiv: {
     // For the purposes of computing leading zeros we can conservatively
@@ -777,6 +790,12 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
                                   KnownZero, KnownOne, KnownZero2, KnownOne2,
                                   TD, Depth);
           break;
+        case Intrinsic::umul_with_overflow:
+        case Intrinsic::smul_with_overflow:
+          ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1),
+                               false, Mask, KnownZero, KnownOne,
+                               KnownZero2, KnownOne2, TD, Depth);
+          break;
         }
       }
     }
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 53a56434aed..edb530585ce 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -1,116 +1,184 @@
 ; This test makes sure that mul instructions are properly eliminated.
-; RUN: opt < %s -instcombine -S | not grep mul
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i32 @test1(i32 %A) {
+; CHECK: @test1
         %B = mul i32 %A, 1              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: ret i32 %A
 }
 
 define i32 @test2(i32 %A) {
+; CHECK: @test2
         ; Should convert to an add instruction
         %B = mul i32 %A, 2              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: shl i32 %A, 1
 }
 
 define i32 @test3(i32 %A) {
+; CHECK: @test3
         ; This should disappear entirely
         %B = mul i32 %A, 0              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: ret i32 0
 }
 
 define double @test4(double %A) {
+; CHECK: @test4
         ; This is safe for FP
         %B = fmul double 1.000000e+00, %A               ; <double> [#uses=1]
         ret double %B
+; CHECK: ret double %A
 }
 
 define i32 @test5(i32 %A) {
+; CHECK: @test5
         %B = mul i32 %A, 8              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: shl i32 %A, 3
 }
 
 define i8 @test6(i8 %A) {
+; CHECK: @test6
         %B = mul i8 %A, 8               ; <i8> [#uses=1]
         %C = mul i8 %B, 8               ; <i8> [#uses=1]
         ret i8 %C
+; CHECK: shl i8 %A, 6
 }
 
 define i32 @test7(i32 %i) {
+; CHECK: @test7
         %tmp = mul i32 %i, -1           ; <i32> [#uses=1]
         ret i32 %tmp
+; CHECK: sub i32 0, %i
 }
 
 define i64 @test8(i64 %i) {
-        ; tmp = sub 0, %i
+; CHECK: @test8
         %j = mul i64 %i, -1             ; <i64> [#uses=1]
         ret i64 %j
+; CHECK: sub i64 0, %i
 }
 
 define i32 @test9(i32 %i) {
-        ; %j = sub 0, %i
+; CHECK: @test9
         %j = mul i32 %i, -1             ; <i32> [#uses=1]
         ret i32 %j
+; CHECK: sub i32 0, %i
 }
 
 define i32 @test10(i32 %a, i32 %b) {
+; CHECK: @test10
         %c = icmp slt i32 %a, 0         ; <i1> [#uses=1]
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
         ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST10:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST10]], %b
+; CHECK-NEXT: ret i32 %e
 }
 
 define i32 @test11(i32 %a, i32 %b) {
+; CHECK: @test11
         %c = icmp sle i32 %a, -1        ; <i1> [#uses=1]
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
         ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST11:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST11]], %b
+; CHECK-NEXT: ret i32 %e
 }
 
-define i32 @test12(i8 %a, i32 %b) {
-        %c = icmp ugt i8 %a, 127        ; <i1> [#uses=1]
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK: @test12
+        %c = icmp ugt i32 %a, 2147483647                ; <i1> [#uses=1]
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
-        ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST12:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST12]], %b
+; CHECK-NEXT: ret i32 %e
+
 }
 
 ; PR2642
 define internal void @test13(<4 x float>*) {
+; CHECK: @test13
         load <4 x float>* %0, align 1
         fmul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >
         store <4 x float> %3, <4 x float>* %0, align 1
         ret void
+; CHECK-NEXT: ret void
 }
 
 define <16 x i8> @test14(<16 x i8> %a) {
+; CHECK: @test14
         %b = mul <16 x i8> %a, zeroinitializer
         ret <16 x i8> %b
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
 }
 
 ; rdar://7293527
 define i32 @test15(i32 %A, i32 %B) {
+; CHECK: @test15
 entry:
   %shl = shl i32 1, %B
   %m = mul i32 %shl, %A
   ret i32 %m
+; CHECK: shl i32 %A, %B
 }
 
 ; X * Y (when Y is 0 or 1) --> x & (0-Y)
 define i32 @test16(i32 %b, i1 %c) {
+; CHECK: @test16
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
         ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST16:%.*]] = sext i1 %c to i32
+; CHECK-NEXT: %e = and i32 [[TEST16]], %b
+; CHECK-NEXT: ret i32 %e
 }
 
 ; X * Y (when Y is 0 or 1) --> x & (0-Y)
 define i32 @test17(i32 %a, i32 %b) {
+; CHECK: @test17
   %a.lobit = lshr i32 %a, 31
   %e = mul i32 %a.lobit, %b
   ret i32 %e
+; CHECK: [[TEST17:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST17]], %b
+; CHECK-NEXT: ret i32 %e
 }
 
 
 
+define i32 @test18(i32 %A, i32 %B) {
+; CHECK: @test18
+  %C = and i32 %A, 1
+  %D = and i32 %B, 1
+  %E = mul i32 %C, %D
+  %F = and i32 %E, 16
+  ret i32 %F
+; CHECK-NEXT: ret i32 0
+}
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
+declare void @use(i1)
+
+define i32 @test19(i32 %A, i32 %B) {
+; CHECK: @test19
+  %C = and i32 %A, 1
+  %D = and i32 %B, 1
+
+; It would be nice if we also started proving that this doesn't overflow.
+  %E = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %C, i32 %D)
+  %F = extractvalue {i32, i1} %E, 0
+  %G = extractvalue {i32, i1} %E, 1
+  call void @use(i1 %G)
+  %H = and i32 %F, 16
+  ret i32 %H
+; CHECK: ret i32 0
+}