diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 904c27e89d8..01e00caa3b2 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -130,6 +130,71 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW,
   }
 }
 
+static void ComputeMaskedBitsMul(Value *Op0, Value *Op1, bool NSW,
+                                 const APInt &Mask,
+                                 APInt &KnownZero, APInt &KnownOne,
+                                 APInt &KnownZero2, APInt &KnownOne2,
+                                 const TargetData *TD, unsigned Depth) {
+  unsigned BitWidth = Mask.getBitWidth();
+  APInt Mask2 = APInt::getAllOnesValue(BitWidth);  // Query every operand bit.
+  ComputeMaskedBits(Op1, Mask2, KnownZero, KnownOne, TD, Depth+1);
+  ComputeMaskedBits(Op0, Mask2, KnownZero2, KnownOne2, TD, Depth+1);
+  assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
+  assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
+
+  bool isKnownNegative = false;
+  bool isKnownNonNegative = false;
+  // If the multiplication is known not to overflow, compute the sign bit.
+  if (Mask.isNegative() && NSW) {
+    if (Op0 == Op1) {
+      // The product of a number with itself is non-negative.
+      isKnownNonNegative = true;
+    } else {
+      bool isKnownNonNegativeOp1 = KnownZero.isNegative();
+      bool isKnownNonNegativeOp0 = KnownZero2.isNegative();
+      bool isKnownNegativeOp1 = KnownOne.isNegative();
+      bool isKnownNegativeOp0 = KnownOne2.isNegative();
+      // The product of two numbers with the same sign is non-negative.
+      isKnownNonNegative = (isKnownNegativeOp1 && isKnownNegativeOp0) ||
+        (isKnownNonNegativeOp1 && isKnownNonNegativeOp0);
+      // The product of a negative number and a non-negative number is either
+      // negative or zero.
+      if (!isKnownNonNegative)
+        isKnownNegative = (isKnownNegativeOp1 && isKnownNonNegativeOp0 &&
+                           isKnownNonZero(Op0, TD, Depth)) ||
+                          (isKnownNegativeOp0 && isKnownNonNegativeOp1 &&
+                           isKnownNonZero(Op1, TD, Depth));
+    }
+  }
+
+  // If low bits are zero in either operand, output low known-0 bits.
+  // Also compute a conservative estimate for high known-0 bits.
+  // More trickiness is possible, but this is sufficient for the
+  // interesting case of alignment computation.
+  KnownOne.clearAllBits();  // Only the sign bit may be set again below.
+  unsigned TrailZ = KnownZero.countTrailingOnes() +
+                    KnownZero2.countTrailingOnes();
+  unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() +
+                             KnownZero2.countLeadingOnes(),
+                             BitWidth) - BitWidth;
+
+  TrailZ = std::min(TrailZ, BitWidth);
+  LeadZ = std::min(LeadZ, BitWidth);
+  KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
+              APInt::getHighBitsSet(BitWidth, LeadZ);
+  KnownZero &= Mask;
+
+  // Only make use of no-wrap flags if we failed to compute the sign bit
+  // directly.  This matters if the multiplication always overflows, in
+  // which case we prefer to follow the result of the direct computation,
+  // though as the program is invoking undefined behaviour we can choose
+  // whatever we like here.
+  if (isKnownNonNegative && !KnownOne.isNegative())
+    KnownZero.setBit(BitWidth - 1);
+  else if (isKnownNegative && !KnownZero.isNegative())
+    KnownOne.setBit(BitWidth - 1);
+}
+
 /// ComputeMaskedBits - Determine which of the bits specified in Mask are
 /// known to be either zero or one and return them in the KnownZero/KnownOne
 /// bit sets.  This code only analyzes bits in Mask, in order to short-circuit
@@ -294,68 +359,11 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
     return;
   }
   case Instruction::Mul: {
-    APInt Mask2 = APInt::getAllOnesValue(BitWidth);
-    ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero, KnownOne, TD,Depth+1);
-    ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, TD,
-                      Depth+1);
-    assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
-    assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?");
-
-    bool isKnownNegative = false;
-    bool isKnownNonNegative = false;
-    // If the multiplication is known not to overflow, compute the sign bit.
-    if (Mask.isNegative() &&
-        cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap()) {
-      Value *Op1 = I->getOperand(1), *Op2 = I->getOperand(0);
-      if (Op1 == Op2) {
-        // The product of a number with itself is non-negative.
-        isKnownNonNegative = true;
-      } else {
-        bool isKnownNonNegative1 = KnownZero.isNegative();
-        bool isKnownNonNegative2 = KnownZero2.isNegative();
-        bool isKnownNegative1 = KnownOne.isNegative();
-        bool isKnownNegative2 = KnownOne2.isNegative();
-        // The product of two numbers with the same sign is non-negative.
-        isKnownNonNegative = (isKnownNegative1 && isKnownNegative2) ||
-          (isKnownNonNegative1 && isKnownNonNegative2);
-        // The product of a negative number and a non-negative number is either
-        // negative or zero.
-        if (!isKnownNonNegative)
-          isKnownNegative = (isKnownNegative1 && isKnownNonNegative2 &&
-                             isKnownNonZero(Op2, TD, Depth)) ||
-                            (isKnownNegative2 && isKnownNonNegative1 &&
-                             isKnownNonZero(Op1, TD, Depth));
-      }
-    }
-
-    // If low bits are zero in either operand, output low known-0 bits.
-    // Also compute a conserative estimate for high known-0 bits.
-    // More trickiness is possible, but this is sufficient for the
-    // interesting case of alignment computation.
-    KnownOne.clearAllBits();
-    unsigned TrailZ = KnownZero.countTrailingOnes() +
-                      KnownZero2.countTrailingOnes();
-    unsigned LeadZ =  std::max(KnownZero.countLeadingOnes() +
-                               KnownZero2.countLeadingOnes(),
-                               BitWidth) - BitWidth;
-
-    TrailZ = std::min(TrailZ, BitWidth);
-    LeadZ = std::min(LeadZ, BitWidth);
-    KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) |
-                APInt::getHighBitsSet(BitWidth, LeadZ);
-    KnownZero &= Mask;
-
-    // Only make use of no-wrap flags if we failed to compute the sign bit
-    // directly.  This matters if the multiplication always overflows, in
-    // which case we prefer to follow the result of the direct computation,
-    // though as the program is invoking undefined behaviour we can choose
-    // whatever we like here.
-    if (isKnownNonNegative && !KnownOne.isNegative())
-      KnownZero.setBit(BitWidth - 1);
-    else if (isKnownNegative && !KnownZero.isNegative())
-      KnownOne.setBit(BitWidth - 1);
-
-    return;
+    bool NSW = cast<OverflowingBinaryOperator>(I)->hasNoSignedWrap();
+    ComputeMaskedBitsMul(I->getOperand(0), I->getOperand(1), NSW,
+                         Mask, KnownZero, KnownOne, KnownZero2, KnownOne2,
+                         TD, Depth);
+    break;
   }
   case Instruction::UDiv: {
     // For the purposes of computing leading zeros we can conservatively
@@ -777,6 +785,12 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
                                   KnownZero, KnownOne, KnownZero2, KnownOne2,
                                   TD, Depth);
           break;
+        case Intrinsic::umul_with_overflow:
+        case Intrinsic::smul_with_overflow:
+          ComputeMaskedBitsMul(II->getArgOperand(0), II->getArgOperand(1),
+                               false, Mask, KnownZero, KnownOne,
+                               KnownZero2, KnownOne2, TD, Depth);
+          break;
         }
       }
     }
diff --git a/test/Transforms/InstCombine/mul.ll b/test/Transforms/InstCombine/mul.ll
index 53a56434aed..edb530585ce 100644
--- a/test/Transforms/InstCombine/mul.ll
+++ b/test/Transforms/InstCombine/mul.ll
@@ -1,116 +1,184 @@
 ; This test makes sure that mul instructions are properly eliminated.
-; RUN: opt < %s -instcombine -S | not grep mul
+; RUN: opt < %s -instcombine -S | FileCheck %s
 
 define i32 @test1(i32 %A) {
+; CHECK: @test1
         %B = mul i32 %A, 1              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: ret i32 %A
 }
 
 define i32 @test2(i32 %A) {
+; CHECK: @test2
         ; Should convert to an add instruction
         %B = mul i32 %A, 2              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: shl i32 %A, 1
 }
 
 define i32 @test3(i32 %A) {
+; CHECK: @test3
         ; This should disappear entirely
         %B = mul i32 %A, 0              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: ret i32 0
 }
 
 define double @test4(double %A) {
+; CHECK: @test4
         ; This is safe for FP
         %B = fmul double 1.000000e+00, %A                ; <double> [#uses=1]
         ret double %B
+; CHECK: ret double %A
 }
 
 define i32 @test5(i32 %A) {
+; CHECK: @test5
         %B = mul i32 %A, 8              ; <i32> [#uses=1]
         ret i32 %B
+; CHECK: shl i32 %A, 3
 }
 
 define i8 @test6(i8 %A) {
+; CHECK: @test6
         %B = mul i8 %A, 8               ; <i8> [#uses=1]
         %C = mul i8 %B, 8               ; <i8> [#uses=1]
         ret i8 %C
+; CHECK: shl i8 %A, 6
 }
 
 define i32 @test7(i32 %i) {
+; CHECK: @test7
         %tmp = mul i32 %i, -1           ; <i32> [#uses=1]
         ret i32 %tmp
+; CHECK: sub i32 0, %i
 }
 
 define i64 @test8(i64 %i) {
-       ; tmp = sub 0, %i
+; CHECK: @test8
         %j = mul i64 %i, -1             ; <i64> [#uses=1]
         ret i64 %j
+; CHECK: sub i64 0, %i
 }
 
 define i32 @test9(i32 %i) {
-        ; %j = sub 0, %i
+; CHECK: @test9
         %j = mul i32 %i, -1             ; <i32> [#uses=1]
         ret i32 %j
+; CHECK: sub i32 0, %i
 }
 
 define i32 @test10(i32 %a, i32 %b) {
+; CHECK: @test10
         %c = icmp slt i32 %a, 0         ; <i1> [#uses=1]
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
        ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST10:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST10]], %b
+; CHECK-NEXT: ret i32 %e
 }
 
 define i32 @test11(i32 %a, i32 %b) {
+; CHECK: @test11
         %c = icmp sle i32 %a, -1                ; <i1> [#uses=1]
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
         ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST11:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST11]], %b
+; CHECK-NEXT: ret i32 %e
 }
 
-define i32 @test12(i8 %a, i32 %b) {
-        %c = icmp ugt i8 %a, 127                ; <i1> [#uses=1]
+define i32 @test12(i32 %a, i32 %b) {
+; CHECK: @test12
+        %c = icmp ugt i32 %a, 2147483647                ; <i1> [#uses=1]
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
-        ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST12:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST12]], %b
+; CHECK-NEXT: ret i32 %e
+
 }
 
 ; PR2642
 define internal void @test13(<4 x float>*) {
+; CHECK: @test13
 	load <4 x float>* %0, align 1
 	fmul <4 x float> %2, < float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 >
 	store <4 x float> %3, <4 x float>* %0, align 1
 	ret void
+; CHECK-NEXT: ret void
 }
 
 define <16 x i8> @test14(<16 x i8> %a) {
+; CHECK: @test14
         %b = mul <16 x i8> %a, zeroinitializer
         ret <16 x i8> %b
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
 }
 
 ; rdar://7293527
 define i32 @test15(i32 %A, i32 %B) {
+; CHECK: @test15
 entry:
   %shl = shl i32 1, %B
   %m = mul i32 %shl, %A
   ret i32 %m
+; CHECK: shl i32 %A, %B
 }
 
 ; X * Y (when Y is 0 or 1) --> x & (0-Y)
 define i32 @test16(i32 %b, i1 %c) {
+; CHECK: @test16
         %d = zext i1 %c to i32          ; <i32> [#uses=1]
         ; e = b & (a >> 31)
         %e = mul i32 %d, %b             ; <i32> [#uses=1]
         ret i32 %e
+; CHECK: [[TEST16:%.*]] = sext i1 %c to i32
+; CHECK-NEXT: %e = and i32 [[TEST16]], %b
+; CHECK-NEXT: ret i32 %e
 }
 
 ; X * Y (when Y is 0 or 1) --> x & (0-Y)
 define i32 @test17(i32 %a, i32 %b) {
+; CHECK: @test17
   %a.lobit = lshr i32 %a, 31
   %e = mul i32 %a.lobit, %b
   ret i32 %e
+; CHECK: [[TEST17:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: %e = and i32 [[TEST17]], %b
+; CHECK-NEXT: ret i32 %e
 }
 
+define i32 @test18(i32 %A, i32 %B) {
+; CHECK: @test18
+  %C = and i32 %A, 1
+  %D = and i32 %B, 1
 
+  %E = mul i32 %C, %D
+  %F = and i32 %E, 16
+  ret i32 %F
+; CHECK-NEXT: ret i32 0
+}
 
+declare {i32, i1} @llvm.smul.with.overflow.i32(i32, i32)
+declare void @use(i1)
+
+define i32 @test19(i32 %A, i32 %B) {
+; CHECK: @test19
+  %C = and i32 %A, 1
+  %D = and i32 %B, 1
+
+; It would be nice if we also started proving that this doesn't overflow.
+  %E = call {i32, i1} @llvm.smul.with.overflow.i32(i32 %C, i32 %D)
+  %F = extractvalue {i32, i1} %E, 0
+  %G = extractvalue {i32, i1} %E, 1
+  call void @use(i1 %G)
+  %H = and i32 %F, 16
+  ret i32 %H
+; CHECK: ret i32 0
+}