diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 90d64832d71..bfc35068994 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -1229,6 +1229,50 @@ void SelectionDAG::ComputeMaskedBits(SDOperand Op, const APInt &Mask, KnownZero = KnownZeroOut; return; } + case ISD::MUL: { + APInt Mask2 = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + + // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conservative estimate for high known-0 bits: with A and + // B leading zeros the full product is < 2^(2*BitWidth-A-B), so it has at + // least A+B-BitWidth of them. Sufficient for alignment computation. + KnownOne.clear(); + unsigned TrailZ = KnownZero.countTrailingOnes() + + KnownZero2.countTrailingOnes(); + unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + + KnownZero2.countLeadingOnes(), + BitWidth) - BitWidth; + + TrailZ = std::min(TrailZ, BitWidth); + LeadZ = std::min(LeadZ, BitWidth); + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | + APInt::getHighBitsSet(BitWidth, LeadZ); + KnownZero &= Mask; + return; + } + case ISD::UDIV: { + // For the purposes of computing leading zeros we can conservatively + // treat a udiv as a logical right shift by the largest power of 2 known + // to be no greater than the denominator. 
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op.getOperand(0), + AllOnes, KnownZero2, KnownOne2, Depth+1); + unsigned LeadZ = KnownZero2.countLeadingOnes(); + + KnownOne2.clear(); + KnownZero2.clear(); + ComputeMaskedBits(Op.getOperand(1), + AllOnes, KnownZero2, KnownOne2, Depth+1); + unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); + if (RHSUnknownLeadingOnes != BitWidth) + LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + return; + } case ISD::SELECT: ComputeMaskedBits(Op.getOperand(2), Mask, KnownZero, KnownOne, Depth+1); ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); @@ -1469,48 +1513,95 @@ void SelectionDAG::ComputeMaskedBits(SDOperand Op, const APInt &Mask, KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - 1); return; + case ISD::SUB: { + if (ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0))) { + // We know that the top bits of C-X are clear if X contains less bits + // than C (i.e. no wrap-around can happen). For example, 20-X is + // positive if we can prove that X is >= 0 and < 16. + if (CLHS->getAPIntValue().isNonNegative()) { + unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); + // NLZ can't be BitWidth with no sign bit + APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); + ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero2, KnownOne2, + Depth+1); + + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. + if ((KnownZero2 & MaskV) == MaskV) { + unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); + // Top bits known zero. + KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; + } + } + } + } + // fall through case ISD::ADD: { - // If either the LHS or the RHS are Zero, the result is zero. 
- ComputeMaskedBits(Op.getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(Op.getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. - unsigned KnownZeroOut = std::min(KnownZero.countTrailingOnes(), - KnownZero2.countTrailingOnes()); - - KnownZero = APInt::getLowBitsSet(BitWidth, KnownZeroOut); - KnownOne = APInt(BitWidth, 0); + APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes()); + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); + + ComputeMaskedBits(Op.getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + KnownZeroOut = std::min(KnownZeroOut, + KnownZero2.countTrailingOnes()); + + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); return; } - case ISD::SUB: { - ConstantSDNode *CLHS = dyn_cast<ConstantSDNode>(Op.getOperand(0)); - if (!CLHS) return; + case ISD::SREM: + if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt RA = Rem->getAPIntValue(); + if (RA.isPowerOf2() || (-RA).isPowerOf2()) { + APInt LowBits = RA.isStrictlyPositive() ? (RA - 1) : ~RA; + APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); + ComputeMaskedBits(Op.getOperand(0), Mask2,KnownZero2,KnownOne2,Depth+1); - // We know that the top bits of C-X are clear if X contains less bits - // than C (i.e. no wrap-around can happen). For example, 20-X is - // positive if we can prove that X is >= 0 and < 16. 
- if (CLHS->getAPIntValue().isNonNegative()) { - unsigned NLZ = (CLHS->getAPIntValue()+1).countLeadingZeros(); - // NLZ can't be BitWidth with no sign bit - APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - ComputeMaskedBits(Op.getOperand(1), MaskV, KnownZero, KnownOne, Depth+1); + // The low bits of the first operand pass through. The high bits copy the + // remainder's sign: zero if non-negative, one if negative and nonzero. + if (KnownZero2[BitWidth-1] || ((KnownZero2 & LowBits) == LowBits)) + KnownZero2 |= ~LowBits; + bool HighOnes = KnownOne2[BitWidth-1] && (KnownOne2 & LowBits) != 0; + KnownOne2 = HighOnes ? (KnownOne2 | ~LowBits) : (KnownOne2 & LowBits); - // If all of the MaskV bits are known to be zero, then we know the output - // top bits are zero, because we now know that the output is from [0-C]. - if ((KnownZero & MaskV) == MaskV) { - unsigned NLZ2 = CLHS->getAPIntValue().countLeadingZeros(); - // Top bits known zero. - KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; - KnownOne = APInt(BitWidth, 0); // No one bits known. - } else { - KnownZero = KnownOne = APInt(BitWidth, 0); // Otherwise, nothing known. + KnownZero |= KnownZero2 & Mask; + KnownOne |= KnownOne2 & Mask; + + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); } } return; + case ISD::UREM: { + if (ConstantSDNode *Rem = dyn_cast<ConstantSDNode>(Op.getOperand(1))) { + APInt RA = Rem->getAPIntValue(); + if (RA.isStrictlyPositive() && RA.isPowerOf2()) { + APInt LowBits = RA - 1; + APInt Mask2 = LowBits & Mask; + KnownZero |= ~LowBits & Mask; + ComputeMaskedBits(Op.getOperand(0), Mask2, KnownZero, KnownOne,Depth+1); + assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + break; + } + } + + // Since the result is less than or equal to either operand, any leading + // zero bits in either operand must also exist in the result. 
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(Op.getOperand(0), AllOnes, KnownZero, KnownOne, + Depth+1); + ComputeMaskedBits(Op.getOperand(1), AllOnes, KnownZero2, KnownOne2, + Depth+1); + + uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + KnownOne.clear(); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; + return; } default: // Allow the target to implement this method for its nodes. diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp index 6bf06e77ebf..655ab102599 100644 --- a/lib/Transforms/Scalar/InstructionCombining.cpp +++ b/lib/Transforms/Scalar/InstructionCombining.cpp @@ -700,15 +700,15 @@ void InstCombiner::ComputeMaskedBits(Value *V, const APInt &Mask, return; } + KnownZero.clear(); KnownOne.clear(); // Start out not knowing anything. + if (Depth == 6 || Mask == 0) return; // Limit search depth. User *I = dyn_cast(V); if (!I) return; - KnownZero.clear(); KnownOne.clear(); // Don't know anything. APInt KnownZero2(KnownZero), KnownOne2(KnownOne); - switch (getOpcode(I)) { default: break; case Instruction::And: { @@ -759,16 +759,42 @@ void InstCombiner::ComputeMaskedBits(Value *V, const APInt &Mask, assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // If low bits are zero in either operand, output low known-0 bits. + // Also compute a conserative estimate for high known-0 bits. // More trickiness is possible, but this is sufficient for the // interesting case of alignment computation. 
KnownOne.clear(); unsigned TrailZ = KnownZero.countTrailingOnes() + KnownZero2.countTrailingOnes(); + unsigned LeadZ = std::max(KnownZero.countLeadingOnes() + + KnownZero2.countLeadingOnes(), + BitWidth) - BitWidth; + TrailZ = std::min(TrailZ, BitWidth); - KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ); + LeadZ = std::min(LeadZ, BitWidth); + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ) | + APInt::getHighBitsSet(BitWidth, LeadZ); KnownZero &= Mask; return; } + case Instruction::UDiv: { + // For the purposes of computing leading zeros we can conservatively + // treat a udiv as a logical right shift by the largest power of 2 known + // to be no greater than the denominator. + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(I->getOperand(0), + AllOnes, KnownZero2, KnownOne2, Depth+1); + unsigned LeadZ = KnownZero2.countLeadingOnes(); + + KnownOne2.clear(); + KnownZero2.clear(); + ComputeMaskedBits(I->getOperand(1), + AllOnes, KnownZero2, KnownOne2, Depth+1); + unsigned RHSUnknownLeadingOnes = KnownOne2.countLeadingZeros(); + if (RHSUnknownLeadingOnes != BitWidth) + LeadZ = std::min(BitWidth, LeadZ + BitWidth - RHSUnknownLeadingOnes - 1); + KnownZero = APInt::getHighBitsSet(BitWidth, LeadZ) & Mask; + return; + } case Instruction::Select: ComputeMaskedBits(I->getOperand(2), Mask, KnownZero, KnownOne, Depth+1); ComputeMaskedBits(I->getOperand(1), Mask, KnownZero2, KnownOne2, Depth+1); @@ -900,38 +926,36 @@ void InstCombiner::ComputeMaskedBits(Value *V, const APInt &Mask, unsigned NLZ = (CLHS->getValue()+1).countLeadingZeros(); // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); - ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero, KnownOne, Depth+1); + ComputeMaskedBits(I->getOperand(1), MaskV, KnownZero2, KnownOne2, + Depth+1); - // If all of the MaskV bits are known to be zero, then we know the output - // top bits are zero, because we now know that the output is from [0-C]. 
- if ((KnownZero & MaskV) == MaskV) { + // If all of the MaskV bits are known to be zero, then we know the + // output top bits are zero, because we now know that the output is + // from [0-C]. + if ((KnownZero2 & MaskV) == MaskV) { unsigned NLZ2 = CLHS->getValue().countLeadingZeros(); // Top bits known zero. KnownZero = APInt::getHighBitsSet(BitWidth, NLZ2) & Mask; - KnownOne = APInt(BitWidth, 0); // No one bits known. - } else { - KnownZero = KnownOne = APInt(BitWidth, 0); // Otherwise, nothing known. } - return; } } } // fall through case Instruction::Add: { - // If either the LHS or the RHS are Zero, the result is zero. - ComputeMaskedBits(I->getOperand(1), Mask, KnownZero, KnownOne, Depth+1); - ComputeMaskedBits(I->getOperand(0), Mask, KnownZero2, KnownOne2, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - // Output known-0 bits are known if clear or set in both the low clear bits // common to both LHS & RHS. For example, 8+(X<<3) is known to have the // low 3 bits clear. 
- unsigned KnownZeroOut = std::min(KnownZero.countTrailingOnes(), - KnownZero2.countTrailingOnes()); - - KnownZero = APInt::getLowBitsSet(BitWidth, KnownZeroOut); - KnownOne = APInt(BitWidth, 0); + APInt Mask2 = APInt::getLowBitsSet(BitWidth, Mask.countTrailingOnes()); + ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + unsigned KnownZeroOut = KnownZero2.countTrailingOnes(); + + ComputeMaskedBits(I->getOperand(1), Mask2, KnownZero2, KnownOne2, Depth+1); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + KnownZeroOut = std::min(KnownZeroOut, + KnownZero2.countTrailingOnes()); + + KnownZero |= APInt::getLowBitsSet(BitWidth, KnownZeroOut); return; } case Instruction::SRem: @@ -956,7 +980,7 @@ void InstCombiner::ComputeMaskedBits(Value *V, const APInt &Mask, } } break; - case Instruction::URem: + case Instruction::URem: { if (ConstantInt *Rem = dyn_cast(I->getOperand(1))) { APInt RA = Rem->getValue(); if (RA.isStrictlyPositive() && RA.isPowerOf2()) { @@ -965,19 +989,24 @@ void InstCombiner::ComputeMaskedBits(Value *V, const APInt &Mask, KnownZero |= ~LowBits & Mask; ComputeMaskedBits(I->getOperand(0), Mask2, KnownZero, KnownOne,Depth+1); assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + break; } - } else { - // Since the result is less than or equal to RHS, any leading zero bits - // in RHS must also exist in the result. - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2, - Depth+1); - - uint32_t Leaders = KnownZero2.countLeadingOnes(); - KnownZero |= APInt::getHighBitsSet(BitWidth, Leaders) & Mask; - assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); } + + // Since the result is less than or equal to either operand, any leading + // zero bits in either operand must also exist in the result. 
+ APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(I->getOperand(0), AllOnes, KnownZero, KnownOne, + Depth+1); + ComputeMaskedBits(I->getOperand(1), AllOnes, KnownZero2, KnownOne2, + Depth+1); + + uint32_t Leaders = std::max(KnownZero.countLeadingOnes(), + KnownZero2.countLeadingOnes()); + KnownOne.clear(); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & Mask; break; + } case Instruction::Alloca: case Instruction::Malloc: { @@ -1088,6 +1117,20 @@ void InstCombiner::ComputeMaskedBits(Value *V, const APInt &Mask, } break; } + case Instruction::Call: + if (IntrinsicInst *II = dyn_cast(I)) { + switch (II->getIntrinsicID()) { + default: break; + case Intrinsic::ctpop: + case Intrinsic::ctlz: + case Intrinsic::cttz: { + unsigned LowBits = Log2_32(BitWidth)+1; + KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); + break; + } + } + } + break; } } @@ -1232,7 +1275,9 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask, APInt LHSKnownZero(BitWidth, 0), LHSKnownOne(BitWidth, 0); APInt &RHSKnownZero = KnownZero, &RHSKnownOne = KnownOne; switch (I->getOpcode()) { - default: break; + default: + ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); + break; case Instruction::And: // If either the LHS or the RHS are Zero, the result is zero. if (SimplifyDemandedBits(I->getOperand(1), DemandedMask, @@ -1578,6 +1623,9 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask, LHSKnownZero, LHSKnownOne, Depth+1)) return true; } + // Otherwise just hand the sub off to ComputeMaskedBits to fill in + // the known zeros and ones. 
+ ComputeMaskedBits(V, DemandedMask, RHSKnownZero, RHSKnownOne, Depth); break; case Instruction::Shl: if (ConstantInt *SA = dyn_cast<ConstantInt>(I->getOperand(1))) { @@ -1695,10 +1743,10 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask, } } break; - case Instruction::URem: + case Instruction::URem: { if (ConstantInt *Rem = dyn_cast<ConstantInt>(I->getOperand(1))) { APInt RA = Rem->getValue(); - if (RA.isPowerOf2()) { + if (RA.isStrictlyPositive() && RA.isPowerOf2()) { APInt LowBits = (RA - 1) | RA; APInt Mask2 = LowBits & DemandedMask; KnownZero |= ~LowBits & DemandedMask; @@ -1707,19 +1755,26 @@ bool InstCombiner::SimplifyDemandedBits(Value *V, APInt DemandedMask, return true; assert((KnownZero & KnownOne) == 0&&"Bits known to be one AND zero?"); + break; } - } else { - APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); - APInt AllOnes = APInt::getAllOnesValue(BitWidth); - if (SimplifyDemandedBits(I->getOperand(1), AllOnes, - KnownZero2, KnownOne2, Depth+1)) - return true; - - uint32_t Leaders = KnownZero2.countLeadingOnes(); - KnownZero |= APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; } + + APInt KnownZero2(BitWidth, 0), KnownOne2(BitWidth, 0); + APInt AllOnes = APInt::getAllOnesValue(BitWidth); + ComputeMaskedBits(I->getOperand(0), AllOnes, + KnownZero2, KnownOne2, Depth+1); + uint32_t Leaders = KnownZero2.countLeadingOnes(); + // Every bit of the divisor can change the remainder, so demand them all. + if (SimplifyDemandedBits(I->getOperand(1), AllOnes, + KnownZero2, KnownOne2, Depth+1)) + return true; + + Leaders = std::max(Leaders, + KnownZero2.countLeadingOnes()); + KnownZero = APInt::getHighBitsSet(BitWidth, Leaders) & DemandedMask; break; } + } // If the client is only demanding bits that we know, return the known // constant. 
diff --git a/test/Transforms/InstCombine/sext-misc.ll b/test/Transforms/InstCombine/sext-misc.ll new file mode 100644 index 00000000000..da69e85ff5f --- /dev/null +++ b/test/Transforms/InstCombine/sext-misc.ll @@ -0,0 +1,45 @@ +; RUN: llvm-as < %s | opt -instcombine | llvm-dis | not grep sext +; RUN: llvm-as < %s | llc -march=x86-64 | not grep movslq +; RUN: llvm-as < %s | llc -march=x86 | not grep sar + +declare i32 @llvm.ctpop.i32(i32) +declare i32 @llvm.ctlz.i32(i32) +declare i32 @llvm.cttz.i32(i32) + +define i64 @foo(i32 %x) { + %t = call i32 @llvm.ctpop.i32(i32 %x) + %s = sext i32 %t to i64 + ret i64 %s +} +define i64 @boo(i32 %x) { + %t = call i32 @llvm.ctlz.i32(i32 %x) + %s = sext i32 %t to i64 + ret i64 %s +} +define i64 @zoo(i32 %x) { + %t = call i32 @llvm.cttz.i32(i32 %x) + %s = sext i32 %t to i64 + ret i64 %s +} +define i64 @coo(i32 %x) { + %t = udiv i32 %x, 3 + %s = sext i32 %t to i64 + ret i64 %s +} +define i64 @moo(i32 %x) { + %t = urem i32 %x, 30000 + %s = sext i32 %t to i64 + ret i64 %s +} +define i64 @yoo(i32 %x) { + %u = lshr i32 %x, 3 + %t = mul i32 %u, 3 + %s = sext i32 %t to i64 + ret i64 %s +} +define i64 @voo(i32 %x) { + %t = and i32 %x, 511 + %u = sub i32 20000, %t + %s = sext i32 %u to i64 + ret i64 %s +}