diff --git a/include/llvm/CodeGen/SelectionDAGISel.h b/include/llvm/CodeGen/SelectionDAGISel.h index bf557268474..b33b21da42a 100644 --- a/include/llvm/CodeGen/SelectionDAGISel.h +++ b/include/llvm/CodeGen/SelectionDAGISel.h @@ -131,6 +131,7 @@ private: void CodeGenAndEmitDAG(); void LowerArguments(BasicBlock *BB); + void ShrinkDemandedOps(); void ComputeLiveOutVRegInfo(); void HandlePHINodesInSuccessorBlocks(BasicBlock *LLVMBB); diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index dd28a87938f..15da8456f17 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -774,10 +774,12 @@ public: /// that want to combine struct TargetLoweringOpt { SelectionDAG &DAG; + bool ShrinkOps; SDValue Old; SDValue New; - explicit TargetLoweringOpt(SelectionDAG &InDAG) : DAG(InDAG) {} + explicit TargetLoweringOpt(SelectionDAG &InDAG, bool Shrink = false) : + DAG(InDAG), ShrinkOps(Shrink) {} bool CombineTo(SDValue O, SDValue N) { Old = O; @@ -1478,7 +1480,7 @@ public: } /// isZExtFree - Return true if any actual instruction that defines a - /// value of type Ty1 implicit zero-extends the value to Ty2 in the result + /// value of type Ty1 implicitly zero-extends the value to Ty2 in the result /// register. This does not necessarily include registers defined in /// unknown ways, such as incoming arguments, or copies from unknown /// virtual registers. 
Also, if isTruncateFree(Ty2, Ty1) is true, this diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 759fa0e1125..5ab92805f1a 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1688,18 +1688,18 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) - // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) + // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) // // do not sink logical op inside of a vector extend, since it may combine // into a vsetcc. - if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND|| + EVT Op0VT = N0.getOperand(0).getValueType(); + if ((N0.getOpcode() == ISD::ZERO_EXTEND || + N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || - (N0.getOpcode() == ISD::TRUNCATE && - !TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) && + (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) && !VT.isVector() && - N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && - (!LegalOperations || - TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) { + Op0VT == N1.getOperand(0).getValueType() && + (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); @@ -1839,6 +1839,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) return SDValue(N, 0); + // fold (zext_inreg (extload x)) -> (zextload x) if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { LoadSDNode *LN0 = cast(N0); @@ -1885,48 +1886,89 @@ SDValue DAGCombiner::visitAND(SDNode *N) { // fold (and (load 
x), 255) -> (zextload x, i8) // fold (and (extload x, i16), 255) -> (zextload x, i8) - if (N1C && N0.getOpcode() == ISD::LOAD) { - LoadSDNode *LN0 = cast(N0); + // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) + if (N1C && (N0.getOpcode() == ISD::LOAD || + (N0.getOpcode() == ISD::ANY_EXTEND && + N0.getOperand(0).getOpcode() == ISD::LOAD))) { + bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; + LoadSDNode *LN0 = HasAnyExt + ? cast(N0.getOperand(0)) + : cast(N0); if (LN0->getExtensionType() != ISD::SEXTLOAD && - LN0->isUnindexed() && N0.hasOneUse() && - // Do not change the width of a volatile load. - !LN0->isVolatile()) { - EVT ExtVT = MVT::Other; + LN0->isUnindexed() && N0.hasOneUse()) { uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); - if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())) - ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ + EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); + EVT LoadedVT = LN0->getMemoryVT(); - EVT LoadedVT = LN0->getMemoryVT(); + if (ExtVT == LoadedVT && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + if (HasAnyExt) { + SDValue Load = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), + LN0->getValueType(0), + LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), LN0->getSrcValueOffset(), + ExtVT, LN0->isVolatile(), LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.getOperand(0).getNode(), Load, Load.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } else { + SDValue Load = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), + LN0->getSrcValue(), LN0->getSrcValueOffset(), + ExtVT, LN0->isVolatile(), LN0->getAlignment()); + AddToWorkList(N); + CombineTo(N0.getNode(), Load, Load.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! 
+ } + } else if (!LN0->isVolatile()) { + // Do not change the width of a volatile load. + // Do not generate loads of non-round integer types since these can + // be expensive (and would be wrong if the type is not byte sized). + if (LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && + (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { + EVT PtrType = LN0->getOperand(1).getValueType(); - // Do not generate loads of non-round integer types since these can - // be expensive (and would be wrong if the type is not byte sized). - if (ExtVT != MVT::Other && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && - (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { - EVT PtrType = N0.getOperand(1).getValueType(); + // For big endian targets, we need to add an offset to the pointer + // to load the correct bytes. For little endian systems, we merely + // need to read fewer bytes from the same pointer. + unsigned LVTStoreBytes = LoadedVT.getStoreSize(); + unsigned EVTStoreBytes = ExtVT.getStoreSize(); + unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; + unsigned Alignment = LN0->getAlignment(); + SDValue NewPtr = LN0->getBasePtr(); - // For big endian targets, we need to add an offset to the pointer to - // load the correct bytes. For little endian systems, we merely need to - // read fewer bytes from the same pointer. 
- unsigned LVTStoreBytes = LoadedVT.getStoreSize(); - unsigned EVTStoreBytes = ExtVT.getStoreSize(); - unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; - unsigned Alignment = LN0->getAlignment(); - SDValue NewPtr = LN0->getBasePtr(); + if (TLI.isBigEndian()) { + NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, + NewPtr, DAG.getConstant(PtrOff, PtrType)); + Alignment = MinAlign(Alignment, PtrOff); + } - if (TLI.isBigEndian()) { - NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, - NewPtr, DAG.getConstant(PtrOff, PtrType)); - Alignment = MinAlign(Alignment, PtrOff); + AddToWorkList(NewPtr.getNode()); + if (HasAnyExt) { + SDValue Load = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), + LN0->getValueType(0), + LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset(), + ExtVT, LN0->isVolatile(), Alignment); + AddToWorkList(N); + CombineTo(N0.getOperand(0).getNode(), Load, Load.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } else { + SDValue Load = + DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, + LN0->getChain(), NewPtr, + LN0->getSrcValue(), LN0->getSrcValueOffset(), + ExtVT, LN0->isVolatile(), Alignment); + AddToWorkList(N); + CombineTo(N0.getNode(), Load, Load.getValue(1)); + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } + } } - - AddToWorkList(NewPtr.getNode()); - SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, LN0->getChain(), - NewPtr, LN0->getSrcValue(), LN0->getSrcValueOffset(), - ExtVT, LN0->isVolatile(), Alignment); - AddToWorkList(N); - CombineTo(N0.getNode(), Load, Load.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! } } } @@ -2778,9 +2820,17 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // However when after the source operand of SRL is optimized into AND, the SRL // itself may not be optimized further. Look for it and add the BRCOND into // the worklist. 
- if (N->hasOneUse() && - N->use_begin()->getOpcode() == ISD::BRCOND) - AddToWorkList(*N->use_begin()); + if (N->hasOneUse()) { + SDNode *Use = *N->use_begin(); + if (Use->getOpcode() == ISD::BRCOND) + AddToWorkList(Use); + else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { + // Also look past the truncate. + Use = *Use->use_begin(); + if (Use->getOpcode() == ISD::BRCOND) + AddToWorkList(Use); + } + } return SDValue(); } @@ -3198,7 +3248,10 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext (truncate x)) -> (and x, mask) if (N0.getOpcode() == ISD::TRUNCATE && - (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) { + (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) && + (!TLI.isTruncateFree(N0.getOperand(0).getValueType(), + N0.getValueType()) || + !TLI.isZExtFree(N0.getValueType(), VT))) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); @@ -3704,7 +3757,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); else // if the source and dest are the same type, we can drop both the extend - // and the truncate + // and the truncate. return N0.getOperand(0); } @@ -4515,6 +4568,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { N1.getOperand(0), N1.getOperand(1), N2); } + SDNode *Trunc = 0; + if (N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) { + // Look past the truncate. + Trunc = N1.getNode(); + N1 = N1.getOperand(0); + } + if (N1.hasOneUse() && N1.getOpcode() == ISD::SRL) { // Match this pattern so that we can generate simpler code: // @@ -4526,7 +4586,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // into // // %a = ... - // %b = and %a, 2 + // %b = and i32 %a, 2 // %c = setcc eq %b, 0 // brcond %c ... 
// @@ -4537,7 +4597,6 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { SDValue Op1 = N1.getOperand(1); if (Op0.getOpcode() == ISD::AND && - Op0.hasOneUse() && Op1.getOpcode() == ISD::Constant) { SDValue AndOp1 = Op0.getOperand(1); @@ -4552,12 +4611,21 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { Op0, DAG.getConstant(0, Op0.getValueType()), ISD::SETNE); + SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, SetCC, N2); + // Don't add the new BRCond into the worklist or else SimplifySelectCC + // will convert it back to (X & C1) >> C2. + CombineTo(N, NewBRCond, false); + // Truncate is dead. + if (Trunc) { + removeFromWorkList(Trunc); + DAG.DeleteNode(Trunc); + } // Replace the uses of SRL with SETCC DAG.ReplaceAllUsesOfValueWith(N1, SetCC); removeFromWorkList(N1.getNode()); DAG.DeleteNode(N1.getNode()); - return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), - MVT::Other, Chain, SetCC, N2); + return SDValue(N, 0); // Return N so it doesn't get rechecked! } } } diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f1c8650728c..ca8c17beffa 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2656,6 +2656,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, // size of the value, the shift/rotate count is guaranteed to be zero. 
if (VT == MVT::i1) return N1; + if (N2C && N2C->isNullValue()) + return N1; break; case ISD::FP_ROUND_INREG: { EVT EVT = cast(N2)->getVT(); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 3073dfe9cc6..8ed24cce744 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -438,6 +438,75 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB, SDB->clear(); } +void SelectionDAGISel::ShrinkDemandedOps() { + SmallVector Worklist; + + // Add all the dag nodes to the worklist. + Worklist.reserve(CurDAG->allnodes_size()); + for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), + E = CurDAG->allnodes_end(); I != E; ++I) + Worklist.push_back(I); + + APInt Mask; + APInt KnownZero; + APInt KnownOne; + + TargetLowering::TargetLoweringOpt TLO(*CurDAG, true); + while (!Worklist.empty()) { + SDNode *N = Worklist.back(); + Worklist.pop_back(); + + if (N->use_empty() && N != CurDAG->getRoot().getNode()) { + CurDAG->DeleteNode(N); + continue; + } + + // Run ShrinkDemandedOp on scalar binary operations. + if (N->getNumValues() == 1 && + N->getValueType(0).isSimple() && N->getValueType(0).isInteger()) { + DebugLoc dl = N->getDebugLoc(); + unsigned BitWidth = N->getValueType(0).getScalarType().getSizeInBits(); + APInt Demanded = APInt::getAllOnesValue(BitWidth); + APInt KnownZero, KnownOne; + if (TLI.SimplifyDemandedBits(SDValue(N, 0), Demanded, + KnownZero, KnownOne, TLO)) { + // Revisit the node. + Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), N), + Worklist.end()); + Worklist.push_back(N); + + // Replace the old value with the new one. 
+ DEBUG(errs() << "\nReplacing "; + TLO.Old.getNode()->dump(CurDAG); + errs() << "\nWith: "; + TLO.New.getNode()->dump(CurDAG); + errs() << '\n'); + + Worklist.push_back(TLO.New.getNode()); + CurDAG->ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); + + if (TLO.Old.getNode()->use_empty()) { + for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); + i != e; ++i) { + SDNode *OpNode = TLO.Old.getNode()->getOperand(i).getNode(); + if (OpNode->hasOneUse()) { + Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), + OpNode), + Worklist.end()); + Worklist.push_back(TLO.Old.getNode()->getOperand(i).getNode()); + } + } + + Worklist.erase(std::remove(Worklist.begin(), Worklist.end(), + TLO.Old.getNode()), + Worklist.end()); + CurDAG->DeleteNode(TLO.Old.getNode()); + } + } + } + } +} + void SelectionDAGISel::ComputeLiveOutVRegInfo() { SmallPtrSet VisitedNodes; SmallVector Worklist; @@ -609,8 +678,10 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { if (ViewISelDAGs) CurDAG->viewGraph("isel input for " + BlockName); - if (OptLevel != CodeGenOpt::None) + if (OptLevel != CodeGenOpt::None) { + ShrinkDemandedOps(); ComputeLiveOutVRegInfo(); + } // Third, instruction select all of the operations to machine code, adding the // code to the MachineBasicBlock. diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index d9a5a13666b..f7694dbd3e5 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -990,7 +990,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, ~KnownZero2 & NewMask)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-1 bits are only known if set in both the LHS & RHS. 
@@ -1024,7 +1024,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if (TLO.ShrinkDemandedConstant(Op, NewMask)) return true; // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // Output known-0 bits are only known if clear in both the LHS & RHS. @@ -1049,7 +1049,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, if ((KnownZero2 & NewMask) == NewMask) return TLO.CombineTo(Op, Op.getOperand(1)); // If the operation can be done in a smaller type, do so. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; // If all of the unknown bits are known to be zero on one side or the other @@ -1480,7 +1480,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, KnownOne2, TLO, Depth+1)) return true; // See if the operation should be performed at a smaller bit width. - if (TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) + if (TLO.ShrinkOps && TLO.ShrinkDemandedOp(Op, BitWidth, NewMask, dl)) return true; } // FALL THROUGH @@ -1876,7 +1876,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // Fold bit comparisons when we can. if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && - VT == N0.getValueType() && N0.getOpcode() == ISD::AND) + (VT == N0.getValueType() || + (isTypeLegal(VT) && VT.bitsLE(N0.getValueType()))) && + N0.getOpcode() == ISD::AND) if (ConstantSDNode *AndRHS = dyn_cast(N0.getOperand(1))) { EVT ShiftTy = DCI.isBeforeLegalize() ? @@ -1884,16 +1886,18 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (Cond == ISD::SETNE && C1 == 0) {// (X & 8) != 0 --> (X & 8) >> 3 // Perform the xform if the AND RHS is a single bit. 
if (isPowerOf2_64(AndRHS->getZExtValue())) { - return DAG.getNode(ISD::SRL, dl, VT, N0, + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, DAG.getConstant(Log2_64(AndRHS->getZExtValue()), - ShiftTy)); + ShiftTy))); } } else if (Cond == ISD::SETEQ && C1 == AndRHS->getZExtValue()) { // (X & 8) == 8 --> (X & 8) >> 3 // Perform the xform if C1 is a single bit. if (C1.isPowerOf2()) { - return DAG.getNode(ISD::SRL, dl, VT, N0, - DAG.getConstant(C1.logBase2(), ShiftTy)); + return DAG.getNode(ISD::TRUNCATE, dl, VT, + DAG.getNode(ISD::SRL, dl, N0.getValueType(), N0, + DAG.getConstant(C1.logBase2(), ShiftTy))); } } } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 0367165f5c4..7d92fd8fad6 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5681,58 +5681,51 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); } -static SDValue LowerToBT(SDValue Op0, SDValue Op1, ISD::CondCode CC, +/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node +/// if it's possible. +static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC, DebugLoc dl, SelectionDAG &DAG) { - // Lower (X & (1 << N)) == 0 to BT(X, N). - // Lower ((X >>u N) & 1) != 0 to BT(X, N). - // Lower ((X >>s N) & 1) != 0 to BT(X, N). 
- if (Op0.getOpcode() == ISD::AND && - Op0.hasOneUse() && - Op1.getOpcode() == ISD::Constant && - cast(Op1)->getZExtValue() == 0 && - (CC == ISD::SETEQ || CC == ISD::SETNE)) { - SDValue LHS, RHS; - if (Op0.getOperand(1).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op010C = - dyn_cast(Op0.getOperand(1).getOperand(0))) - if (Op010C->getZExtValue() == 1) { - LHS = Op0.getOperand(0); - RHS = Op0.getOperand(1).getOperand(1); - } - } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) { - if (ConstantSDNode *Op000C = - dyn_cast(Op0.getOperand(0).getOperand(0))) - if (Op000C->getZExtValue() == 1) { - LHS = Op0.getOperand(1); - RHS = Op0.getOperand(0).getOperand(1); - } - } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) { - ConstantSDNode *AndRHS = cast(Op0.getOperand(1)); - SDValue AndLHS = Op0.getOperand(0); - if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { - LHS = AndLHS.getOperand(0); - RHS = AndLHS.getOperand(1); + SDValue LHS, RHS; + if (Op0.getOperand(1).getOpcode() == ISD::SHL) { + if (ConstantSDNode *Op010C = + dyn_cast(Op0.getOperand(1).getOperand(0))) + if (Op010C->getZExtValue() == 1) { + LHS = Op0.getOperand(0); + RHS = Op0.getOperand(1).getOperand(1); } + } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) { + if (ConstantSDNode *Op000C = + dyn_cast(Op0.getOperand(0).getOperand(0))) + if (Op000C->getZExtValue() == 1) { + LHS = Op0.getOperand(1); + RHS = Op0.getOperand(0).getOperand(1); + } + } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) { + ConstantSDNode *AndRHS = cast(Op0.getOperand(1)); + SDValue AndLHS = Op0.getOperand(0); + if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) { + LHS = AndLHS.getOperand(0); + RHS = AndLHS.getOperand(1); } + } - if (LHS.getNode()) { - // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT - // instruction. Since the shift amount is in-range-or-undefined, we know - // that doing a bittest on the i16 value is ok. 
We extend to i32 because - // the encoding for the i16 version is larger than the i32 version. - if (LHS.getValueType() == MVT::i8) - LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); + if (LHS.getNode()) { + // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT + // instruction. Since the shift amount is in-range-or-undefined, we know + // that doing a bittest on the i16 value is ok. We extend to i32 because + // the encoding for the i16 version is larger than the i32 version. + if (LHS.getValueType() == MVT::i8) + LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); - // If the operand types disagree, extend the shift amount to match. Since - // BT ignores high bits (like shifts) we can use anyextend. - if (LHS.getValueType() != RHS.getValueType()) - RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS); + // If the operand types disagree, extend the shift amount to match. Since + // BT ignores high bits (like shifts) we can use anyextend. + if (LHS.getValueType() != RHS.getValueType()) + RHS = DAG.getNode(ISD::ANY_EXTEND, dl, LHS.getValueType(), RHS); - SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS); - unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; - return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, - DAG.getConstant(Cond, MVT::i8), BT); - } + SDValue BT = DAG.getNode(X86ISD::BT, dl, MVT::i32, LHS, RHS); + unsigned Cond = CC == ISD::SETEQ ? X86::COND_AE : X86::COND_B; + return DAG.getNode(X86ISD::SETCC, dl, MVT::i8, + DAG.getConstant(Cond, MVT::i8), BT); } return SDValue(); @@ -5746,9 +5739,18 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { ISD::CondCode CC = cast(Op.getOperand(2))->get(); // Optimize to BT if possible. - SDValue NewCond = LowerToBT(Op0, Op1, CC, dl, DAG); - if (NewCond.getNode()) - return NewCond; + // Lower (X & (1 << N)) == 0 to BT(X, N). + // Lower ((X >>u N) & 1) != 0 to BT(X, N). + // Lower ((X >>s N) & 1) != 0 to BT(X, N). 
+ if (Op0.getOpcode() == ISD::AND && + Op0.hasOneUse() && + Op1.getOpcode() == ISD::Constant && + cast(Op1)->getZExtValue() == 0 && + (CC == ISD::SETEQ || CC == ISD::SETNE)) { + SDValue NewSetCC = LowerToBT(Op0, CC, dl, DAG); + if (NewSetCC.getNode()) + return NewSetCC; + } bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); @@ -5946,6 +5948,23 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) { } } + if (addTest) { + // Look past the truncate. + if (Cond.getOpcode() == ISD::TRUNCATE) + Cond = Cond.getOperand(0); + + // We know the result of AND is compared against zero. Try to match + // it to BT. + if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { + SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG); + if (NewSetCC.getNode()) { + CC = NewSetCC.getOperand(0); + Cond = NewSetCC.getOperand(1); + addTest = false; + } + } + } + if (addTest) { CC = DAG.getConstant(X86::COND_NE, MVT::i8); Cond = EmitTest(Cond, X86::COND_NE, DAG); @@ -6103,6 +6122,23 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) { } } + if (addTest) { + // Look past the truncate. + if (Cond.getOpcode() == ISD::TRUNCATE) + Cond = Cond.getOperand(0); + + // We know the result of AND is compared against zero. Try to match + // it to BT. 
+ if (Cond.getOpcode() == ISD::AND && Cond.hasOneUse()) { + SDValue NewSetCC = LowerToBT(Cond, ISD::SETNE, dl, DAG); + if (NewSetCC.getNode()) { + CC = NewSetCC.getOperand(0); + Cond = NewSetCC.getOperand(1); + addTest = false; + } + } + } + if (addTest) { CC = DAG.getConstant(X86::COND_NE, MVT::i8); Cond = EmitTest(Cond, X86::COND_NE, DAG); diff --git a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll index f21da52315f..b6cd2d40d1a 100644 --- a/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll +++ b/test/CodeGen/Blackfin/2009-08-15-SetCC-Undef.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -march=bfin -verify-machineinstrs -; XFAIL: * ; An undef argument causes a setugt node to escape instruction selection. diff --git a/test/CodeGen/Blackfin/promote-logic.ll b/test/CodeGen/Blackfin/promote-logic.ll index c247aca0a5b..46da56681d4 100644 --- a/test/CodeGen/Blackfin/promote-logic.ll +++ b/test/CodeGen/Blackfin/promote-logic.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -march=bfin > %t +; XFAIL: * ; DAGCombiner::SimplifyBinOpWithSameOpcodeHands can produce an illegal i16 OR ; operation after LegalizeOps. 
diff --git a/test/CodeGen/CellSPU/mul_ops.ll b/test/CodeGen/CellSPU/mul_ops.ll index 031d6c37ce7..1e28fc7a918 100644 --- a/test/CodeGen/CellSPU/mul_ops.ll +++ b/test/CodeGen/CellSPU/mul_ops.ll @@ -11,7 +11,6 @@ ; RUN: grep shli %t1.s | count 4 ; RUN: grep shlhi %t1.s | count 4 ; RUN: grep ila %t1.s | count 2 -; RUN: grep xsbh %t1.s | count 4 target datalayout = "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128-i16:16:128-i8:8:128-i1:8:128-a0:0:128-v128:128:128-s0:128:128" target triple = "spu" diff --git a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll index 3317864c014..07a164d4264 100644 --- a/test/CodeGen/SystemZ/2009-06-02-Rotate.ll +++ b/test/CodeGen/SystemZ/2009-06-02-Rotate.ll @@ -5,8 +5,8 @@ target triple = "s390x-linux" define i32 @rotl(i32 %x, i32 %y, i32 %z) nounwind readnone { entry: - %shl = shl i32 %x, 0 ; [#uses=1] - %sub = sub i32 32, 0 ; [#uses=1] + %shl = shl i32 %x, 1 ; [#uses=1] + %sub = sub i32 32, 1 ; [#uses=1] %shr = lshr i32 %x, %sub ; [#uses=1] %or = or i32 %shr, %shl ; [#uses=1] ret i32 %or diff --git a/test/CodeGen/X86/xor-icmp.ll b/test/CodeGen/X86/xor-icmp.ll new file mode 100644 index 00000000000..a6bdb13ec6b --- /dev/null +++ b/test/CodeGen/X86/xor-icmp.ll @@ -0,0 +1,36 @@ +; RUN: llc < %s -march=x86 | FileCheck %s -check-prefix=X32 +; RUN: llc < %s -march=x86-64 | FileCheck %s -check-prefix=X64 + +define i32 @t(i32 %a, i32 %b) nounwind ssp { +entry: +; X32: t: +; X32: xorb +; X32-NOT: andb +; X32-NOT: shrb +; X32: testb $64 +; X32: jne + +; X64: t: +; X64-NOT: setne +; X64: xorl +; X64: testb $64 +; X64: jne + %0 = and i32 %a, 16384 + %1 = icmp ne i32 %0, 0 + %2 = and i32 %b, 16384 + %3 = icmp ne i32 %2, 0 + %4 = xor i1 %1, %3 + br i1 %4, label %bb1, label %bb + +bb: ; preds = %entry + %5 = tail call i32 (...)* @foo() nounwind ; [#uses=1] + ret i32 %5 + +bb1: ; preds = %entry + %6 = tail call i32 (...)* @bar() nounwind ; [#uses=1] + ret i32 %6 +} + +declare i32 @foo(...) 
+ +declare i32 @bar(...)