diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 811f173cd53..75828414a68 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -767,12 +767,19 @@ public: /// that want to combine struct TargetLoweringOpt { SelectionDAG &DAG; + bool LegalTys; + bool LegalOps; bool ShrinkOps; SDValue Old; SDValue New; - explicit TargetLoweringOpt(SelectionDAG &InDAG, bool Shrink = false) : - DAG(InDAG), ShrinkOps(Shrink) {} + explicit TargetLoweringOpt(SelectionDAG &InDAG, + bool LT, bool LO, + bool Shrink = false) : + DAG(InDAG), LegalTys(LT), LegalOps(LO), ShrinkOps(Shrink) {} + + bool LegalTypes() const { return LegalTys; } + bool LegalOperations() const { return LegalOps; } bool CombineTo(SDValue O, SDValue N) { Old = O; @@ -873,10 +880,19 @@ public: /// virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - /// PerformDAGCombinePromotion - This method query the target whether it is + /// isTypeDesirableForOp - Return true if the target has native support for + /// the specified value type and it is 'desirable' to use the type for the + /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 + /// instruction encodings are longer and some i16 instructions are slow. + virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const { + // By default, assume all legal types are desirable. + return isTypeLegal(VT); + } + + /// IsDesirableToPromoteOp - This method query the target whether it is /// beneficial for dag combiner to promote the specified node. If true, it /// should return the desired promotion type by reference. - virtual bool PerformDAGCombinePromotion(SDValue Op, EVT &PVT) const { + virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { return false; } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 9ba9bb5e38d..5fe67b5d3b9 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -582,9 +582,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, return SDValue(N, 0); } -void -DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt & - TLO) { +void DAGCombiner:: +CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { // Replace all uses. If any nodes become isomorphic to other nodes and // are deleted, make sure to remove them from our worklist. WorkListRemover DeadNodes(*this); @@ -614,7 +613,7 @@ DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt & /// it can be simplified or if things it uses can be simplified by bit /// propagation. If so, return true. bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { - TargetLowering::TargetLoweringOpt TLO(DAG); + TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); APInt KnownZero, KnownOne; if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) return false; @@ -634,18 +633,50 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { return true; } -static SDValue PromoteOperand(SDValue Op, EVT PVT, SelectionDAG &DAG) { - unsigned Opc = ISD::ZERO_EXTEND; - if (Op.getOpcode() == ISD::Constant) { +static SDValue PromoteOperand(SDValue Op, EVT PVT, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (LoadSDNode *LD = dyn_cast(Op)) { + return DAG.getExtLoad(ISD::EXTLOAD, Op.getDebugLoc(), PVT, + LD->getChain(), LD->getBasePtr(), + LD->getSrcValue(), LD->getSrcValueOffset(), + LD->getMemoryVT(), LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + } + + unsigned Opc = ISD::ANY_EXTEND; + if (Op.getOpcode() == ISD::Constant) // Zero extend things like i1, sign extend everything else. It shouldn't // matter in theory which one we pick, but this tends to give better code? // See DAGTypeLegalizer::PromoteIntRes_Constant. - if (Op.getValueType().isByteSized()) - Opc = ISD::SIGN_EXTEND; - } + Opc = Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; + if (!TLI.isOperationLegal(Opc, PVT)) + return SDValue(); return DAG.getNode(Opc, Op.getDebugLoc(), PVT, Op); } +static SDValue SExtPromoteOperand(SDValue Op, EVT PVT, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) + return SDValue(); + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = PromoteOperand(Op, PVT, DAG, TLI); + if (Op.getNode() == 0) + return SDValue(); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, + DAG.getValueType(OldVT)); +} + +static SDValue ZExtPromoteOperand(SDValue Op, EVT PVT, SelectionDAG &DAG, + const TargetLowering &TLI) { + EVT OldVT = Op.getValueType(); + DebugLoc dl = Op.getDebugLoc(); + Op = PromoteOperand(Op, PVT, DAG, TLI); + if (Op.getNode() == 0) + return SDValue(); + return DAG.getZeroExtendInReg(Op, dl, OldVT); +} + /// PromoteIntBinOp - Promote the specified integer binary operation if the /// target indicates it is beneficial. e.g. On x86, it's usually better to /// promote i16 operations to i32 since i16 instructions are longer. @@ -657,15 +688,37 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { if (VT.isVector() || !VT.isInteger()) return SDValue(); + // If operation type is 'undesirable', e.g. i16 on x86, consider + // promoting it. + unsigned Opc = Op.getOpcode(); + if (TLI.isTypeDesirableForOp(Opc, VT)) + return SDValue(); + EVT PVT = VT; - if (TLI.PerformDAGCombinePromotion(Op, PVT)) { + // Consult target whether it is a good idea to promote this operation and + // what's the right type to promote it to. + if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); - SDValue N0 = PromoteOperand(Op.getOperand(0), PVT, DAG); - AddToWorkList(N0.getNode()); + bool isShift = (Opc == ISD::SHL) || (Opc == ISD::SRA) || (Opc == ISD::SRL); + SDValue N0 = Op.getOperand(0); + if (Opc == ISD::SRA) + N0 = SExtPromoteOperand(Op.getOperand(0), PVT, DAG, TLI); + else if (Opc == ISD::SRL) + N0 = ZExtPromoteOperand(Op.getOperand(0), PVT, DAG, TLI); + else + N0 = PromoteOperand(N0, PVT, DAG, TLI); + if (N0.getNode() == 0) + return SDValue(); - SDValue N1 = PromoteOperand(Op.getOperand(1), PVT, DAG); - AddToWorkList(N1.getNode()); + SDValue N1 = Op.getOperand(1); + if (!isShift) { + N1 = PromoteOperand(N1, PVT, DAG, TLI); + if (N1.getNode() == 0) + return SDValue(); + AddToWorkList(N1.getNode()); + } + AddToWorkList(N0.getNode()); DebugLoc dl = Op.getDebugLoc(); return DAG.getNode(ISD::TRUNCATE, dl, VT, @@ -674,6 +727,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { return SDValue(); } + //===----------------------------------------------------------------------===// // Main DAG Combiner implementation //===----------------------------------------------------------------------===// @@ -1765,8 +1819,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { // into a vsetcc. EVT Op0VT = N0.getOperand(0).getValueType(); if ((N0.getOpcode() == ISD::ZERO_EXTEND || - N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || + // Avoid infinite looping with PromoteIntBinOp. + (N0.getOpcode() == ISD::ANY_EXTEND && + (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || (N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) && !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && @@ -2624,7 +2680,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { HiBitsMask); } - return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); + if (N1C) { + SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue()); + if (NewSHL.getNode()) + return NewSHL; + } + + return PromoteIntBinOp(SDValue(N, 0)); } SDValue DAGCombiner::visitSRA(SDNode *N) { @@ -2738,7 +2800,13 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (DAG.SignBitIsZero(N0)) return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1); - return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue(); + if (N1C) { + SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); + if (NewSRA.getNode()) + return NewSRA; + } + + return PromoteIntBinOp(SDValue(N, 0)); } SDValue DAGCombiner::visitSRL(SDNode *N) { @@ -2793,10 +2861,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C->getZExtValue() >= SmallVT.getSizeInBits()) return DAG.getUNDEF(VT); - SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, - N0.getOperand(0), N1); - AddToWorkList(SmallShift.getNode()); - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { + SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, + N0.getOperand(0), N1); + AddToWorkList(SmallShift.getNode()); + return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + } } // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign @@ -2902,7 +2972,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { } } - return SDValue(); + return PromoteIntBinOp(SDValue(N, 0)); } SDValue DAGCombiner::visitCTLZ(SDNode *N) { @@ -3861,7 +3931,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) - return ReduceLoadWidth(N); + if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) + return ReduceLoadWidth(N); + return SDValue(); } static SDNode *getBuildPairElt(SDNode *N, unsigned i) { diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 9082fb6fbe9..4b7fb869711 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -355,7 +355,7 @@ void SelectionDAGISel::ShrinkDemandedOps() { InWorklist.insert(I); } - TargetLowering::TargetLoweringOpt TLO(*CurDAG, true); + TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true); while (!Worklist.empty()) { SDNode *N = Worklist.pop_back_val(); InWorklist.erase(N); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index df923c6a707..11dca398b7c 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1279,8 +1279,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, // variable. The low bit of the shift cannot be an input sign bit unless // the shift amount is >= the size of the datatype, which is undefined. if (DemandedMask == 1) - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), - Op.getOperand(0), Op.getOperand(1))); + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(), + Op.getOperand(0), Op.getOperand(1))); if (ConstantSDNode *SA = dyn_cast(Op.getOperand(1))) { EVT VT = Op.getValueType(); @@ -1465,23 +1466,29 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, case ISD::SRL: // Shrink SRL by a constant if none of the high bits shifted in are // demanded. - if (ConstantSDNode *ShAmt = dyn_cast(In.getOperand(1))){ - APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, - OperandBitWidth - BitWidth); - HighBits = HighBits.lshr(ShAmt->getZExtValue()); - HighBits.trunc(BitWidth); - - if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { - // None of the shifted in bits are needed. Add a truncate of the - // shift input, then shift it. - SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, - Op.getValueType(), - In.getOperand(0)); - return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, - Op.getValueType(), - NewTrunc, - In.getOperand(1))); - } + if (TLO.LegalTypes() && + !isTypeDesirableForOp(ISD::SRL, Op.getValueType())) + // Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is + // undesirable. + break; + ConstantSDNode *ShAmt = dyn_cast(In.getOperand(1)); + if (!ShAmt) + break; + APInt HighBits = APInt::getHighBitsSet(OperandBitWidth, + OperandBitWidth - BitWidth); + HighBits = HighBits.lshr(ShAmt->getZExtValue()); + HighBits.trunc(BitWidth); + + if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) { + // None of the shifted in bits are needed. Add a truncate of the + // shift input, then shift it. + SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl, + Op.getValueType(), + In.getOperand(0)); + return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, + Op.getValueType(), + NewTrunc, + In.getOperand(1))); } break; } diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 00cc083f7db..8c69d2db7ad 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -5992,6 +5992,8 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC, } // Otherwise just emit a CMP with 0, which is the TEST pattern. + if (Promote16Bit && Op.getValueType() == MVT::i16) + Op = DAG.getNode(ISD::ANY_EXTEND, Op.getDebugLoc(), MVT::i32, Op); return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op, DAG.getConstant(0, Op.getValueType())); } @@ -6005,6 +6007,10 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC, return EmitTest(Op0, X86CC, DAG); DebugLoc dl = Op0.getDebugLoc(); + if (Promote16Bit && Op0.getValueType() == MVT::i16) { + Op0 = DAG.getNode(ISD::ANY_EXTEND, Op0.getDebugLoc(), MVT::i32, Op0); + Op1 = DAG.getNode(ISD::ANY_EXTEND, Op1.getDebugLoc(), MVT::i32, Op1); + } return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1); } @@ -6042,11 +6048,13 @@ static SDValue LowerToBT(SDValue And, ISD::CondCode CC, } if (LHS.getNode()) { - // If LHS is i8, promote it to i16 with any_extend. There is no i8 BT + // If LHS is i8, promote it to i32 with any_extend. There is no i8 BT // instruction. Since the shift amount is in-range-or-undefined, we know - // that doing a bittest on the i16 value is ok. We extend to i32 because + // that doing a bittest on the i32 value is ok. We extend to i32 because // the encoding for the i16 version is larger than the i32 version. - if (LHS.getValueType() == MVT::i8) + // Also promote i16 to i32 for performance / code size reason. + if (LHS.getValueType() == MVT::i8 || + (Promote16Bit && LHS.getValueType() == MVT::i16)) LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS); // If the operand types disagree, extend the shift amount to match. Since @@ -6099,7 +6107,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) { DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1)); } - bool isFP = Op.getOperand(1).getValueType().isFloatingPoint(); + bool isFP = Op1.getValueType().isFloatingPoint(); unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG); if (X86CC == X86::COND_INVALID) return SDValue(); @@ -9781,7 +9789,8 @@ static SDValue PerformBTCombine(SDNode *N, unsigned BitWidth = Op1.getValueSizeInBits(); APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth)); APInt KnownZero, KnownOne; - TargetLowering::TargetLoweringOpt TLO(DAG); + TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(), + !DCI.isBeforeLegalizeOps()); TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) || TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO)) @@ -9909,10 +9918,36 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, return SDValue(); } -/// PerformDAGCombinePromotion - This method query the target whether it is +/// isTypeDesirableForOp - Return true if the target has native support for +/// the specified value type and it is 'desirable' to use the type for the +/// given node type. e.g. On x86 i16 is legal, but undesirable since i16 +/// instruction encodings are longer and some i16 instructions are slow. +bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { + if (!isTypeLegal(VT)) + return false; + if (!Promote16Bit || VT != MVT::i16) + return true; + + switch (Opc) { + default: + return true; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::SUB: + case ISD::ADD: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + return false; + } +} + +/// IsDesirableToPromoteOp - This method query the target whether it is /// beneficial for dag combiner to promote the specified node. If true, it /// should return the desired promotion type by reference. -bool X86TargetLowering::PerformDAGCombinePromotion(SDValue Op, EVT &PVT) const { +bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const { if (!Promote16Bit) return false; @@ -9923,6 +9958,16 @@ bool X86TargetLowering::PerformDAGCombinePromotion(SDValue Op, EVT &PVT) const { bool Commute = true; switch (Op.getOpcode()) { default: return false; + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: { + SDValue N0 = Op.getOperand(0); + // Look out for (store (shl (load), x)). + if (isa(N0) && N0.hasOneUse() && + Op.hasOneUse() && Op.getNode()->use_begin()->getOpcode() == ISD::STORE) + return false; + break; + } case ISD::SUB: Commute = false; // fallthrough diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h index ca1a2598034..8e268f119dc 100644 --- a/lib/Target/X86/X86ISelLowering.h +++ b/lib/Target/X86/X86ISelLowering.h @@ -452,7 +452,17 @@ namespace llvm { virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - virtual bool PerformDAGCombinePromotion(SDValue Op, EVT &PVT) const; + /// isTypeDesirableForOp - Return true if the target has native support for + /// the specified value type and it is 'desirable' to use the type for the + /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 + /// instruction encodings are longer and some i16 instructions are slow. + virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const; + + /// isTypeDesirable - Return true if the target has native support for the + /// specified value type and it is 'desirable' to use the type. e.g. On x86 + /// i16 is legal, but undesirable since i16 instruction encodings are longer + /// and some i16 instructions are slow. + virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const; virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB, diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 940b439d22a..0d762ba3571 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -4717,6 +4717,8 @@ def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C16r)>; def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))), (SETB_C32r)>; +def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))), + (SETB_C32r)>; // (or x1, x2) -> (add x1, x2) if two operands are known not to share bits. let AddedComplexity = 5 in { // Try this before the selecting to OR