More work to allow dag combiner to promote 16-bit ops to 32-bit.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@101621 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2010-04-17 06:13:15 +00:00
parent 4ff28527bb
commit e5b51ac770
7 changed files with 208 additions and 56 deletions

View File

@ -767,12 +767,19 @@ public:
/// that want to combine
struct TargetLoweringOpt {
SelectionDAG &DAG;
bool LegalTys;
bool LegalOps;
bool ShrinkOps;
SDValue Old;
SDValue New;
explicit TargetLoweringOpt(SelectionDAG &InDAG, bool Shrink = false) :
DAG(InDAG), ShrinkOps(Shrink) {}
explicit TargetLoweringOpt(SelectionDAG &InDAG,
bool LT, bool LO,
bool Shrink = false) :
DAG(InDAG), LegalTys(LT), LegalOps(LO), ShrinkOps(Shrink) {}
bool LegalTypes() const { return LegalTys; }
bool LegalOperations() const { return LegalOps; }
bool CombineTo(SDValue O, SDValue N) {
Old = O;
@ -873,10 +880,19 @@ public:
///
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
/// PerformDAGCombinePromotion - This method query the target whether it is
/// isTypeDesirableForOp - Return true if the target has native support for
/// the specified value type and it is 'desirable' to use the type for the
/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
/// instruction encodings are longer and some i16 instructions are slow.
virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const {
  // The generic implementation does not look at Opc: every type that is
  // legal for the target is reported as desirable.
  const bool IsLegal = isTypeLegal(VT);
  return IsLegal;
}
/// IsDesirableToPromoteOp - This method queries the target whether it is
/// beneficial for the dag combiner to promote the specified node. If true,
/// it should return the desired promotion type by reference.
virtual bool PerformDAGCombinePromotion(SDValue Op, EVT &PVT) const {
virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
return false;
}

View File

@ -582,9 +582,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
return SDValue(N, 0);
}
void
DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &
TLO) {
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
// Replace all uses. If any nodes become isomorphic to other nodes and
// are deleted, make sure to remove them from our worklist.
WorkListRemover DeadNodes(*this);
@ -614,7 +613,7 @@ DAGCombiner::CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &
/// it can be simplified or if things it uses can be simplified by bit
/// propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
TargetLowering::TargetLoweringOpt TLO(DAG);
TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
APInt KnownZero, KnownOne;
if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
return false;
@ -634,18 +633,50 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
return true;
}
static SDValue PromoteOperand(SDValue Op, EVT PVT, SelectionDAG &DAG) {
unsigned Opc = ISD::ZERO_EXTEND;
if (Op.getOpcode() == ISD::Constant) {
static SDValue PromoteOperand(SDValue Op, EVT PVT, SelectionDAG &DAG,
const TargetLowering &TLI) {
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
return DAG.getExtLoad(ISD::EXTLOAD, Op.getDebugLoc(), PVT,
LD->getChain(), LD->getBasePtr(),
LD->getSrcValue(), LD->getSrcValueOffset(),
LD->getMemoryVT(), LD->isVolatile(),
LD->isNonTemporal(), LD->getAlignment());
}
unsigned Opc = ISD::ANY_EXTEND;
if (Op.getOpcode() == ISD::Constant)
// Zero extend things like i1, sign extend everything else. It shouldn't
// matter in theory which one we pick, but this tends to give better code?
// See DAGTypeLegalizer::PromoteIntRes_Constant.
if (Op.getValueType().isByteSized())
Opc = ISD::SIGN_EXTEND;
}
Opc = Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
if (!TLI.isOperationLegal(Opc, PVT))
return SDValue();
return DAG.getNode(Opc, Op.getDebugLoc(), PVT, Op);
}
/// SExtPromoteOperand - Promote Op to PVT via PromoteOperand and wrap the
/// result in a SIGN_EXTEND_INREG node whose value-type operand is Op's
/// original type. Returns a null SDValue if SIGN_EXTEND_INREG is not legal
/// for PVT or if PromoteOperand fails.
static SDValue SExtPromoteOperand(SDValue Op, EVT PVT, SelectionDAG &DAG,
                                  const TargetLowering &TLI) {
  // Bail out before creating any nodes if the in-register extension cannot
  // be used at the promoted type.
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();

  // Remember the narrow type and debug location of the incoming operand.
  const EVT OrigVT = Op.getValueType();
  const DebugLoc DL = Op.getDebugLoc();

  SDValue Promoted = PromoteOperand(Op, PVT, DAG, TLI);
  if (!Promoted.getNode())
    return SDValue();

  return DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, Promoted.getValueType(),
                     Promoted, DAG.getValueType(OrigVT));
}
/// ZExtPromoteOperand - Promote Op to PVT via PromoteOperand and then apply
/// DAG.getZeroExtendInReg with Op's original type. Returns a null SDValue if
/// PromoteOperand fails.
static SDValue ZExtPromoteOperand(SDValue Op, EVT PVT, SelectionDAG &DAG,
                                  const TargetLowering &TLI) {
  // Remember the narrow type and debug location of the incoming operand.
  const EVT OrigVT = Op.getValueType();
  const DebugLoc DL = Op.getDebugLoc();

  SDValue Promoted = PromoteOperand(Op, PVT, DAG, TLI);
  if (!Promoted.getNode())
    return SDValue();

  return DAG.getZeroExtendInReg(Promoted, DL, OrigVT);
}
/// PromoteIntBinOp - Promote the specified integer binary operation if the
/// target indicates it is beneficial. e.g. On x86, it's usually better to
/// promote i16 operations to i32 since i16 instructions are longer.
@ -657,15 +688,37 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
if (VT.isVector() || !VT.isInteger())
return SDValue();
// If operation type is 'undesirable', e.g. i16 on x86, consider
// promoting it.
unsigned Opc = Op.getOpcode();
if (TLI.isTypeDesirableForOp(Opc, VT))
return SDValue();
EVT PVT = VT;
if (TLI.PerformDAGCombinePromotion(Op, PVT)) {
// Consult target whether it is a good idea to promote this operation and
// what's the right type to promote it to.
if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
assert(PVT != VT && "Don't know what type to promote to!");
SDValue N0 = PromoteOperand(Op.getOperand(0), PVT, DAG);
AddToWorkList(N0.getNode());
bool isShift = (Opc == ISD::SHL) || (Opc == ISD::SRA) || (Opc == ISD::SRL);
SDValue N0 = Op.getOperand(0);
if (Opc == ISD::SRA)
N0 = SExtPromoteOperand(Op.getOperand(0), PVT, DAG, TLI);
else if (Opc == ISD::SRL)
N0 = ZExtPromoteOperand(Op.getOperand(0), PVT, DAG, TLI);
else
N0 = PromoteOperand(N0, PVT, DAG, TLI);
if (N0.getNode() == 0)
return SDValue();
SDValue N1 = PromoteOperand(Op.getOperand(1), PVT, DAG);
AddToWorkList(N1.getNode());
SDValue N1 = Op.getOperand(1);
if (!isShift) {
N1 = PromoteOperand(N1, PVT, DAG, TLI);
if (N1.getNode() == 0)
return SDValue();
AddToWorkList(N1.getNode());
}
AddToWorkList(N0.getNode());
DebugLoc dl = Op.getDebugLoc();
return DAG.getNode(ISD::TRUNCATE, dl, VT,
@ -674,6 +727,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
return SDValue();
}
//===----------------------------------------------------------------------===//
// Main DAG Combiner implementation
//===----------------------------------------------------------------------===//
@ -1765,8 +1819,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// into a vsetcc.
EVT Op0VT = N0.getOperand(0).getValueType();
if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
N0.getOpcode() == ISD::ANY_EXTEND ||
N0.getOpcode() == ISD::SIGN_EXTEND ||
// Avoid infinite looping with PromoteIntBinOp.
(N0.getOpcode() == ISD::ANY_EXTEND &&
(!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
(N0.getOpcode() == ISD::TRUNCATE && TLI.isTypeLegal(Op0VT))) &&
!VT.isVector() &&
Op0VT == N1.getOperand(0).getValueType() &&
@ -2624,7 +2680,13 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
HiBitsMask);
}
return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
if (N1C) {
SDValue NewSHL = visitShiftByConstant(N, N1C->getZExtValue());
if (NewSHL.getNode())
return NewSHL;
}
return PromoteIntBinOp(SDValue(N, 0));
}
SDValue DAGCombiner::visitSRA(SDNode *N) {
@ -2738,7 +2800,13 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (DAG.SignBitIsZero(N0))
return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1);
return N1C ? visitShiftByConstant(N, N1C->getZExtValue()) : SDValue();
if (N1C) {
SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue());
if (NewSRA.getNode())
return NewSRA;
}
return PromoteIntBinOp(SDValue(N, 0));
}
SDValue DAGCombiner::visitSRL(SDNode *N) {
@ -2793,10 +2861,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C->getZExtValue() >= SmallVT.getSizeInBits())
return DAG.getUNDEF(VT);
SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
N0.getOperand(0), N1);
AddToWorkList(SmallShift.getNode());
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT,
N0.getOperand(0), N1);
AddToWorkList(SmallShift.getNode());
return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
}
}
// fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign
@ -2902,7 +2972,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
}
}
return SDValue();
return PromoteIntBinOp(SDValue(N, 0));
}
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
@ -3861,7 +3931,9 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
// fold (truncate (load x)) -> (smaller load x)
// fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
return ReduceLoadWidth(N);
if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT))
return ReduceLoadWidth(N);
return SDValue();
}
static SDNode *getBuildPairElt(SDNode *N, unsigned i) {

View File

@ -355,7 +355,7 @@ void SelectionDAGISel::ShrinkDemandedOps() {
InWorklist.insert(I);
}
TargetLowering::TargetLoweringOpt TLO(*CurDAG, true);
TargetLowering::TargetLoweringOpt TLO(*CurDAG, true, true, true);
while (!Worklist.empty()) {
SDNode *N = Worklist.pop_back_val();
InWorklist.erase(N);

View File

@ -1279,8 +1279,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// variable. The low bit of the shift cannot be an input sign bit unless
// the shift amount is >= the size of the datatype, which is undefined.
if (DemandedMask == 1)
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1)));
return TLO.CombineTo(Op,
TLO.DAG.getNode(ISD::SRL, dl, Op.getValueType(),
Op.getOperand(0), Op.getOperand(1)));
if (ConstantSDNode *SA = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
EVT VT = Op.getValueType();
@ -1465,23 +1466,29 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::SRL:
// Shrink SRL by a constant if none of the high bits shifted in are
// demanded.
if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
OperandBitWidth - BitWidth);
HighBits = HighBits.lshr(ShAmt->getZExtValue());
HighBits.trunc(BitWidth);
if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
Op.getValueType(),
In.getOperand(0));
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
Op.getValueType(),
NewTrunc,
In.getOperand(1)));
}
if (TLO.LegalTypes() &&
!isTypeDesirableForOp(ISD::SRL, Op.getValueType()))
// Do not turn (vt1 truncate (vt2 srl)) into (vt1 srl) if vt1 is
// undesirable.
break;
ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1));
if (!ShAmt)
break;
APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
OperandBitWidth - BitWidth);
HighBits = HighBits.lshr(ShAmt->getZExtValue());
HighBits.trunc(BitWidth);
if (ShAmt->getZExtValue() < BitWidth && !(HighBits & NewMask)) {
// None of the shifted in bits are needed. Add a truncate of the
// shift input, then shift it.
SDValue NewTrunc = TLO.DAG.getNode(ISD::TRUNCATE, dl,
Op.getValueType(),
In.getOperand(0));
return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl,
Op.getValueType(),
NewTrunc,
In.getOperand(1)));
}
break;
}

View File

@ -5992,6 +5992,8 @@ SDValue X86TargetLowering::EmitTest(SDValue Op, unsigned X86CC,
}
// Otherwise just emit a CMP with 0, which is the TEST pattern.
if (Promote16Bit && Op.getValueType() == MVT::i16)
Op = DAG.getNode(ISD::ANY_EXTEND, Op.getDebugLoc(), MVT::i32, Op);
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op,
DAG.getConstant(0, Op.getValueType()));
}
@ -6005,6 +6007,10 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
return EmitTest(Op0, X86CC, DAG);
DebugLoc dl = Op0.getDebugLoc();
if (Promote16Bit && Op0.getValueType() == MVT::i16) {
Op0 = DAG.getNode(ISD::ANY_EXTEND, Op0.getDebugLoc(), MVT::i32, Op0);
Op1 = DAG.getNode(ISD::ANY_EXTEND, Op1.getDebugLoc(), MVT::i32, Op1);
}
return DAG.getNode(X86ISD::CMP, dl, MVT::i32, Op0, Op1);
}
@ -6042,11 +6048,13 @@ static SDValue LowerToBT(SDValue And, ISD::CondCode CC,
}
if (LHS.getNode()) {
// If LHS is i8, promote it to i16 with any_extend. There is no i8 BT
// If LHS is i8, promote it to i32 with any_extend. There is no i8 BT
// instruction. Since the shift amount is in-range-or-undefined, we know
// that doing a bittest on the i16 value is ok. We extend to i32 because
// that doing a bittest on the i32 value is ok. We extend to i32 because
// the encoding for the i16 version is larger than the i32 version.
if (LHS.getValueType() == MVT::i8)
// Also promote i16 to i32 for performance / code size reason.
if (LHS.getValueType() == MVT::i8 ||
(Promote16Bit && LHS.getValueType() == MVT::i16))
LHS = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, LHS);
// If the operand types disagree, extend the shift amount to match. Since
@ -6099,7 +6107,7 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
}
bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
bool isFP = Op1.getValueType().isFloatingPoint();
unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
if (X86CC == X86::COND_INVALID)
return SDValue();
@ -9781,7 +9789,8 @@ static SDValue PerformBTCombine(SDNode *N,
unsigned BitWidth = Op1.getValueSizeInBits();
APInt DemandedMask = APInt::getLowBitsSet(BitWidth, Log2_32(BitWidth));
APInt KnownZero, KnownOne;
TargetLowering::TargetLoweringOpt TLO(DAG);
TargetLowering::TargetLoweringOpt TLO(DAG, !DCI.isBeforeLegalize(),
!DCI.isBeforeLegalizeOps());
TargetLowering &TLI = DAG.getTargetLoweringInfo();
if (TLO.ShrinkDemandedConstant(Op1, DemandedMask) ||
TLI.SimplifyDemandedBits(Op1, DemandedMask, KnownZero, KnownOne, TLO))
@ -9909,10 +9918,36 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
return SDValue();
}
/// PerformDAGCombinePromotion - This method query the target whether it is
/// isTypeDesirableForOp - Return true if the target has native support for
/// the specified value type and it is 'desirable' to use the type for the
/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
/// instruction encodings are longer and some i16 instructions are slow.
bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const {
  // A type that is not legal is never desirable.
  if (!isTypeLegal(VT))
    return false;

  // Only i16 is ever rejected, and only while 16-bit promotion is enabled.
  const bool IsPromotableI16 = Promote16Bit && VT == MVT::i16;
  if (!IsPromotableI16)
    return true;

  // Shifts and the basic integer arithmetic / logic opcodes are reported as
  // undesirable at i16; every other opcode keeps i16.
  switch (Opc) {
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
  case ISD::SUB:
  case ISD::ADD:
  case ISD::MUL:
  case ISD::AND:
  case ISD::OR:
  case ISD::XOR:
    return false;
  default:
    return true;
  }
}
/// IsDesirableToPromoteOp - This method queries the target whether it is
/// beneficial for the dag combiner to promote the specified node. If true,
/// it should return the desired promotion type by reference.
bool X86TargetLowering::PerformDAGCombinePromotion(SDValue Op, EVT &PVT) const {
bool X86TargetLowering::IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const {
if (!Promote16Bit)
return false;
@ -9923,6 +9958,16 @@ bool X86TargetLowering::PerformDAGCombinePromotion(SDValue Op, EVT &PVT) const {
bool Commute = true;
switch (Op.getOpcode()) {
default: return false;
case ISD::SHL:
case ISD::SRA:
case ISD::SRL: {
SDValue N0 = Op.getOperand(0);
// Look out for (store (shl (load), x)).
if (isa<LoadSDNode>(N0) && N0.hasOneUse() &&
Op.hasOneUse() && Op.getNode()->use_begin()->getOpcode() == ISD::STORE)
return false;
break;
}
case ISD::SUB:
Commute = false;
// fallthrough

View File

@ -452,7 +452,17 @@ namespace llvm {
virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const;
virtual bool PerformDAGCombinePromotion(SDValue Op, EVT &PVT) const;
/// isTypeDesirableForOp - Return true if the target has native support for
/// the specified value type and it is 'desirable' to use the type for the
/// given node type. e.g. On x86 i16 is legal, but undesirable since i16
/// instruction encodings are longer and some i16 instructions are slow.
virtual bool isTypeDesirableForOp(unsigned Opc, EVT VT) const;
/// IsDesirableToPromoteOp - This method queries the target whether it is
/// beneficial for the dag combiner to promote the specified node. If true,
/// it should return the desired promotion type by reference.
virtual bool IsDesirableToPromoteOp(SDValue Op, EVT &PVT) const;
virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *MBB,

View File

@ -4717,6 +4717,8 @@ def : Pat<(i16 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C16r)>;
def : Pat<(i32 (anyext (i8 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C32r)>;
def : Pat<(i32 (anyext (i16 (X86setcc_c X86_COND_B, EFLAGS)))),
(SETB_C32r)>;
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
let AddedComplexity = 5 in { // Try this before the selecting to OR