Generalize ExtendUsesToFormExtLoad to be usable for ANY_EXTEND,

in addition to ZERO_EXTEND and SIGN_EXTEND. Fix a bug in the
way it checked for live-out values, and simplify the way it
finds users by using SDNode::use_iterator's (relatively) new
features. Also, make it slightly more permissive on targets
with free truncates.

In SelectionDAGBuild, avoid creating ANY_EXTEND nodes that are
larger than necessary. If the target's ShiftAmountTy has
enough bits, use it. This exposes the truncate to optimization
early, enabling more optimizations.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@68670 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dan Gohman 2009-04-09 03:51:29 +00:00
parent 7d770be047
commit 57fc82d409
5 changed files with 159 additions and 85 deletions

View File

@ -2874,7 +2874,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) {
}
// ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
// "fold ({s|z}ext (load x)) -> ({s|z}ext (truncate ({s|z}extload x)))"
// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
// transformation. Returns true if extensions are possible and the above
// mentioned transformation is profitable.
static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
@ -2889,8 +2889,10 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
SDNode *User = *UI;
if (User == N)
continue;
if (UI.getUse().getResNo() != N0.getResNo())
continue;
// FIXME: Only extend SETCC N, N and SETCC N, c for now.
if (User->getOpcode() == ISD::SETCC) {
if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
// Sign bits will be lost after a zext.
@ -2906,32 +2908,25 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
}
if (Add)
ExtendNodes.push_back(User);
} else {
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
SDValue UseOp = User->getOperand(i);
if (UseOp == N0) {
// If truncate from extended type to original load type is free
// on this target, then it's ok to extend a CopyToReg.
if (isTruncFree && User->getOpcode() == ISD::CopyToReg)
HasCopyToRegUses = true;
else
return false;
}
}
continue;
}
// If truncates aren't free and there are users we can't
// extend, it isn't worthwhile.
if (!isTruncFree)
return false;
// Remember if this value is live-out.
if (User->getOpcode() == ISD::CopyToReg)
HasCopyToRegUses = true;
}
if (HasCopyToRegUses) {
bool BothLiveOut = false;
for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
UI != UE; ++UI) {
SDNode *User = *UI;
for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) {
SDValue UseOp = User->getOperand(i);
if (UseOp.getNode() == N && UseOp.getResNo() == 0) {
BothLiveOut = true;
break;
}
SDUse &Use = UI.getUse();
if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
BothLiveOut = true;
break;
}
}
if (BothLiveOut)
@ -3013,8 +3008,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(),
VT, LN0->getChain(),
SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
N0.getValueType(),
@ -3034,8 +3029,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
if (SOp == Trunc)
Ops.push_back(ExtLoad);
else
Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(),
VT, SOp));
Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND,
N->getDebugLoc(), VT, SOp));
}
Ops.push_back(SetCC->getOperand(2));
@ -3278,26 +3273,48 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
}
// fold (aext (load x)) -> (aext (truncate (extload x)))
if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() &&
if (ISD::isNON_EXTLoad(N0.getNode()) &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
N0.getValueType(),
LN0->isVolatile(), LN0->getAlignment());
CombineTo(N, ExtLoad);
// Redirect any chain users to the new load.
DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1),
SDValue(ExtLoad.getNode(), 1));
// If any node needs the original loaded value, recompute it.
if (!LN0->use_empty())
CombineTo(LN0, DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
N0.getValueType(), ExtLoad),
ExtLoad.getValue(1));
return SDValue(N, 0); // Return N so it doesn't get rechecked!
bool DoXform = true;
SmallVector<SDNode*, 4> SetCCs;
if (!N0.hasOneUse())
DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
if (DoXform) {
LoadSDNode *LN0 = cast<LoadSDNode>(N0);
SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT,
LN0->getChain(),
LN0->getBasePtr(), LN0->getSrcValue(),
LN0->getSrcValueOffset(),
N0.getValueType(),
LN0->isVolatile(), LN0->getAlignment());
CombineTo(N, ExtLoad);
SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(),
N0.getValueType(), ExtLoad);
CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
// Extend SetCC uses if necessary.
for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
SDNode *SetCC = SetCCs[i];
SmallVector<SDValue, 4> Ops;
for (unsigned j = 0; j != 2; ++j) {
SDValue SOp = SetCC->getOperand(j);
if (SOp == Trunc)
Ops.push_back(ExtLoad);
else
Ops.push_back(DAG.getNode(ISD::ANY_EXTEND,
N->getDebugLoc(), VT, SOp));
}
Ops.push_back(SetCC->getOperand(2));
CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(),
SetCC->getValueType(0),
&Ops[0], Ops.size()));
}
return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
}
// fold (aext (zextload x)) -> (aext (truncate (zextload x)))

View File

@ -2190,8 +2190,24 @@ void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) {
void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
if (!isa<VectorType>(I.getType())) {
if (TLI.getPointerTy().bitsLT(Op2.getValueType()))
if (!isa<VectorType>(I.getType()) &&
Op2.getValueType() != TLI.getShiftAmountTy()) {
// If the operand is smaller than the shift count type, promote it.
if (TLI.getShiftAmountTy().bitsGT(Op2.getValueType()))
Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(),
TLI.getShiftAmountTy(), Op2);
// If the operand is larger than the shift count type but the shift
// count type has enough bits to represent any shift value, truncate
// it now. This is a common case and it exposes the truncate to
// optimization early.
else if (TLI.getShiftAmountTy().getSizeInBits() >=
Log2_32_Ceil(Op2.getValueType().getSizeInBits()))
Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
TLI.getShiftAmountTy(), Op2);
// Otherwise we'll need to temporarily settle for some other
// convenient type; type legalization will make adjustments as
// needed.
else if (TLI.getPointerTy().bitsLT(Op2.getValueType()))
Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(),
TLI.getPointerTy(), Op2);
else if (TLI.getPointerTy().bitsGT(Op2.getValueType()))

View File

@ -1,40 +0,0 @@
; RUN: llvm-as < %s | llc -march=x86 | grep movw | not grep %e.x
; PR2681
@g_491 = external global i32 ; <i32*> [#uses=1]
@g_897 = external global i16 ; <i16*> [#uses=1]
; @func_7 - Regression input for PR2681. The accompanying RUN line greps the
; x86 output for movw and requires that none of those lines mention a %e.x
; register.
; The function loads i16 and i32 values, sign-extends or truncates them, and
; passes the results as arguments to three calls of @func_18.
; None of the allocas below is stored to anywhere in this function.
define i32 @func_7(i16 signext %p_9) nounwind {
entry:
%p_9.addr = alloca i16 ; <i16*> [#uses=2]
%l_1122 = alloca i16, align 2 ; <i16*> [#uses=1]
%l_1128 = alloca i32, align 4 ; <i32*> [#uses=1]
%l_1129 = alloca i32, align 4 ; <i32*> [#uses=1]
%l_1130 = alloca i32, align 4 ; <i32*> [#uses=1]
; Two i16 loads are sign-extended to i32, xor'ed, or'ed with an i32 loaded
; from null, and truncated back to i16 (%conv19, consumed by the last call).
%tmp14 = load i16* %l_1122 ; <i16> [#uses=1]
%conv15 = sext i16 %tmp14 to i32 ; <i32> [#uses=1]
%tmp16 = load i16* %p_9.addr ; <i16> [#uses=1]
%conv17 = sext i16 %tmp16 to i32 ; <i32> [#uses=1]
%xor = xor i32 %conv15, %conv17 ; <i32> [#uses=1]
%tmp18 = load i32* null ; <i32> [#uses=1]
%or = or i32 %xor, %tmp18 ; <i32> [#uses=1]
%conv19 = trunc i32 %or to i16 ; <i16> [#uses=1]
; Second load of %p_9.addr; passed directly as a zeroext i16 argument in the
; second call.
%tmp28 = load i16* %p_9.addr ; <i16> [#uses=1]
; Remaining loads and conversions feed the arguments of the first call.
%tmp33 = load i16* @g_897 ; <i16> [#uses=1]
%tmp34 = load i32* @g_491 ; <i32> [#uses=1]
%conv35 = trunc i32 %tmp34 to i16 ; <i16> [#uses=1]
%tmp36 = load i16* null ; <i16> [#uses=1]
%conv37 = trunc i16 %tmp36 to i8 ; <i8> [#uses=1]
%tmp38 = load i32* %l_1128 ; <i32> [#uses=1]
%conv39 = sext i32 %tmp38 to i64 ; <i64> [#uses=1]
%tmp42 = load i32* %l_1129 ; <i32> [#uses=1]
%conv43 = trunc i32 %tmp42 to i16 ; <i16> [#uses=1]
%tmp44 = load i32* %l_1130 ; <i32> [#uses=1]
%conv45 = sext i32 %tmp44 to i64 ; <i64> [#uses=1]
; The results of all three calls are unused.
%call46 = call i32 @func_18( i16 zeroext 0, i16 zeroext 0, i16 zeroext %tmp33, i16 zeroext %conv35, i8 zeroext %conv37, i64 %conv39, i32 0, i16 zeroext %conv43, i64 %conv45, i8 zeroext 1 ) ; <i32> [#uses=0]
%call48 = call i32 @func_18( i16 zeroext 0, i16 zeroext 0, i16 zeroext 0, i16 zeroext 1, i8 zeroext 0, i64 0, i32 1, i16 zeroext %tmp28, i64 0, i8 zeroext 1 ) ; <i32> [#uses=0]
%call50 = call i32 @func_18( i16 zeroext 1, i16 zeroext 0, i16 zeroext 0, i16 zeroext 1, i8 zeroext 0, i64 0, i32 1, i16 zeroext %conv19, i64 0, i8 zeroext 1 ) ; <i32> [#uses=0]
ret i32 undef
}
declare i32 @func_18(i16 zeroext, i16 zeroext, i16 zeroext, i16 zeroext, i8 zeroext, i64, i32, i16 zeroext, i64, i8 zeroext)

View File

@ -0,0 +1,47 @@
; RUN: llvm-as < %s | llc -march=x86-64 > %t
; RUN: grep mov %t | count 8
; RUN: not grep implicit %t
; Avoid partial register updates; don't define an i8 register and read
; the i32 super-register.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9.6"
%struct.RC4_KEY = type { i8, i8, [256 x i8] }
; @foo - Test input whose RUN lines expect exactly 8 lines matching "mov" in
; the x86-64 output and no line containing "implicit".
; The body is a single block, bb24, that branches back to itself
; unconditionally. It loads i8 values, zero-extends them, and combines two of
; them (shifted left by 32 and by 48) into an i64 that is stored to null.
define void @foo(%struct.RC4_KEY* nocapture %key, i64 %len, i8* %indata, i8* %outdata) nounwind {
entry:
br label %bb24
bb24: ; preds = %bb24, %entry
%0 = load i8* null, align 1 ; <i8> [#uses=1]
%1 = zext i8 %0 to i64 ; <i64> [#uses=1]
%2 = shl i64 %1, 32 ; <i64> [#uses=1]
%3 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0 ; <i8*> [#uses=1]
; %4 has two users: the i8 add (%5) and the zext to i32 (%8).
%4 = load i8* %3, align 1 ; <i8> [#uses=2]
%5 = add i8 %4, 0 ; <i8> [#uses=2]
; %6 is dead (zero uses).
%6 = zext i8 %5 to i64 ; <i64> [#uses=0]
%7 = load i8* null, align 1 ; <i8> [#uses=1]
%8 = zext i8 %4 to i32 ; <i32> [#uses=1]
%9 = zext i8 %7 to i32 ; <i32> [#uses=1]
; The i32 sum is masked to its low 8 bits and used as an index into field 2
; of %key.
%10 = add i32 %9, %8 ; <i32> [#uses=1]
%11 = and i32 %10, 255 ; <i32> [#uses=1]
%12 = zext i32 %11 to i64 ; <i64> [#uses=1]
%13 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %12 ; <i8*> [#uses=1]
%14 = load i8* %13, align 1 ; <i8> [#uses=1]
%15 = zext i8 %14 to i64 ; <i64> [#uses=1]
%16 = shl i64 %15, 48 ; <i64> [#uses=1]
%17 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0 ; <i8*> [#uses=1]
; %18 is both stored back (at index %20) and added to %5 to form that index.
%18 = load i8* %17, align 1 ; <i8> [#uses=2]
%19 = add i8 %18, %5 ; <i8> [#uses=1]
%20 = zext i8 %19 to i64 ; <i64> [#uses=1]
%21 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %20 ; <i8*> [#uses=1]
store i8 %18, i8* %21, align 1
; Chain of or/xor with zero operands that merges %2 and %16 before the store.
%22 = or i64 0, %2 ; <i64> [#uses=1]
%23 = or i64 %22, 0 ; <i64> [#uses=1]
%24 = or i64 %23, %16 ; <i64> [#uses=1]
%25 = or i64 %24, 0 ; <i64> [#uses=1]
%26 = xor i64 %25, 0 ; <i64> [#uses=1]
store i64 %26, i64* null, align 8
br label %bb24
}

View File

@ -0,0 +1,34 @@
; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1
; Do zextload, instead of a load and a separate zext.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.6"
%struct.move_s = type { i32, i32, i32, i32, i32, i32 }
%struct.node_t = type { i8, i8, i8, i8, i32, i32, %struct.node_t**, %struct.node_t*, %struct.move_s }
; @set_proof_and_disproof_numbers - Test input whose RUN line expects exactly
; one line matching "mov" in the x86 output, i.e. a single extending load
; rather than a load followed by a separate zext.
; The body loads an i8 from null and switches on it; the %node argument is
; not referenced.
define fastcc void @set_proof_and_disproof_numbers(%struct.node_t* nocapture %node) nounwind {
entry:
%0 = load i8* null, align 1 ; <i8> [#uses=1]
; The loaded i8 is used only as the switch condition. Four explicit cases;
; every other value goes to %return.
switch i8 %0, label %return [
i8 2, label %bb31
i8 0, label %bb80
i8 1, label %bb82
i8 3, label %bb84
]
bb31: ; preds = %entry
unreachable
bb80: ; preds = %entry
ret void
bb82: ; preds = %entry
ret void
bb84: ; preds = %entry
ret void
return: ; preds = %entry
ret void
}