diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index e874f1b1ec1..5af12eb1745 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -2874,7 +2874,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { } // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: -// "fold ({s|z}ext (load x)) -> ({s|z}ext (truncate ({s|z}extload x)))" +// "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" // transformation. Returns true if extension are possible and the above // mentioned transformation is profitable. static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, @@ -2889,8 +2889,10 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, SDNode *User = *UI; if (User == N) continue; + if (UI.getUse().getResNo() != N0.getResNo()) + continue; // FIXME: Only extend SETCC N, N and SETCC N, c for now. - if (User->getOpcode() == ISD::SETCC) { + if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { ISD::CondCode CC = cast(User->getOperand(2))->get(); if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) // Sign bits will be lost after a zext. @@ -2906,32 +2908,25 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, } if (Add) ExtendNodes.push_back(User); - } else { - for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { - SDValue UseOp = User->getOperand(i); - if (UseOp == N0) { - // If truncate from extended type to original load type is free - // on this target, then it's ok to extend a CopyToReg. - if (isTruncFree && User->getOpcode() == ISD::CopyToReg) - HasCopyToRegUses = true; - else - return false; - } - } + continue; } + // If truncates aren't free and there are users we can't + // extend, it isn't worthwhile. + if (!isTruncFree) + return false; + // Remember if this value is live-out. + if (User->getOpcode() == ISD::CopyToReg) + HasCopyToRegUses = true; } if (HasCopyToRegUses) { bool BothLiveOut = false; for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); UI != UE; ++UI) { - SDNode *User = *UI; - for (unsigned i = 0, e = User->getNumOperands(); i != e; ++i) { - SDValue UseOp = User->getOperand(i); - if (UseOp.getNode() == N && UseOp.getResNo() == 0) { - BothLiveOut = true; - break; - } + SDUse &Use = UI.getUse(); + if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { + BothLiveOut = true; + break; } } if (BothLiveOut) @@ -3013,8 +3008,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), - VT, LN0->getChain(), + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), LN0->getBasePtr(), LN0->getSrcValue(), LN0->getSrcValueOffset(), N0.getValueType(), @@ -3034,8 +3029,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { if (SOp == Trunc) Ops.push_back(ExtLoad); else - Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), - VT, SOp)); + Ops.push_back(DAG.getNode(ISD::SIGN_EXTEND, + N->getDebugLoc(), VT, SOp)); } Ops.push_back(SetCC->getOperand(2)); @@ -3278,26 +3273,48 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { } // fold (aext (load x)) -> (aext (truncate (extload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() && + if (ISD::isNON_EXTLoad(N0.getNode()) && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { - LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, - LN0->getChain(), - LN0->getBasePtr(), LN0->getSrcValue(), - LN0->getSrcValueOffset(), - N0.getValueType(), - LN0->isVolatile(), LN0->getAlignment()); - CombineTo(N, ExtLoad); - // Redirect any chain users to the new load. - DAG.ReplaceAllUsesOfValueWith(SDValue(LN0, 1), - SDValue(ExtLoad.getNode(), 1)); - // If any node needs the original loaded value, recompute it. - if (!LN0->use_empty()) - CombineTo(LN0, DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), - N0.getValueType(), ExtLoad), - ExtLoad.getValue(1)); - return SDValue(N, 0); // Return N so it doesn't get rechecked! + bool DoXform = true; + SmallVector SetCCs; + if (!N0.hasOneUse()) + DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); + if (DoXform) { + LoadSDNode *LN0 = cast(N0); + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + LN0->getChain(), + LN0->getBasePtr(), LN0->getSrcValue(), + LN0->getSrcValueOffset(), + N0.getValueType(), + LN0->isVolatile(), LN0->getAlignment()); + CombineTo(N, ExtLoad); + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + N0.getValueType(), ExtLoad); + CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); + + // Extend SetCC uses if necessary. + for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { + SDNode *SetCC = SetCCs[i]; + SmallVector Ops; + + for (unsigned j = 0; j != 2; ++j) { + SDValue SOp = SetCC->getOperand(j); + if (SOp == Trunc) + Ops.push_back(ExtLoad); + else + Ops.push_back(DAG.getNode(ISD::ANY_EXTEND, + N->getDebugLoc(), VT, SOp)); + } + + Ops.push_back(SetCC->getOperand(2)); + CombineTo(SetCC, DAG.getNode(ISD::SETCC, N->getDebugLoc(), + SetCC->getValueType(0), + &Ops[0], Ops.size())); + } + + return SDValue(N, 0); // Return N so it doesn't get rechecked! + } } // fold (aext (zextload x)) -> (aext (truncate (zextload x))) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp index fd15603d990..d46c9c0630e 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp @@ -2190,8 +2190,24 @@ void SelectionDAGLowering::visitBinary(User &I, unsigned OpCode) { void SelectionDAGLowering::visitShift(User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - if (!isa(I.getType())) { - if (TLI.getPointerTy().bitsLT(Op2.getValueType())) + if (!isa(I.getType()) && + Op2.getValueType() != TLI.getShiftAmountTy()) { + // If the operand is smaller than the shift count type, promote it. + if (TLI.getShiftAmountTy().bitsGT(Op2.getValueType())) + Op2 = DAG.getNode(ISD::ANY_EXTEND, getCurDebugLoc(), + TLI.getShiftAmountTy(), Op2); + // If the operand is larger than the shift count type but the shift + // count type has enough bits to represent any shift value, truncate + // it now. This is a common case and it exposes the truncate to + // optimization early. + else if (TLI.getShiftAmountTy().getSizeInBits() >= + Log2_32_Ceil(Op2.getValueType().getSizeInBits())) + Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), + TLI.getShiftAmountTy(), Op2); + // Otherwise we'll need to temporarily settle for some other + // convenient type; type legalization will make adjustments as + // needed. + else if (TLI.getPointerTy().bitsLT(Op2.getValueType())) Op2 = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), TLI.getPointerTy(), Op2); else if (TLI.getPointerTy().bitsGT(Op2.getValueType())) diff --git a/test/CodeGen/X86/2008-09-10-SpillerBug2.ll b/test/CodeGen/X86/2008-09-10-SpillerBug2.ll deleted file mode 100644 index a1b4cccbce8..00000000000 --- a/test/CodeGen/X86/2008-09-10-SpillerBug2.ll +++ /dev/null @@ -1,40 +0,0 @@ -; RUN: llvm-as < %s | llc -march=x86 | grep movw | not grep %e.x -; PR2681 - -@g_491 = external global i32 ; [#uses=1] -@g_897 = external global i16 ; [#uses=1] - -define i32 @func_7(i16 signext %p_9) nounwind { -entry: - %p_9.addr = alloca i16 ; [#uses=2] - %l_1122 = alloca i16, align 2 ; [#uses=1] - %l_1128 = alloca i32, align 4 ; [#uses=1] - %l_1129 = alloca i32, align 4 ; [#uses=1] - %l_1130 = alloca i32, align 4 ; [#uses=1] - %tmp14 = load i16* %l_1122 ; [#uses=1] - %conv15 = sext i16 %tmp14 to i32 ; [#uses=1] - %tmp16 = load i16* %p_9.addr ; [#uses=1] - %conv17 = sext i16 %tmp16 to i32 ; [#uses=1] - %xor = xor i32 %conv15, %conv17 ; [#uses=1] - %tmp18 = load i32* null ; [#uses=1] - %or = or i32 %xor, %tmp18 ; [#uses=1] - %conv19 = trunc i32 %or to i16 ; [#uses=1] - %tmp28 = load i16* %p_9.addr ; [#uses=1] - %tmp33 = load i16* @g_897 ; [#uses=1] - %tmp34 = load i32* @g_491 ; [#uses=1] - %conv35 = trunc i32 %tmp34 to i16 ; [#uses=1] - %tmp36 = load i16* null ; [#uses=1] - %conv37 = trunc i16 %tmp36 to i8 ; [#uses=1] - %tmp38 = load i32* %l_1128 ; [#uses=1] - %conv39 = sext i32 %tmp38 to i64 ; [#uses=1] - %tmp42 = load i32* %l_1129 ; [#uses=1] - %conv43 = trunc i32 %tmp42 to i16 ; [#uses=1] - %tmp44 = load i32* %l_1130 ; [#uses=1] - %conv45 = sext i32 %tmp44 to i64 ; [#uses=1] - %call46 = call i32 @func_18( i16 zeroext 0, i16 zeroext 0, i16 zeroext %tmp33, i16 zeroext %conv35, i8 zeroext %conv37, i64 %conv39, i32 0, i16 zeroext %conv43, i64 %conv45, i8 zeroext 1 ) ; [#uses=0] - %call48 = call i32 @func_18( i16 zeroext 0, i16 zeroext 0, i16 zeroext 0, i16 zeroext 1, i8 zeroext 0, i64 0, i32 1, i16 zeroext %tmp28, i64 0, i8 zeroext 1 ) ; [#uses=0] - %call50 = call i32 @func_18( i16 zeroext 1, i16 zeroext 0, i16 zeroext 0, i16 zeroext 1, i8 zeroext 0, i64 0, i32 1, i16 zeroext %conv19, i64 0, i8 zeroext 1 ) ; [#uses=0] - ret i32 undef -} - -declare i32 @func_18(i16 zeroext, i16 zeroext, i16 zeroext, i16 zeroext, i8 zeroext, i64, i32, i16 zeroext, i64, i8 zeroext) diff --git a/test/CodeGen/X86/anyext-uses.ll b/test/CodeGen/X86/anyext-uses.ll new file mode 100644 index 00000000000..e8c3cf0e71b --- /dev/null +++ b/test/CodeGen/X86/anyext-uses.ll @@ -0,0 +1,47 @@ +; RUN: llvm-as < %s | llc -march=x86-64 > %t +; RUN: grep mov %t | count 8 +; RUN: not grep implicit %t + +; Avoid partial register updates; don't define an i8 register and read +; the i32 super-register. + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128" +target triple = "x86_64-apple-darwin9.6" + %struct.RC4_KEY = type { i8, i8, [256 x i8] } + +define void @foo(%struct.RC4_KEY* nocapture %key, i64 %len, i8* %indata, i8* %outdata) nounwind { +entry: + br label %bb24 + +bb24: ; preds = %bb24, %entry + %0 = load i8* null, align 1 ; [#uses=1] + %1 = zext i8 %0 to i64 ; [#uses=1] + %2 = shl i64 %1, 32 ; [#uses=1] + %3 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0 ; [#uses=1] + %4 = load i8* %3, align 1 ; [#uses=2] + %5 = add i8 %4, 0 ; [#uses=2] + %6 = zext i8 %5 to i64 ; [#uses=0] + %7 = load i8* null, align 1 ; [#uses=1] + %8 = zext i8 %4 to i32 ; [#uses=1] + %9 = zext i8 %7 to i32 ; [#uses=1] + %10 = add i32 %9, %8 ; [#uses=1] + %11 = and i32 %10, 255 ; [#uses=1] + %12 = zext i32 %11 to i64 ; [#uses=1] + %13 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %12 ; [#uses=1] + %14 = load i8* %13, align 1 ; [#uses=1] + %15 = zext i8 %14 to i64 ; [#uses=1] + %16 = shl i64 %15, 48 ; [#uses=1] + %17 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 0 ; [#uses=1] + %18 = load i8* %17, align 1 ; [#uses=2] + %19 = add i8 %18, %5 ; [#uses=1] + %20 = zext i8 %19 to i64 ; [#uses=1] + %21 = getelementptr %struct.RC4_KEY* %key, i64 0, i32 2, i64 %20 ; [#uses=1] + store i8 %18, i8* %21, align 1 + %22 = or i64 0, %2 ; [#uses=1] + %23 = or i64 %22, 0 ; [#uses=1] + %24 = or i64 %23, %16 ; [#uses=1] + %25 = or i64 %24, 0 ; [#uses=1] + %26 = xor i64 %25, 0 ; [#uses=1] + store i64 %26, i64* null, align 8 + br label %bb24 +} diff --git a/test/CodeGen/X86/switch-zextload.ll b/test/CodeGen/X86/switch-zextload.ll new file mode 100644 index 00000000000..f3c701ff5f9 --- /dev/null +++ b/test/CodeGen/X86/switch-zextload.ll @@ -0,0 +1,34 @@ +; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 1 + +; Do zextload, instead of a load and a separate zext. + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" +target triple = "i386-apple-darwin9.6" + %struct.move_s = type { i32, i32, i32, i32, i32, i32 } + %struct.node_t = type { i8, i8, i8, i8, i32, i32, %struct.node_t**, %struct.node_t*, %struct.move_s } + +define fastcc void @set_proof_and_disproof_numbers(%struct.node_t* nocapture %node) nounwind { +entry: + %0 = load i8* null, align 1 ; [#uses=1] + switch i8 %0, label %return [ + i8 2, label %bb31 + i8 0, label %bb80 + i8 1, label %bb82 + i8 3, label %bb84 + ] + +bb31: ; preds = %entry + unreachable + +bb80: ; preds = %entry + ret void + +bb82: ; preds = %entry + ret void + +bb84: ; preds = %entry + ret void + +return: ; preds = %entry + ret void +}