From ab6c3bb44d254d1b3c9dcce74ba3a917558feab3 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 25 Aug 2008 21:27:18 +0000 Subject: [PATCH] Try approach to moving call address load inside of callseq_start. Now it's done during the preprocess of x86 isel. callseq_start's chain is changed to load's chain node; while load's chain is the last of callseq_start or the loads or copytoreg nodes inserted to move arguments to the right spot. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@55338 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86ISelDAGToDAG.cpp | 139 ++++++++++++++++++++++------- lib/Target/X86/X86ISelLowering.cpp | 4 +- test/CodeGen/X86/fold-call-2.ll | 1 - 3 files changed, 108 insertions(+), 36 deletions(-) diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 29986e50d6d..3f9b1c20d7d 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -373,12 +373,12 @@ bool X86DAGToDAGISel::CanBeFoldedBy(SDNode *N, SDNode *U, SDNode *Root) const { /// load's chain result. static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load, SDValue Store, SDValue TF) { - std::vector Ops; + SmallVector Ops; for (unsigned i = 0, e = TF.Val->getNumOperands(); i != e; ++i) - if (Load.Val == TF.Val->getOperand(i).Val) - Ops.push_back(Load.Val->getOperand(0)); + if (Load.Val == TF.getOperand(i).Val) + Ops.push_back(Load.getOperand(0)); else - Ops.push_back(TF.Val->getOperand(i)); + Ops.push_back(TF.getOperand(i)); CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size()); CurDAG->UpdateNodeOperands(Load, TF, Load.getOperand(1), Load.getOperand(2)); CurDAG->UpdateNodeOperands(Store, Load.getValue(1), Store.getOperand(1), @@ -411,6 +411,49 @@ static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, return false; } +/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain +/// operand and move load below the call's chain operand. +static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load, + SDValue Call, SDValue Chain) { + SmallVector Ops; + for (unsigned i = 0, e = Chain.Val->getNumOperands(); i != e; ++i) + if (Load.Val == Chain.getOperand(i).Val) + Ops.push_back(Load.getOperand(0)); + else + Ops.push_back(Chain.getOperand(i)); + CurDAG->UpdateNodeOperands(Chain, &Ops[0], Ops.size()); + CurDAG->UpdateNodeOperands(Load, Call.getOperand(0), + Load.getOperand(1), Load.getOperand(2)); + Ops.clear(); + Ops.push_back(SDValue(Load.Val, 1)); + for (unsigned i = 1, e = Call.Val->getNumOperands(); i != e; ++i) + Ops.push_back(Call.getOperand(i)); + CurDAG->UpdateNodeOperands(Call, &Ops[0], Ops.size()); +} + +/// isCalleeLoad - Return true if call address is a load and it can be +/// moved below CALLSEQ_START and the chains leading up to the call. +/// Return the CALLSEQ_START by reference as a second output. +static bool isCalleeLoad(SDValue Callee, SDValue &Chain) { + if (Callee.Val == Chain.Val || !Callee.hasOneUse()) + return false; + LoadSDNode *LD = dyn_cast(Callee.Val); + if (!LD || + LD->isVolatile() || + LD->getAddressingMode() != ISD::UNINDEXED || + LD->getExtensionType() != ISD::NON_EXTLOAD) + return false; + + // Now let's find the callseq_start. + while (Chain.getOpcode() != ISD::CALLSEQ_START) { + if (!Chain.hasOneUse()) + return false; + Chain = Chain.getOperand(0); + } + return Chain.getOperand(0).Val == Callee.Val; +} + + /// PreprocessForRMW - Preprocess the DAG to make instruction selection better. /// This is only run if not in -fast mode (aka -O0). /// This allows the instruction selector to pick more read-modify-write @@ -454,9 +497,39 @@ static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address, void X86DAGToDAGISel::PreprocessForRMW() { for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), E = CurDAG->allnodes_end(); I != E; ++I) { + if (I->getOpcode() == X86ISD::CALL) { + /// Also try moving call address load from outside callseq_start to just + /// before the call to allow it to be folded. + /// + /// [Load chain] + /// ^ + /// | + /// [Load] + /// ^ ^ + /// | | + /// / \-- + /// / | + ///[CALLSEQ_START] | + /// ^ | + /// | | + /// [LOAD/C2Reg] | + /// | | + /// \ / + /// \ / + /// [CALL] + SDValue Chain = I->getOperand(0); + SDValue Load = I->getOperand(1); + if (!isCalleeLoad(Load, Chain)) + continue; + MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain); + ++NumLoadMoved; + continue; + } + if (!ISD::isNON_TRUNCStore(I)) continue; SDValue Chain = I->getOperand(0); + if (Chain.Val->getOpcode() != ISD::TokenFactor) continue; @@ -471,35 +544,35 @@ void X86DAGToDAGISel::PreprocessForRMW() { SDValue Load; unsigned Opcode = N1.Val->getOpcode(); switch (Opcode) { - case ISD::ADD: - case ISD::MUL: - case ISD::AND: - case ISD::OR: - case ISD::XOR: - case ISD::ADDC: - case ISD::ADDE: - case ISD::VECTOR_SHUFFLE: { - SDValue N10 = N1.getOperand(0); - SDValue N11 = N1.getOperand(1); - RModW = isRMWLoad(N10, Chain, N2, Load); - if (!RModW) - RModW = isRMWLoad(N11, Chain, N2, Load); - break; - } - case ISD::SUB: - case ISD::SHL: - case ISD::SRA: - case ISD::SRL: - case ISD::ROTL: - case ISD::ROTR: - case ISD::SUBC: - case ISD::SUBE: - case X86ISD::SHLD: - case X86ISD::SHRD: { - SDValue N10 = N1.getOperand(0); - RModW = isRMWLoad(N10, Chain, N2, Load); - break; - } + case ISD::ADD: + case ISD::MUL: + case ISD::AND: + case ISD::OR: + case ISD::XOR: + case ISD::ADDC: + case ISD::ADDE: + case ISD::VECTOR_SHUFFLE: { + SDValue N10 = N1.getOperand(0); + SDValue N11 = N1.getOperand(1); + RModW = isRMWLoad(N10, Chain, N2, Load); + if (!RModW) + RModW = isRMWLoad(N11, Chain, N2, Load); + break; + } + case ISD::SUB: + case ISD::SHL: + case ISD::SRA: + case ISD::SRL: + case ISD::ROTL: + case ISD::ROTR: + case ISD::SUBC: + case ISD::SUBE: + case X86ISD::SHLD: + case X86ISD::SHRD: { + SDValue N10 = N1.getOperand(0); + RModW = isRMWLoad(N10, Chain, N2, Load); + break; + } } if (RModW) { diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 5d8cb1cd291..86016832e89 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1457,12 +1457,12 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF, SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) { MachineFunction &MF = DAG.getMachineFunction(); - SDValue Chain = Op.getOperand(0); + SDValue Chain = Op.getOperand(0); unsigned CC = cast(Op.getOperand(1))->getValue(); bool isVarArg = cast(Op.getOperand(2))->getValue() != 0; bool IsTailCall = cast(Op.getOperand(3))->getValue() != 0 && CC == CallingConv::Fast && PerformTailCallOpt; - SDValue Callee = Op.getOperand(4); + SDValue Callee = Op.getOperand(4); bool Is64Bit = Subtarget->is64Bit(); bool IsStructRet = CallIsStructReturn(Op); diff --git a/test/CodeGen/X86/fold-call-2.ll b/test/CodeGen/X86/fold-call-2.ll index fe060a7ea5a..349f986830a 100644 --- a/test/CodeGen/X86/fold-call-2.ll +++ b/test/CodeGen/X86/fold-call-2.ll @@ -1,5 +1,4 @@ ; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin | grep mov | count 1 -; XFAIL: * @f = external global void ()* ; [#uses=1]