[mips] Make sure loads from lazy-binding entries do not get CSE'd or hoisted out of loops.

Previously, two consecutive calls to function "func" would result in the
following sequence of instructions:

1. load $16, %got(func)($gp) // load address of lazy-binding stub.
2. move $25, $16
3. jalr $25                  // jump to lazy-binding stub.
4. nop
5. move $25, $16
6. jalr $25                  // jump to lazy-binding stub again.

With this patch, the second call directly jumps to func's address, bypassing
the lazy-binding resolution routine:

1. load $25, %got(func)($gp) // load address of lazy-binding stub.
2. jalr $25                  // jump to lazy-binding stub.
3. nop
4. load $25, %got(func)($gp) // load resolved address of func.
5. jalr $25                  // directly jump to func.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191591 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Akira Hatanaka 2013-09-28 00:12:32 +00:00
parent e5f32cf320
commit 6ff59a16a0
8 changed files with 90 additions and 28 deletions

View File

@ -419,6 +419,8 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
bool IsPICCall, bool GlobalOrExternal, bool InternalLinkage,
CallLoweringInfo &CLI, SDValue Callee, SDValue Chain) const {
SelectionDAG &DAG = CLI.DAG;
MachineFunction &MF = DAG.getMachineFunction();
MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
const char* Mips16HelperFunction = 0;
bool NeedMips16Helper = false;
@ -474,9 +476,10 @@ getOpndList(SmallVectorImpl<SDValue> &Ops,
if (NeedMips16Helper) {
RegsToPass.push_front(std::make_pair(V0Reg, Callee));
JumpTarget = DAG.getExternalSymbol(Mips16HelperFunction, getPointerTy());
JumpTarget = getAddrGlobal(cast<ExternalSymbolSDNode>(JumpTarget),
JumpTarget.getValueType(), DAG,
MipsII::MO_GOT);
ExternalSymbolSDNode *S = cast<ExternalSymbolSDNode>(JumpTarget);
JumpTarget = getAddrGlobal(S, JumpTarget.getValueType(), DAG,
MipsII::MO_GOT, Chain,
FuncInfo->callPtrInfo(S->getSymbol()));
} else
RegsToPass.push_front(std::make_pair((unsigned)Mips::T9, Callee));
}

View File

@ -421,8 +421,7 @@ bool LoadFromStackOrConst::hasHazard_(const MachineInstr &MI) {
return false;
if (const PseudoSourceValue *PSV = dyn_cast<const PseudoSourceValue>(V))
return !PSV->PseudoSourceValue::isConstant(0) &&
(V != PseudoSourceValue::getStack());
return !PSV->isConstant(0) && V != PseudoSourceValue::getStack();
return true;
}

View File

@ -1468,10 +1468,12 @@ SDValue MipsTargetLowering::lowerGlobalAddress(SDValue Op,
if (LargeGOT)
return getAddrGlobalLargeGOT(N, Ty, DAG, MipsII::MO_GOT_HI16,
MipsII::MO_GOT_LO16);
MipsII::MO_GOT_LO16, DAG.getEntryNode(),
MachinePointerInfo::getGOT());
return getAddrGlobal(N, Ty, DAG,
HasMips64 ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16);
HasMips64 ? MipsII::MO_GOT_DISP : MipsII::MO_GOT16,
DAG.getEntryNode(), MachinePointerInfo::getGOT());
}
SDValue MipsTargetLowering::lowerBlockAddress(SDValue Op,
@ -2313,6 +2315,7 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *MFI = MF.getFrameInfo();
const TargetFrameLowering *TFL = MF.getTarget().getFrameLowering();
MipsFunctionInfo *FuncInfo = MF.getInfo<MipsFunctionInfo>();
bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
// Analyze operands of the call, assigning locations to each operand.
@ -2446,29 +2449,36 @@ MipsTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
if (IsPICCall) {
InternalLinkage = G->getGlobal()->hasInternalLinkage();
const GlobalValue *Val = G->getGlobal();
InternalLinkage = Val->hasInternalLinkage();
if (InternalLinkage)
Callee = getAddrLocal(G, Ty, DAG, HasMips64);
else if (LargeGOT)
Callee = getAddrGlobalLargeGOT(G, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16);
MipsII::MO_CALL_LO16, Chain,
FuncInfo->callPtrInfo(Val));
else
Callee = getAddrGlobal(G, Ty, DAG, MipsII::MO_GOT_CALL);
Callee = getAddrGlobal(G, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
FuncInfo->callPtrInfo(Val));
} else
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, getPointerTy(), 0,
MipsII::MO_NO_FLAG);
GlobalOrExternal = true;
}
else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
const char *Sym = S->getSymbol();
if (!IsN64 && !IsPIC) // !N64 && static
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
Callee = DAG.getTargetExternalSymbol(Sym, getPointerTy(),
MipsII::MO_NO_FLAG);
else if (LargeGOT)
Callee = getAddrGlobalLargeGOT(S, Ty, DAG, MipsII::MO_CALL_HI16,
MipsII::MO_CALL_LO16);
MipsII::MO_CALL_LO16, Chain,
FuncInfo->callPtrInfo(Sym));
else // N64 || PIC
Callee = getAddrGlobal(S, Ty, DAG, MipsII::MO_GOT_CALL);
Callee = getAddrGlobal(S, Ty, DAG, MipsII::MO_GOT_CALL, Chain,
FuncInfo->callPtrInfo(Sym));
GlobalOrExternal = true;
}

View File

@ -275,12 +275,12 @@ namespace llvm {
// (load (wrapper $gp, %got(sym)))
template<class NodeTy>
SDValue getAddrGlobal(NodeTy *N, EVT Ty, SelectionDAG &DAG,
unsigned Flag) const {
unsigned Flag, SDValue Chain,
const MachinePointerInfo &PtrInfo) const {
SDLoc DL(N);
SDValue Tgt = DAG.getNode(MipsISD::Wrapper, DL, Ty, getGlobalReg(DAG, Ty),
getTargetNode(N, Ty, DAG, Flag));
return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Tgt,
MachinePointerInfo::getGOT(), false, false, false, 0);
return DAG.getLoad(Ty, DL, Chain, Tgt, PtrInfo, false, false, false, 0);
}
// This method creates the following nodes, which are necessary for
@ -289,15 +289,17 @@ namespace llvm {
// (load (wrapper (add %hi(sym), $gp), %lo(sym)))
template<class NodeTy>
SDValue getAddrGlobalLargeGOT(NodeTy *N, EVT Ty, SelectionDAG &DAG,
unsigned HiFlag, unsigned LoFlag) const {
unsigned HiFlag, unsigned LoFlag,
SDValue Chain,
const MachinePointerInfo &PtrInfo) const {
SDLoc DL(N);
SDValue Hi = DAG.getNode(MipsISD::Hi, DL, Ty,
getTargetNode(N, Ty, DAG, HiFlag));
Hi = DAG.getNode(ISD::ADD, DL, Ty, Hi, getGlobalReg(DAG, Ty));
SDValue Wrapper = DAG.getNode(MipsISD::Wrapper, DL, Ty, Hi,
getTargetNode(N, Ty, DAG, LoFlag));
return DAG.getLoad(Ty, DL, DAG.getEntryNode(), Wrapper,
MachinePointerInfo::getGOT(), false, false, false, 0);
return DAG.getLoad(Ty, DL, Chain, Wrapper, PtrInfo, false, false, false,
0);
}
// This method creates the following nodes, which are necessary for

View File

@ -160,7 +160,14 @@ for.end: ; preds = %for.body, %entry
;
; SUCCBB-LABEL: succbbs_br1:
; SUCCBB: beqz ${{[0-9]+}}, $BB
; SUCCBB-NEXT: lw $25, %call16(foo100)
; SUCCBB-NEXT: lw ${{[0-9]+}}, %got(foo101)(${{[0-9]+}})
; Internal fastcc helper that makes two consecutive tail calls to @foo100 and
; returns. It is called from the if.then block of @succbbs_br1.
define internal fastcc void @foo101() {
entry:
tail call void @foo100()
tail call void @foo100()
ret void
}
define void @succbbs_br1(i32 %a) {
entry:
@ -168,7 +175,7 @@ entry:
br i1 %tobool, label %if.end, label %if.then
if.then: ; preds = %entry
tail call void @foo100() #1
tail call fastcc void @foo101()
br label %if.end
if.end: ; preds = %entry, %if.then

View File

@ -9,11 +9,11 @@ entry:
; CHECK: lw $25, %call16(ff1)
; CHECK: jalr
tail call void @ff1(i32 %i, i64 1085102592623924856) nounwind
; CHECK: lw $25, %call16(ff2)
; CHECK: lw $[[R2:[0-9]+]], 80($sp)
; CHECK: lw $[[R3:[0-9]+]], 84($sp)
; CHECK: move $4, $[[R2]]
; CHECK: move $5, $[[R3]]
; CHECK-DAG: lw $25, %call16(ff2)
; CHECK-DAG: lw $[[R2:[0-9]+]], 80($sp)
; CHECK-DAG: lw $[[R3:[0-9]+]], 84($sp)
; CHECK-DAG: move $4, $[[R2]]
; CHECK-DAG: move $5, $[[R3]]
; CHECK: jalr $25
tail call void @ff2(i64 %ll, double 3.000000e+00) nounwind
%sub = add nsw i32 %i, -1

View File

@ -18,11 +18,11 @@ entry:
; 64: dsll $[[R0]], $[[R0]], 48
; 64: daddiu $[[R0]], $[[R0]], -1
; 64: dsll $[[R0]], $[[R0]], 16
; 64: daddiu $[[R0]], $[[R0]], -48
; 64: daddiu $[[R0]], $[[R0]], -32
; 64: daddu $sp, $sp, $[[R0]]
; 64: lui $[[R1:[0-9]+]], 1
; 64: daddu $[[R1]], $sp, $[[R1]]
; 64: sd $ra, 40($[[R1]])
; 64: sd $ra, 24($[[R1]])
%agg.tmp = alloca %struct.S1, align 1
%tmp = getelementptr inbounds %struct.S1* %agg.tmp, i32 0, i32 0, i32 0

View File

@ -0,0 +1,41 @@
; RUN: llc -march=mipsel < %s | FileCheck %s
; CHECK-LABEL: foo6:
; CHECK: %while.body
; CHECK: lw $25, %call16(foo2)(${{[0-9]+}})
; CHECK: jalr $25
; CHECK: %while.end
; Counts %n down to zero, calling @foo2 once per iteration of while.body; the
; loop is skipped entirely when %n is 0. The FileCheck directives above expect
; the %call16(foo2) load into $25 and the jalr to appear between the
; %while.body and %while.end labels, i.e. inside the loop body.
define void @foo6(i32 %n) {
entry:
%tobool1 = icmp eq i32 %n, 0
br i1 %tobool1, label %while.end, label %while.body
while.body: ; preds = %entry, %while.body
%n.addr.02 = phi i32 [ %dec, %while.body ], [ %n, %entry ]
%dec = add nsw i32 %n.addr.02, -1
tail call void @foo2()
%tobool = icmp eq i32 %dec, 0
br i1 %tobool, label %while.end, label %while.body
while.end: ; preds = %while.body, %entry
ret void
}
declare void @foo2()
; CHECK-LABEL: foo1:
; CHECK: lw $25, %call16(foo2)(${{[0-9]+}})
; CHECK: jalr $25
; CHECK: lw $25, %call16(foo2)(${{[0-9]+}})
; CHECK: jalr $25
; CHECK: lw $25, %call16(foo2)(${{[0-9]+}})
; CHECK: jalr $25
; Makes three back-to-back calls to @foo2. The FileCheck directives above
; expect a separate %call16(foo2) load into $25 ahead of each of the three
; jalr instructions.
define void @foo1() {
entry:
tail call void @foo2()
tail call void @foo2()
tail call void @foo2()
ret void
}