[PowerPC] ELFv2 function call changes

This patch builds upon the two preceding MC changes to implement the
basic ELFv2 function call convention.  In the ELFv1 ABI, a "function
descriptor" was associated with every function, pointing to both the
entry address and the related TOC base (and a static chain pointer
for nested functions).  Function pointers would actually refer to that
descriptor, and the indirect call sequence needed to load up both entry
address and TOC base.

In the ELFv2 ABI, there are no more function descriptors, and function
pointers simply refer to the (global) entry point of the function code.
Indirect function calls simply branch to that address, after loading it
up into r12 (as required by the ABI rules for a global entry point).
Direct function calls continue to just do a "bl" to the target symbol;
this will be resolved by the linker to the local entry point of the
target function if it is local, and to a PLT stub if it is global.
That PLT stub would then load the (global) entry point address of the
final target into r12 and branch to it.  Note that when performing a
local function call, r2 must be set up to point to the current TOC
base: if the target ends up local, the ABI requires that its local
entry point is called with r2 set up; if the target ends up global,
the PLT stub requires that r2 is set up.

This patch implements all LLVM changes to implement that scheme:
- No longer create a function descriptor when emitting a function
  definition (in EmitFunctionEntryLabel)
- Emit two entry points *if* the function needs the TOC base (r2)
  anywhere (this is done EmitFunctionBodyStart; note that this cannot
  be done in EmitFunctionBodyStart because the global entry point
  prologue code must be *part* of the function as covered by debug info).
- In order to make use tracking of r2 (as needed above) work correctly,
  mark direct function calls as implicitly using r2.
- Implement the ELFv2 indirect function call sequence (no function
  descriptors; load target address into r12).
- When creating an ELFv2 object file, emit the .abiversion 2 directive
  to tell the linker to create the appropriate version of PLT stubs.  

Reviewed by Hal Finkel.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213489 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Ulrich Weigand 2014-07-20 23:31:44 +00:00
parent 07d4e558b3
commit edfd4f18bc
6 changed files with 160 additions and 3 deletions

View File

@ -33,6 +33,7 @@
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfoImpls.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
@ -106,6 +107,7 @@ namespace {
void EmitFunctionEntryLabel() override;
void EmitFunctionBodyStart() override;
void EmitFunctionBodyEnd() override;
};
@ -781,6 +783,14 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
}
void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
if (Subtarget.isELFv2ABI()) {
PPCTargetStreamer *TS =
static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
if (TS)
TS->emitAbiVersion(2);
}
if (Subtarget.isPPC64() || TM.getRelocationModel() != Reloc::PIC_)
return AsmPrinter::EmitStartOfAsmFile(M);
@ -834,7 +844,11 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
} else
return AsmPrinter::EmitFunctionEntryLabel();
}
// ELFv2 ABI - Normal entry label.
if (Subtarget.isELFv2ABI())
return AsmPrinter::EmitFunctionEntryLabel();
// Emit an official procedure descriptor.
MCSectionSubPair Current = OutStreamer.getCurrentSection();
const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
@ -919,6 +933,68 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
return AsmPrinter::doFinalization(M);
}
/// EmitFunctionBodyStart - Emit a global entry point prefix for ELFv2.
void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
// In the ELFv2 ABI, in functions that use the TOC register, we need to
// provide two entry points. The ABI guarantees that when calling the
// local entry point, r2 is set up by the caller to contain the TOC base
// for this function, and when calling the global entry point, r12 is set
// up by the caller to hold the address of the global entry point. We
// thus emit a prefix sequence along the following lines:
//
// func:
// # global entry point
// addis r2,r12,(.TOC.-func)@ha
// addi r2,r2,(.TOC.-func)@l
// .localentry func, .-func
// # local entry point, followed by function body
//
// This ensures we have r2 set up correctly while executing the function
// body, no matter which entry point is called.
if (Subtarget.isELFv2ABI()
// Only do all that if the function uses r2 in the first place.
&& !MF->getRegInfo().use_empty(PPC::X2)) {
MCSymbol *GlobalEntryLabel = OutContext.CreateTempSymbol();
OutStreamer.EmitLabel(GlobalEntryLabel);
const MCSymbolRefExpr *GlobalEntryLabelExp =
MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext);
MCSymbol *TOCSymbol = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
const MCExpr *TOCDeltaExpr =
MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext),
GlobalEntryLabelExp, OutContext);
const MCExpr *TOCDeltaHi =
PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext);
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
.addReg(PPC::X2)
.addReg(PPC::X12)
.addExpr(TOCDeltaHi));
const MCExpr *TOCDeltaLo =
PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext);
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI)
.addReg(PPC::X2)
.addReg(PPC::X2)
.addExpr(TOCDeltaLo));
MCSymbol *LocalEntryLabel = OutContext.CreateTempSymbol();
OutStreamer.EmitLabel(LocalEntryLabel);
const MCSymbolRefExpr *LocalEntryLabelExp =
MCSymbolRefExpr::Create(LocalEntryLabel, OutContext);
const MCExpr *LocalOffsetExp =
MCBinaryExpr::CreateSub(LocalEntryLabelExp,
GlobalEntryLabelExp, OutContext);
PPCTargetStreamer *TS =
static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
if (TS)
TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp);
}
}
/// EmitFunctionBodyEnd - Print the traceback table before the .size
/// directive.
///

View File

@ -1498,6 +1498,10 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
MIB.addReg(RegArgs[II], RegState::Implicit);
// Direct calls in the ELFv2 ABI need the TOC register live into the call.
if (PPCSubTarget->isELFv2ABI())
MIB.addReg(PPC::X2, RegState::Implicit);
// Add a register mask with the call-preserved registers. Proper
// defs for return values will be added by setPhysRegsDeadExcept().
MIB.addRegMask(TRI.getCallPreservedMask(CC));

View File

@ -3371,6 +3371,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
bool isPPC64 = Subtarget.isPPC64();
bool isSVR4ABI = Subtarget.isSVR4ABI();
bool isELFv2ABI = Subtarget.isELFv2ABI();
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
NodeTys.push_back(MVT::Other); // Returns a chain
@ -3440,7 +3441,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
// to do the call, we can't use PPCISD::CALL.
SDValue MTCTROps[] = {Chain, Callee, InFlag};
if (isSVR4ABI && isPPC64) {
if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
// Function pointers in the 64-bit SVR4 ABI do not point to the function
// entry point, but to the function descriptor (the function entry point
// address is part of the function descriptor though).
@ -3520,7 +3521,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
CallOpc = PPCISD::BCTRL;
Callee.setNode(nullptr);
// Add use of X11 (holding environment pointer)
if (isSVR4ABI && isPPC64)
if (isSVR4ABI && isPPC64 && !isELFv2ABI)
Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
// Add CTR register as callee so a bctr can be emitted later.
if (isTailCall)
@ -3542,6 +3543,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
Ops.push_back(DAG.getRegister(RegsToPass[i].first,
RegsToPass[i].second.getValueType()));
// Direct calls in the ELFv2 ABI need the TOC register live into the call.
if (Callee.getNode() && isELFv2ABI)
Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
return CallOpc;
}
@ -3988,6 +3993,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
SDLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
bool isELFv2ABI = Subtarget.isELFv2ABI();
bool isLittleEndian = Subtarget.isLittleEndian();
unsigned NumOps = Outs.size();
@ -4373,6 +4379,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
false, false, 0);
// In the ELFv2 ABI, R12 must contain the address of an indirect callee.
// This does not mean the MTCTR instruction must use R12; it's easier
// to model this as an extra parameter, so do that.
if (isELFv2ABI)
RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
}
// Build a sequence of copy-to-reg nodes chained together with token chain

View File

@ -227,6 +227,9 @@ public:
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
/// FIXME: Should use a command-line option.
bool isELFv2ABI() const { return isPPC64() && isSVR4ABI() &&
isLittleEndian(); }
bool enableEarlyIfConversion() const override { return hasISEL(); }

View File

@ -0,0 +1,17 @@
; RUN: llc -march=ppc64le -mcpu=pwr8 < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
; Indirect calls requires a full stub creation
define void @test_indirect(void ()* nocapture %fp) {
; CHECK-LABEL: @test_indirect
tail call void %fp()
; CHECK-DAG: std 2, 40(1)
; CHECK-DAG: mr 12, 3
; CHECK-DAG: mtctr 3
; CHECK: bctrl
; CHECK-NEXT: ld 2, 40(1)
ret void
}

View File

@ -0,0 +1,46 @@
; RUN: llc -march=ppc64le -mcpu=pwr8 < %s | FileCheck %s
; RUN: llc -march=ppc64le -mcpu=pwr8 -O0 < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
@number64 = global i64 10, align 8
; CHECK: .abiversion 2
define i64 @use_toc(i64 %a) nounwind {
entry:
; CHECK-LABEL: @use_toc
; CHECK-NEXT: .Ltmp[[TMP1:[0-9]+]]:
; CHECK-NEXT: addis 2, 12, .TOC.-.Ltmp[[TMP1]]@ha
; CHECK-NEXT: addi 2, 2, .TOC.-.Ltmp[[TMP1]]@l
; CHECK-NEXT: .Ltmp[[TMP2:[0-9]+]]:
; CHECK-NEXT: .localentry use_toc, .Ltmp[[TMP2]]-.Ltmp[[TMP1]]
; CHECK-NEXT: %entry
%0 = load i64* @number64, align 8
%cmp = icmp eq i64 %0, %a
%conv1 = zext i1 %cmp to i64
ret i64 %conv1
}
declare void @callee()
define void @use_toc_implicit() nounwind {
entry:
; CHECK-LABEL: @use_toc_implicit
; CHECK-NEXT: .Ltmp[[TMP1:[0-9]+]]:
; CHECK-NEXT: addis 2, 12, .TOC.-.Ltmp[[TMP1]]@ha
; CHECK-NEXT: addi 2, 2, .TOC.-.Ltmp[[TMP1]]@l
; CHECK-NEXT: .Ltmp[[TMP2:[0-9]+]]:
; CHECK-NEXT: .localentry use_toc_implicit, .Ltmp[[TMP2]]-.Ltmp[[TMP1]]
; CHECK-NEXT: %entry
call void @callee()
ret void
}
define i64 @no_toc(i64 %a) nounwind {
entry:
; CHECK-LABEL: @no_toc
; CHECK-NEXT: %entry
ret i64 %a
}