diff --git a/lib/Target/PowerPC/PPCAsmPrinter.cpp b/lib/Target/PowerPC/PPCAsmPrinter.cpp
index 844d98d1a95..6f67c598c75 100644
--- a/lib/Target/PowerPC/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/PPCAsmPrinter.cpp
@@ -33,6 +33,7 @@
 #include "llvm/CodeGen/MachineInstr.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineModuleInfoImpls.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
@@ -106,6 +107,7 @@ namespace {
 
     void EmitFunctionEntryLabel() override;
 
+    void EmitFunctionBodyStart() override;
     void EmitFunctionBodyEnd() override;
   };
 
@@ -781,6 +783,14 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
 }
 
 void PPCLinuxAsmPrinter::EmitStartOfAsmFile(Module &M) {
+  if (Subtarget.isELFv2ABI()) {
+    PPCTargetStreamer *TS =
+      static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+
+    if (TS)
+      TS->emitAbiVersion(2);
+  }
+
   if (Subtarget.isPPC64() || TM.getRelocationModel() != Reloc::PIC_)
     return AsmPrinter::EmitStartOfAsmFile(M);
 
@@ -834,7 +844,11 @@ void PPCLinuxAsmPrinter::EmitFunctionEntryLabel() {
     } else
       return AsmPrinter::EmitFunctionEntryLabel();
   }
-    
+
+  // ELFv2 ABI - Normal entry label.
+  if (Subtarget.isELFv2ABI())
+    return AsmPrinter::EmitFunctionEntryLabel();
+
   // Emit an official procedure descriptor.
   MCSectionSubPair Current = OutStreamer.getCurrentSection();
   const MCSectionELF *Section = OutStreamer.getContext().getELFSection(".opd",
@@ -919,6 +933,68 @@ bool PPCLinuxAsmPrinter::doFinalization(Module &M) {
   return AsmPrinter::doFinalization(M);
 }
 
+/// EmitFunctionBodyStart - Emit a global entry point prefix for ELFv2.
+void PPCLinuxAsmPrinter::EmitFunctionBodyStart() {
+  // In the ELFv2 ABI, in functions that use the TOC register, we need to
+  // provide two entry points. The ABI guarantees that when calling the
+  // local entry point, r2 is set up by the caller to contain the TOC base
+  // for this function, and when calling the global entry point, r12 is set
+  // up by the caller to hold the address of the global entry point. We
+  // thus emit a prefix sequence along the following lines:
+  //
+  // func:
+  //         # global entry point
+  //         addis r2,r12,(.TOC.-func)@ha
+  //         addi  r2,r2,(.TOC.-func)@l
+  //         .localentry func, .-func
+  //         # local entry point, followed by function body
+  //
+  // This ensures we have r2 set up correctly while executing the function
+  // body, no matter which entry point is called.
+  if (Subtarget.isELFv2ABI()
+      // Only do all that if the function uses r2 in the first place.
+      && !MF->getRegInfo().use_empty(PPC::X2)) {
+
+    MCSymbol *GlobalEntryLabel = OutContext.CreateTempSymbol();
+    OutStreamer.EmitLabel(GlobalEntryLabel);
+    const MCSymbolRefExpr *GlobalEntryLabelExp =
+      MCSymbolRefExpr::Create(GlobalEntryLabel, OutContext);
+
+    MCSymbol *TOCSymbol = OutContext.GetOrCreateSymbol(StringRef(".TOC."));
+    const MCExpr *TOCDeltaExpr =
+      MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create(TOCSymbol, OutContext),
+                              GlobalEntryLabelExp, OutContext);
+
+    const MCExpr *TOCDeltaHi =
+      PPCMCExpr::CreateHa(TOCDeltaExpr, false, OutContext);
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDIS)
+                                .addReg(PPC::X2)
+                                .addReg(PPC::X12)
+                                .addExpr(TOCDeltaHi));
+
+    const MCExpr *TOCDeltaLo =
+      PPCMCExpr::CreateLo(TOCDeltaExpr, false, OutContext);
+    EmitToStreamer(OutStreamer, MCInstBuilder(PPC::ADDI)
+                                .addReg(PPC::X2)
+                                .addReg(PPC::X2)
+                                .addExpr(TOCDeltaLo));
+
+    MCSymbol *LocalEntryLabel = OutContext.CreateTempSymbol();
+    OutStreamer.EmitLabel(LocalEntryLabel);
+    const MCSymbolRefExpr *LocalEntryLabelExp =
+      MCSymbolRefExpr::Create(LocalEntryLabel, OutContext);
+    const MCExpr *LocalOffsetExp =
+      MCBinaryExpr::CreateSub(LocalEntryLabelExp,
+                              GlobalEntryLabelExp, OutContext);
+
+    PPCTargetStreamer *TS =
+      static_cast<PPCTargetStreamer *>(OutStreamer.getTargetStreamer());
+
+    if (TS)
+      TS->emitLocalEntry(CurrentFnSym, LocalOffsetExp);
+  }
+}
+
 /// EmitFunctionBodyEnd - Print the traceback table before the .size
 /// directive.
 ///
diff --git a/lib/Target/PowerPC/PPCFastISel.cpp b/lib/Target/PowerPC/PPCFastISel.cpp
index 92a0ec1a9b3..d9e011e18fe 100644
--- a/lib/Target/PowerPC/PPCFastISel.cpp
+++ b/lib/Target/PowerPC/PPCFastISel.cpp
@@ -1498,6 +1498,10 @@ bool PPCFastISel::SelectCall(const Instruction *I) {
   for (unsigned II = 0, IE = RegArgs.size(); II != IE; ++II)
     MIB.addReg(RegArgs[II], RegState::Implicit);
 
+  // Direct calls in the ELFv2 ABI need the TOC register live into the call.
+  if (PPCSubTarget->isELFv2ABI())
+    MIB.addReg(PPC::X2, RegState::Implicit);
+
   // Add a register mask with the call-preserved registers. Proper
   // defs for return values will be added by setPhysRegsDeadExcept().
   MIB.addRegMask(TRI.getCallPreservedMask(CC));
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index c671e02e813..2731053c17b 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -3371,6 +3371,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
 
   bool isPPC64 = Subtarget.isPPC64();
   bool isSVR4ABI = Subtarget.isSVR4ABI();
+  bool isELFv2ABI = Subtarget.isELFv2ABI();
 
   EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
   NodeTys.push_back(MVT::Other); // Returns a chain
@@ -3440,7 +3441,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
     // to do the call, we can't use PPCISD::CALL.
     SDValue MTCTROps[] = {Chain, Callee, InFlag};
 
-    if (isSVR4ABI && isPPC64) {
+    if (isSVR4ABI && isPPC64 && !isELFv2ABI) {
       // Function pointers in the 64-bit SVR4 ABI do not point to the function
       // entry point, but to the function descriptor (the function entry point
       // address is part of the function descriptor though).
@@ -3520,7 +3521,7 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
     CallOpc = PPCISD::BCTRL;
     Callee.setNode(nullptr);
     // Add use of X11 (holding environment pointer)
-    if (isSVR4ABI && isPPC64)
+    if (isSVR4ABI && isPPC64 && !isELFv2ABI)
       Ops.push_back(DAG.getRegister(PPC::X11, PtrVT));
     // Add CTR register as callee so a bctr can be emitted later.
     if (isTailCall)
@@ -3542,6 +3543,10 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
                                   RegsToPass[i].second.getValueType()));
 
+  // Direct calls in the ELFv2 ABI need the TOC register live into the call.
+  if (Callee.getNode() && isELFv2ABI)
+    Ops.push_back(DAG.getRegister(PPC::X2, PtrVT));
+
   return CallOpc;
 }
 
@@ -3988,6 +3993,7 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
                                     SDLoc dl, SelectionDAG &DAG,
                                     SmallVectorImpl<SDValue> &InVals) const {
 
+  bool isELFv2ABI = Subtarget.isELFv2ABI();
   bool isLittleEndian = Subtarget.isLittleEndian();
   unsigned NumOps = Outs.size();
 
@@ -4373,6 +4379,11 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
     SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
     Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
                          false, false, 0);
+    // In the ELFv2 ABI, R12 must contain the address of an indirect callee.
+    // This does not mean the MTCTR instruction must use R12; it's easier
+    // to model this as an extra parameter, so do that.
+    if (isELFv2ABI)
+      RegsToPass.push_back(std::make_pair((unsigned)PPC::X12, Callee));
   }
 
   // Build a sequence of copy-to-reg nodes chained together with token chain
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 58314538a10..a3cedafb5ef 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -227,6 +227,9 @@ public:
 
   bool isDarwinABI() const { return isDarwin(); }
   bool isSVR4ABI() const { return !isDarwin(); }
+  /// FIXME: Should use a command-line option.
+  bool isELFv2ABI() const { return isPPC64() && isSVR4ABI() &&
+                                   isLittleEndian(); }
 
   bool enableEarlyIfConversion() const override { return hasISEL(); }
 
diff --git a/test/CodeGen/PowerPC/ppc64le-calls.ll b/test/CodeGen/PowerPC/ppc64le-calls.ll
new file mode 100644
index 00000000000..84b431ae2df
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64le-calls.ll
@@ -0,0 +1,17 @@
+; RUN: llc -march=ppc64le -mcpu=pwr8 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+; Indirect calls requires a full stub creation
+define void @test_indirect(void ()* nocapture %fp) {
+; CHECK-LABEL: @test_indirect
+  tail call void %fp()
+; CHECK-DAG: std 2, 40(1)
+; CHECK-DAG: mr 12, 3
+; CHECK-DAG: mtctr 3
+; CHECK: bctrl
+; CHECK-NEXT: ld 2, 40(1)
+  ret void
+}
+
diff --git a/test/CodeGen/PowerPC/ppc64le-localentry.ll b/test/CodeGen/PowerPC/ppc64le-localentry.ll
new file mode 100644
index 00000000000..4676ce8eadc
--- /dev/null
+++ b/test/CodeGen/PowerPC/ppc64le-localentry.ll
@@ -0,0 +1,46 @@
+; RUN: llc -march=ppc64le -mcpu=pwr8 < %s | FileCheck %s
+; RUN: llc -march=ppc64le -mcpu=pwr8 -O0 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-n32:64"
+target triple = "powerpc64le-unknown-linux-gnu"
+
+@number64 = global i64 10, align 8
+
+; CHECK: .abiversion 2
+
+define i64 @use_toc(i64 %a) nounwind {
+entry:
+; CHECK-LABEL: @use_toc
+; CHECK-NEXT: .Ltmp[[TMP1:[0-9]+]]:
+; CHECK-NEXT: addis 2, 12, .TOC.-.Ltmp[[TMP1]]@ha
+; CHECK-NEXT: addi 2, 2, .TOC.-.Ltmp[[TMP1]]@l
+; CHECK-NEXT: .Ltmp[[TMP2:[0-9]+]]:
+; CHECK-NEXT: .localentry use_toc, .Ltmp[[TMP2]]-.Ltmp[[TMP1]]
+; CHECK-NEXT: %entry
+  %0 = load i64* @number64, align 8
+  %cmp = icmp eq i64 %0, %a
+  %conv1 = zext i1 %cmp to i64
+  ret i64 %conv1
+}
+
+declare void @callee()
+define void @use_toc_implicit() nounwind {
+entry:
+; CHECK-LABEL: @use_toc_implicit
+; CHECK-NEXT: .Ltmp[[TMP1:[0-9]+]]:
+; CHECK-NEXT: addis 2, 12, .TOC.-.Ltmp[[TMP1]]@ha
+; CHECK-NEXT: addi 2, 2, .TOC.-.Ltmp[[TMP1]]@l
+; CHECK-NEXT: .Ltmp[[TMP2:[0-9]+]]:
+; CHECK-NEXT: .localentry use_toc_implicit, .Ltmp[[TMP2]]-.Ltmp[[TMP1]]
+; CHECK-NEXT: %entry
+  call void @callee()
+  ret void
+}
+
+define i64 @no_toc(i64 %a) nounwind {
+entry:
+; CHECK-LABEL: @no_toc
+; CHECK-NEXT: %entry
+  ret i64 %a
+}
+