diff --git a/include/llvm/CodeGen/AsmPrinter.h b/include/llvm/CodeGen/AsmPrinter.h index e1c9a14c900..c9190ce5c65 100644 --- a/include/llvm/CodeGen/AsmPrinter.h +++ b/include/llvm/CodeGen/AsmPrinter.h @@ -264,6 +264,9 @@ public: /// function. virtual void EmitFunctionBodyEnd() {} + /// Targets can override this to emit stuff at the end of a basic block. + virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB) {} + /// Targets should implement this to emit instructions. virtual void EmitInstruction(const MachineInstr *) { llvm_unreachable("EmitInstruction not implemented"); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 424e759caa8..226e4204224 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -804,6 +804,8 @@ void AsmPrinter::EmitFunctionBody() { } } } + + EmitBasicBlockEnd(MBB); } // If the last instruction was a prolog label, then we have a situation where diff --git a/lib/Target/X86/X86AsmPrinter.cpp b/lib/Target/X86/X86AsmPrinter.cpp index 57c7a62bd5c..3c22e883375 100644 --- a/lib/Target/X86/X86AsmPrinter.cpp +++ b/lib/Target/X86/X86AsmPrinter.cpp @@ -47,6 +47,8 @@ using namespace llvm; /// runOnMachineFunction - Emit the function body. 
/// bool X86AsmPrinter::runOnMachineFunction(MachineFunction &MF) { + SMShadowTracker.startFunction(MF); + SetupMachineFunction(MF); if (Subtarget->isTargetCOFF()) { diff --git a/lib/Target/X86/X86AsmPrinter.h b/lib/Target/X86/X86AsmPrinter.h index b1bbe8e41cc..4b9913c01e6 100644 --- a/lib/Target/X86/X86AsmPrinter.h +++ b/lib/Target/X86/X86AsmPrinter.h @@ -15,6 +15,11 @@ #include "llvm/CodeGen/StackMaps.h" #include "llvm/Target/TargetMachine.h" +// Implemented in X86MCInstLower.cpp +namespace { + class X86MCInstLower; +} + namespace llvm { class MCStreamer; class MCSymbol; @@ -25,9 +30,52 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { void GenerateExportDirective(const MCSymbol *Sym, bool IsData); + // This utility class tracks the length of a stackmap instruction's 'shadow'. + // It is used by the X86AsmPrinter to ensure that the stackmap shadow + // invariants (i.e. no other stackmaps, patchpoints, or control flow within + // the shadow) are met, while outputting a minimal number of NOPs for padding. + // + // To minimise the number of NOPs used, the shadow tracker counts the number + // of instruction bytes output since the last stackmap. Only if there are too + // few instruction bytes to cover the shadow are NOPs used for padding. + class StackMapShadowTracker { + public: + StackMapShadowTracker(TargetMachine &TM); + ~StackMapShadowTracker(); + void startFunction(MachineFunction &MF); + void count(MCInst &Inst, const MCSubtargetInfo &STI); + void reset(unsigned RequiredSize) { + RequiredShadowSize = RequiredSize; + CurrentShadowSize = 0; + Count = true; + } + void emitShadowPadding(MCStreamer &OutStreamer, const MCSubtargetInfo &STI); + private: + TargetMachine &TM; + std::unique_ptr<MCCodeEmitter> CodeEmitter; + bool Count; + unsigned RequiredShadowSize, CurrentShadowSize; + }; + + StackMapShadowTracker SMShadowTracker; + + // All instructions emitted by the X86AsmPrinter should use this helper + // method. 
+ // + // This helper function invokes the SMShadowTracker on each instruction before + // outputting it to the OutStreamer. This allows the shadow tracker to minimise + // the number of NOPs used for stackmap padding. + void EmitAndCountInstruction(MCInst &Inst); + + void InsertStackMapShadows(MachineFunction &MF); + void LowerSTACKMAP(const MachineInstr &MI); + void LowerPATCHPOINT(const MachineInstr &MI); + + void LowerTlsAddr(X86MCInstLower &MCInstLowering, const MachineInstr &MI); + public: explicit X86AsmPrinter(TargetMachine &TM, MCStreamer &Streamer) - : AsmPrinter(TM, Streamer), SM(*this) { + : AsmPrinter(TM, Streamer), SM(*this), SMShadowTracker(TM) { Subtarget = &TM.getSubtarget<X86Subtarget>(); } @@ -43,6 +91,10 @@ class LLVM_LIBRARY_VISIBILITY X86AsmPrinter : public AsmPrinter { void EmitInstruction(const MachineInstr *MI) override; + void EmitBasicBlockEnd(const MachineBasicBlock &MBB) override { + SMShadowTracker.emitShadowPadding(OutStreamer, getSubtargetInfo()); + } + bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, unsigned AsmVariant, const char *ExtraCode, raw_ostream &OS) override; diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index 2bd70a96c43..4b55f00db63 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -24,12 +24,14 @@ #include "llvm/IR/GlobalValue.h" #include "llvm/IR/Mangler.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstBuilder.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/TargetRegistry.h" using namespace llvm; namespace { @@ -58,6 +60,52 @@ private: } // end anonymous namespace +// Emit a minimal sequence of nops spanning NumBytes bytes. 
+static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, + const MCSubtargetInfo &STI); + +namespace llvm { + X86AsmPrinter::StackMapShadowTracker::StackMapShadowTracker(TargetMachine &TM) + : TM(TM), Count(false), RequiredShadowSize(0), CurrentShadowSize(0) {} + + X86AsmPrinter::StackMapShadowTracker::~StackMapShadowTracker() {} + + void + X86AsmPrinter::StackMapShadowTracker::startFunction(MachineFunction &MF) { + CodeEmitter.reset(TM.getTarget().createMCCodeEmitter(*TM.getInstrInfo(), + *TM.getRegisterInfo(), + *TM.getSubtargetImpl(), + MF.getContext())); + } + + void X86AsmPrinter::StackMapShadowTracker::count(MCInst &Inst, + const MCSubtargetInfo &STI) { + if (Count) { + SmallString<256> Code; + SmallVector<MCFixup, 4> Fixups; + raw_svector_ostream VecOS(Code); + CodeEmitter->EncodeInstruction(Inst, VecOS, Fixups, STI); + VecOS.flush(); + CurrentShadowSize += Code.size(); + if (CurrentShadowSize >= RequiredShadowSize) + Count = false; // The shadow is big enough. Stop counting. + } + } + + void X86AsmPrinter::StackMapShadowTracker::emitShadowPadding( + MCStreamer &OutStreamer, const MCSubtargetInfo &STI) { + if (Count && CurrentShadowSize < RequiredShadowSize) + EmitNops(OutStreamer, RequiredShadowSize - CurrentShadowSize, + TM.getSubtarget<X86Subtarget>().is64Bit(), STI); + Count = false; + } + + void X86AsmPrinter::EmitAndCountInstruction(MCInst &Inst) { + OutStreamer.EmitInstruction(Inst, getSubtargetInfo()); + SMShadowTracker.count(Inst, getSubtargetInfo()); + } +} // end llvm namespace + X86MCInstLower::X86MCInstLower(const MachineFunction &mf, X86AsmPrinter &asmprinter) : Ctx(mf.getContext()), MF(mf), TM(mf.getTarget()), @@ -602,10 +650,8 @@ ReSimplify: } } -static void LowerTlsAddr(MCStreamer &OutStreamer, - X86MCInstLower &MCInstLowering, - const MachineInstr &MI, - const MCSubtargetInfo& STI) { +void X86AsmPrinter::LowerTlsAddr(X86MCInstLower &MCInstLowering, + const MachineInstr &MI) { bool is64Bits = MI.getOpcode() == X86::TLS_addr64 || MI.getOpcode() == 
X86::TLS_base_addr64; @@ -615,7 +661,7 @@ static void LowerTlsAddr(MCStreamer &OutStreamer, MCContext &context = OutStreamer.getContext(); if (needsPadding) - OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX), STI); + EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); MCSymbolRefExpr::VariantKind SRVK; switch (MI.getOpcode()) { @@ -662,12 +708,12 @@ static void LowerTlsAddr(MCStreamer &OutStreamer, LEA.addOperand(MCOperand::CreateExpr(symRef)); // disp LEA.addOperand(MCOperand::CreateReg(0)); // seg } - OutStreamer.EmitInstruction(LEA, STI); + EmitAndCountInstruction(LEA); if (needsPadding) { - OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX), STI); - OutStreamer.EmitInstruction(MCInstBuilder(X86::DATA16_PREFIX), STI); - OutStreamer.EmitInstruction(MCInstBuilder(X86::REX64_PREFIX), STI); + EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); + EmitAndCountInstruction(MCInstBuilder(X86::DATA16_PREFIX)); + EmitAndCountInstruction(MCInstBuilder(X86::REX64_PREFIX)); } StringRef name = is64Bits ? "__tls_get_addr" : "___tls_get_addr"; @@ -677,9 +723,9 @@ static void LowerTlsAddr(MCStreamer &OutStreamer, MCSymbolRefExpr::VK_PLT, context); - OutStreamer.EmitInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32 - : X86::CALLpcrel32) - .addExpr(tlsRef), STI); + EmitAndCountInstruction(MCInstBuilder(is64Bits ? X86::CALL64pcrel32 + : X86::CALLpcrel32) + .addExpr(tlsRef)); } /// \brief Emit the optimal amount of multi-byte nops on X86. 
@@ -725,10 +771,9 @@ static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, const MCSu break; case X86::NOOPL: case X86::NOOPW: - OS.EmitInstruction(MCInstBuilder(Opc).addReg(BaseReg).addImm(ScaleVal) - .addReg(IndexReg) - .addImm(Displacement) - .addReg(SegmentReg), STI); + OS.EmitInstruction(MCInstBuilder(Opc).addReg(BaseReg) + .addImm(ScaleVal).addReg(IndexReg) + .addImm(Displacement).addReg(SegmentReg), STI); break; } } // while (NumBytes) @@ -736,22 +781,20 @@ static void EmitNops(MCStreamer &OS, unsigned NumBytes, bool Is64Bit, const MCSu // Lower a stackmap of the form: // <id>, <shadowBytes>, ... -static void LowerSTACKMAP(MCStreamer &OS, StackMaps &SM, - const MachineInstr &MI, bool Is64Bit, const MCSubtargetInfo& STI) { - unsigned NumBytes = MI.getOperand(1).getImm(); +void X86AsmPrinter::LowerSTACKMAP(const MachineInstr &MI) { + SMShadowTracker.emitShadowPadding(OutStreamer, getSubtargetInfo()); SM.recordStackMap(MI); - // Emit padding. - // FIXME: These nops ensure that the stackmap's shadow is covered by - // instructions from the same basic block, but the nops should not be - // necessary if instructions from the same block follow the stackmap. - EmitNops(OS, NumBytes, Is64Bit, STI); + unsigned NumShadowBytes = MI.getOperand(1).getImm(); + SMShadowTracker.reset(NumShadowBytes); } // Lower a patchpoint of the form: // [<def>], <id>, <numBytes>, <target>, <numArgs>, <cc>, ... 
-static void LowerPATCHPOINT(MCStreamer &OS, StackMaps &SM, - const MachineInstr &MI, bool Is64Bit, const MCSubtargetInfo& STI) { - assert(Is64Bit && "Patchpoint currently only supports X86-64"); +void X86AsmPrinter::LowerPATCHPOINT(const MachineInstr &MI) { + assert(Subtarget->is64Bit() && "Patchpoint currently only supports X86-64"); + + SMShadowTracker.emitShadowPadding(OutStreamer, getSubtargetInfo()); + SM.recordPatchPoint(MI); PatchPointOpers opers(&MI); @@ -766,16 +809,17 @@ static void LowerPATCHPOINT(MCStreamer &OS, StackMaps &SM, EncodedBytes = 13; else EncodedBytes = 12; - OS.EmitInstruction(MCInstBuilder(X86::MOV64ri).addReg(ScratchReg) - .addImm(CallTarget), STI); - OS.EmitInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg), STI); + EmitAndCountInstruction(MCInstBuilder(X86::MOV64ri).addReg(ScratchReg) + .addImm(CallTarget)); + EmitAndCountInstruction(MCInstBuilder(X86::CALL64r).addReg(ScratchReg)); } // Emit padding. unsigned NumBytes = opers.getMetaOper(PatchPointOpers::NBytesPos).getImm(); assert(NumBytes >= EncodedBytes && "Patchpoint can't request size less than the length of a call."); - EmitNops(OS, NumBytes - EncodedBytes, Is64Bit, STI); + EmitNops(OutStreamer, NumBytes - EncodedBytes, Subtarget->is64Bit(), + getSubtargetInfo()); } void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { @@ -812,7 +856,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { case X86::TLS_addr64: case X86::TLS_base_addr32: case X86::TLS_base_addr64: - return LowerTlsAddr(OutStreamer, MCInstLowering, *MI, getSubtargetInfo()); + return LowerTlsAddr(MCInstLowering, *MI); case X86::MOVPC32r: { // This is a pseudo op for a two instruction sequence with a label, which @@ -825,15 +869,15 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { MCSymbol *PICBase = MF->getPICBaseSymbol(); // FIXME: We would like an efficient form for this, so we don't have to do a // lot of extra uniquing. 
- EmitToStreamer(OutStreamer, MCInstBuilder(X86::CALLpcrel32) + EmitAndCountInstruction(MCInstBuilder(X86::CALLpcrel32) .addExpr(MCSymbolRefExpr::Create(PICBase, OutContext))); // Emit the label. OutStreamer.EmitLabel(PICBase); // popl $reg - EmitToStreamer(OutStreamer, MCInstBuilder(X86::POP32r) - .addReg(MI->getOperand(0).getReg())); + EmitAndCountInstruction(MCInstBuilder(X86::POP32r) + .addReg(MI->getOperand(0).getReg())); return; } @@ -863,7 +907,7 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { DotExpr = MCBinaryExpr::CreateAdd(MCSymbolRefExpr::Create(OpSym,OutContext), DotExpr, OutContext); - EmitToStreamer(OutStreamer, MCInstBuilder(X86::ADD32ri) + EmitAndCountInstruction(MCInstBuilder(X86::ADD32ri) .addReg(MI->getOperand(0).getReg()) .addReg(MI->getOperand(1).getReg()) .addExpr(DotExpr)); @@ -871,21 +915,21 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { } case TargetOpcode::STACKMAP: - return LowerSTACKMAP(OutStreamer, SM, *MI, Subtarget->is64Bit(), getSubtargetInfo()); + return LowerSTACKMAP(*MI); case TargetOpcode::PATCHPOINT: - return LowerPATCHPOINT(OutStreamer, SM, *MI, Subtarget->is64Bit(), getSubtargetInfo()); + return LowerPATCHPOINT(*MI); case X86::MORESTACK_RET: - EmitToStreamer(OutStreamer, MCInstBuilder(getRetOpcode(*Subtarget))); + EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); return; case X86::MORESTACK_RET_RESTORE_R10: // Return, then restore R10. 
- EmitToStreamer(OutStreamer, MCInstBuilder(getRetOpcode(*Subtarget))); - EmitToStreamer(OutStreamer, MCInstBuilder(X86::MOV64rr) - .addReg(X86::R10) - .addReg(X86::RAX)); + EmitAndCountInstruction(MCInstBuilder(getRetOpcode(*Subtarget))); + EmitAndCountInstruction(MCInstBuilder(X86::MOV64rr) + .addReg(X86::R10) + .addReg(X86::RAX)); return; case X86::SEH_PushReg: @@ -922,5 +966,5 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { MCInst TmpInst; MCInstLowering.Lower(MI, TmpInst); - EmitToStreamer(OutStreamer, TmpInst); + EmitAndCountInstruction(TmpInst); } diff --git a/test/CodeGen/X86/stackmap-nops.ll b/test/CodeGen/X86/stackmap-nops.ll index 5a78f24d7b5..3888603c969 100644 --- a/test/CodeGen/X86/stackmap-nops.ll +++ b/test/CodeGen/X86/stackmap-nops.ll @@ -224,6 +224,7 @@ entry: tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 28, i32 28) tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 29, i32 29) tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 30, i32 30) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 31, i32 0) ret void } diff --git a/test/CodeGen/X86/stackmap-shadow-optimization.ll b/test/CodeGen/X86/stackmap-shadow-optimization.ll new file mode 100644 index 00000000000..63debe396b6 --- /dev/null +++ b/test/CodeGen/X86/stackmap-shadow-optimization.ll @@ -0,0 +1,21 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=corei7 -disable-fp-elim | FileCheck %s + +; Check that the X86 stackmap shadow optimization is only outputting a 1-byte +; nop here. 8-bytes are requested, but 7 are covered by the code for the call to +; bar, the frame teardown and the return. 
+define void @shadow_optimization_test() { +entry: +; CHECK-LABEL: shadow_optimization_test: +; CHECK: callq _bar +; CHECK-NOT: nop +; CHECK: callq _bar +; CHECK: retq +; CHECK: nop + call void @bar() + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 0, i32 8) + call void @bar() + ret void +} + +declare void @llvm.experimental.stackmap(i64, i32, ...) +declare void @bar() \ No newline at end of file diff --git a/test/MC/X86/stackmap-nops.ll b/test/MC/X86/stackmap-nops.ll index 98d17ea6a62..2b0b88c80fa 100644 --- a/test/MC/X86/stackmap-nops.ll +++ b/test/MC/X86/stackmap-nops.ll @@ -41,6 +41,7 @@ entry: tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 13, i32 13) tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 14, i32 14) tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 15, i32 15) + tail call void (i64, i32, ...)* @llvm.experimental.stackmap(i64 16, i32 0) ret void }