diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index 2295f797cff..16e7d5b445b 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -164,7 +164,7 @@ void NVPTXAsmPrinter::emitLineNumberAsDotLoc(const MachineInstr &MI) { void NVPTXAsmPrinter::EmitInstruction(const MachineInstr *MI) { SmallString<128> Str; raw_svector_ostream OS(Str); - if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) + if (nvptxSubtarget->getDrvInterface() == NVPTX::CUDA) emitLineNumberAsDotLoc(*MI); MCInst Inst; @@ -237,8 +237,6 @@ void NVPTXAsmPrinter::lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp) { void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { OutMI.setOpcode(MI->getOpcode()); - const NVPTXSubtarget &ST = TM.getSubtarget(); - // Special: Do not mangle symbol operand of CALL_PROTOTYPE if (MI->getOpcode() == NVPTX::CALL_PROTOTYPE) { const MachineOperand &MO = MI->getOperand(0); @@ -251,7 +249,7 @@ void NVPTXAsmPrinter::lowerToMCInst(const MachineInstr *MI, MCInst &OutMI) { const MachineOperand &MO = MI->getOperand(i); MCOperand MCOp; - if (!ST.hasImageHandles()) { + if (!nvptxSubtarget->hasImageHandles()) { if (lowerImageHandleOperand(MI, i, MCOp)) { OutMI.addOperand(MCOp); continue; @@ -349,11 +347,11 @@ MCOperand NVPTXAsmPrinter::GetSymbolRef(const MCSymbol *Symbol) { void NVPTXAsmPrinter::printReturnValStr(const Function *F, raw_ostream &O) { const DataLayout *TD = TM.getDataLayout(); - const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); + const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); Type *Ty = F->getReturnType(); - bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + bool isABI = (nvptxSubtarget->getSmVersion() >= 20); if (Ty->getTypeID() == Type::VoidTyID) return; @@ -506,14 +504,13 @@ void NVPTXAsmPrinter::EmitFunctionBodyEnd() { void NVPTXAsmPrinter::emitImplicitDef(const MachineInstr *MI) const { unsigned RegNo = MI->getOperand(0).getReg(); - const TargetRegisterInfo *TRI = TM.getSubtargetImpl()->getRegisterInfo(); + const TargetRegisterInfo *TRI = nvptxSubtarget->getRegisterInfo(); if (TRI->isVirtualRegister(RegNo)) { OutStreamer.AddComment(Twine("implicit-def: ") + getVirtualRegisterName(RegNo)); } else { - OutStreamer.AddComment( - Twine("implicit-def: ") + - TM.getSubtargetImpl()->getRegisterInfo()->getName(RegNo)); + OutStreamer.AddComment(Twine("implicit-def: ") + + nvptxSubtarget->getRegisterInfo()->getName(RegNo)); } OutStreamer.AddBlankLine(); } @@ -815,6 +812,14 @@ void NVPTXAsmPrinter::recordAndEmitFilenames(Module &M) { } bool NVPTXAsmPrinter::doInitialization(Module &M) { + // Construct a default subtarget off of the TargetMachine defaults. The + // rest of NVPTX isn't friendly to change subtargets per function and + // so the default TargetMachine will have all of the options. + StringRef TT = TM.getTargetTriple(); + StringRef CPU = TM.getTargetCPU(); + StringRef FS = TM.getTargetFeatureString(); + const NVPTXTargetMachine &NTM = static_cast(TM); + const NVPTXSubtarget STI(TT, CPU, FS, NTM, NTM.is64Bit()); SmallString<128> Str1; raw_svector_ostream OS1(Str1); @@ -832,7 +837,7 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { Mang = new Mangler(TM.getDataLayout()); // Emit header before any dwarf directives are emitted below. - emitHeader(M, OS1); + emitHeader(M, OS1, STI); OutStreamer.EmitRawText(OS1.str()); // Already commented out @@ -848,7 +853,8 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { OutStreamer.AddBlankLine(); } - if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) + // If we're not NVCL we're CUDA, go ahead and emit filenames. + if (Triple(TM.getTargetTriple()).getOS() != Triple::NVCL) recordAndEmitFilenames(M); GlobalsEmitted = false; @@ -889,22 +895,23 @@ void NVPTXAsmPrinter::emitGlobals(const Module &M) { OutStreamer.EmitRawText(OS2.str()); } -void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { +void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O, + const NVPTXSubtarget &STI) { O << "//\n"; O << "// Generated by LLVM NVPTX Back-End\n"; O << "//\n"; O << "\n"; - unsigned PTXVersion = nvptxSubtarget.getPTXVersion(); + unsigned PTXVersion = STI.getPTXVersion(); O << ".version " << (PTXVersion / 10) << "." << (PTXVersion % 10) << "\n"; O << ".target "; - O << nvptxSubtarget.getTargetName(); + O << STI.getTargetName(); - if (nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) + if (STI.getDrvInterface() == NVPTX::NVCL) O << ", texmode_independent"; - if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { - if (!nvptxSubtarget.hasDouble()) + if (STI.getDrvInterface() == NVPTX::CUDA) { + if (!STI.hasDouble()) O << ", map_f64_to_f32"; } @@ -914,7 +921,7 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { O << "\n"; O << ".address_size "; - if (nvptxSubtarget.is64Bit()) + if (static_cast(TM).is64Bit()) O << "64"; else O << "32"; @@ -924,7 +931,6 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { } bool NVPTXAsmPrinter::doFinalization(Module &M) { - // If we did not emit any functions, then the global declarations have not // yet been emitted. if (!GlobalsEmitted) { @@ -986,7 +992,7 @@ bool NVPTXAsmPrinter::doFinalization(Module &M) { void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, raw_ostream &O) { - if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) { + if (nvptxSubtarget->getDrvInterface() == NVPTX::CUDA) { if (V->hasExternalLinkage()) { if (isa(V)) { const GlobalVariable *GVar = cast(V); @@ -1218,7 +1224,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, AggBuffer aggBuffer(ElementSize, O, *this); bufferAggregateConstant(Initializer, &aggBuffer); if (aggBuffer.numSymbols) { - if (nvptxSubtarget.is64Bit()) { + if (static_cast(TM).is64Bit()) { O << " .u64 " << *getSymbol(GVar) << "["; O << ElementSize / 8; } else { @@ -1316,7 +1322,7 @@ NVPTXAsmPrinter::getPTXFundamentalTypeStr(const Type *Ty, bool useB4PTR) const { case Type::DoubleTyID: return "f64"; case Type::PointerTyID: - if (nvptxSubtarget.is64Bit()) + if (static_cast(TM).is64Bit()) if (useB4PTR) return "b64"; else @@ -1407,8 +1413,8 @@ static unsigned int getOpenCLAlignment(const DataLayout *TD, Type *Ty) { void NVPTXAsmPrinter::printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O) { - if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || - (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) + if ((nvptxSubtarget->getDrvInterface() == NVPTX::NVCL) || + (nvptxSubtarget->getDrvInterface() == NVPTX::CUDA)) O << *getSymbol(I->getParent()) << "_param_" << paramIndex; else { std::string argName = I->getName(); @@ -1427,8 +1433,8 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { Function::const_arg_iterator I, E; int i = 0; - if ((nvptxSubtarget.getDrvInterface() == NVPTX::NVCL) || - (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA)) { + if ((nvptxSubtarget->getDrvInterface() == NVPTX::NVCL) || + (nvptxSubtarget->getDrvInterface() == NVPTX::CUDA)) { O << *CurrentFnSym << "_param_" << paramIndex; return; } @@ -1445,12 +1451,12 @@ void NVPTXAsmPrinter::printParamName(int paramIndex, raw_ostream &O) { void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { const DataLayout *TD = TM.getDataLayout(); const AttributeSet &PAL = F->getAttributes(); - const TargetLowering *TLI = TM.getSubtargetImpl()->getTargetLowering(); + const TargetLowering *TLI = nvptxSubtarget->getTargetLowering(); Function::const_arg_iterator I, E; unsigned paramIndex = 0; bool first = true; bool isKernelFunc = llvm::isKernelFunction(*F); - bool isABI = (nvptxSubtarget.getSmVersion() >= 20); + bool isABI = (nvptxSubtarget->getSmVersion() >= 20); MVT thePointerTy = TLI->getPointerTy(); O << "(\n"; @@ -1469,21 +1475,21 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { if (isImage(*I)) { std::string sname = I->getName(); if (isImageWriteOnly(*I) || isImageReadWrite(*I)) { - if (nvptxSubtarget.hasImageHandles()) + if (nvptxSubtarget->hasImageHandles()) O << "\t.param .u64 .ptr .surfref "; else O << "\t.param .surfref "; O << *CurrentFnSym << "_param_" << paramIndex; } else { // Default image is read_only - if (nvptxSubtarget.hasImageHandles()) + if (nvptxSubtarget->hasImageHandles()) O << "\t.param .u64 .ptr .texref "; else O << "\t.param .texref "; O << *CurrentFnSym << "_param_" << paramIndex; } } else { - if (nvptxSubtarget.hasImageHandles()) + if (nvptxSubtarget->hasImageHandles()) O << "\t.param .u64 .ptr .samplerref "; else O << "\t.param .samplerref "; @@ -1516,7 +1522,7 @@ void NVPTXAsmPrinter::emitFunctionParamList(const Function *F, raw_ostream &O) { // Special handling for pointer arguments to kernel O << "\t.param .u" << thePointerTy.getSizeInBits() << " "; - if (nvptxSubtarget.getDrvInterface() != NVPTX::CUDA) { + if (nvptxSubtarget->getDrvInterface() != NVPTX::CUDA) { Type *ETy = PTy->getElementType(); int addrSpace = PTy->getAddressSpace(); switch (addrSpace) { @@ -1645,7 +1651,7 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters( if (NumBytes) { O << "\t.local .align " << MFI->getMaxAlignment() << " .b8 \t" << DEPOTNAME << getFunctionNumber() << "[" << NumBytes << "];\n"; - if (nvptxSubtarget.is64Bit()) { + if (nvptxSubtarget->is64Bit()) { O << "\t.reg .b64 \t%SP;\n"; O << "\t.reg .b64 \t%SPL;\n"; } else { diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index aafd498f6c7..7e6b5e8d085 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -138,7 +138,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { unsigned int nSym = 0; unsigned int nextSymbolPos = symbolPosInBuffer[nSym]; unsigned int nBytes = 4; - if (AP.nvptxSubtarget.is64Bit()) + if (static_cast(AP.TM).is64Bit()) nBytes = 8; for (pos = 0; pos < size; pos += nBytes) { if (pos) @@ -212,7 +212,7 @@ private: void printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O); void emitGlobals(const Module &M); - void emitHeader(Module &M, raw_ostream &O); + void emitHeader(Module &M, raw_ostream &O, const NVPTXSubtarget &STI); void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const; void emitVirtualRegister(unsigned int vr, raw_ostream &); void emitFunctionExternParamList(const MachineFunction &MF); @@ -248,8 +248,10 @@ private: typedef DenseMap VRegMap; typedef DenseMap VRegRCMap; VRegRCMap VRegMapping; - // cache the subtarget here. - const NVPTXSubtarget &nvptxSubtarget; + + // Cache the subtarget here. + const NVPTXSubtarget *nvptxSubtarget; + // Build the map between type name and ID based on module's type // symbol table. std::map TypeNameMap; @@ -303,10 +305,10 @@ private: public: NVPTXAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) : AsmPrinter(TM, std::move(Streamer)), - nvptxSubtarget(TM.getSubtarget()) { + EmitGeneric(static_cast(TM).getDrvInterface() == + NVPTX::CUDA) { CurrentBankselLabelInBasicBlock = ""; reader = nullptr; - EmitGeneric = (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA); } ~NVPTXAsmPrinter() { @@ -314,6 +316,10 @@ public: delete reader; } + bool runOnMachineFunction(MachineFunction &F) override { + nvptxSubtarget = &F.getSubtarget(); + return AsmPrinter::runOnMachineFunction(F); + } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AsmPrinter::getAnalysisUsage(AU); diff --git a/lib/Target/NVPTX/NVPTXSubtarget.cpp b/lib/Target/NVPTX/NVPTXSubtarget.cpp index f456891fb14..c22418cc57a 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.cpp +++ b/lib/Target/NVPTX/NVPTXSubtarget.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "NVPTXSubtarget.h" +#include "NVPTXTargetMachine.h" using namespace llvm; @@ -43,17 +44,13 @@ NVPTXSubtarget &NVPTXSubtarget::initializeSubtargetDependencies(StringRef CPU, } NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, const TargetMachine &TM, - bool is64Bit) + const std::string &FS, + const NVPTXTargetMachine &TM, bool is64Bit) : NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0), - SmVersion(20), InstrInfo(initializeSubtargetDependencies(CPU, FS)), - TLInfo((const NVPTXTargetMachine &)TM, *this), TSInfo(TM.getDataLayout()), - FrameLowering(*this) { + SmVersion(20), TM(TM), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM, *this), + TSInfo(TM.getDataLayout()), FrameLowering(*this) {} - Triple T(TT); - - if (T.getOS() == Triple::NVCL) - drvInterface = NVPTX::NVCL; - else - drvInterface = NVPTX::CUDA; +NVPTX::DrvInterface NVPTXSubtarget::getDrvInterface() const { + return TM.getDrvInterface(); } diff --git a/lib/Target/NVPTX/NVPTXSubtarget.h b/lib/Target/NVPTX/NVPTXSubtarget.h index 81ccd0c1782..299bb93be67 100644 --- a/lib/Target/NVPTX/NVPTXSubtarget.h +++ b/lib/Target/NVPTX/NVPTXSubtarget.h @@ -32,7 +32,6 @@ namespace llvm { class NVPTXSubtarget : public NVPTXGenSubtargetInfo { virtual void anchor(); std::string TargetName; - NVPTX::DrvInterface drvInterface; bool Is64Bit; // PTX version x.y is represented as 10*x+y, e.g. 3.1 == 31 @@ -41,6 +40,7 @@ class NVPTXSubtarget : public NVPTXGenSubtargetInfo { // SM version x.y is represented as 10*x+y, e.g. 3.1 == 31 unsigned int SmVersion; + const NVPTXTargetMachine &TM; NVPTXInstrInfo InstrInfo; NVPTXTargetLowering TLInfo; TargetSelectionDAGInfo TSInfo; @@ -54,7 +54,8 @@ public: /// of the specified module. /// NVPTXSubtarget(const std::string &TT, const std::string &CPU, - const std::string &FS, const TargetMachine &TM, bool is64Bit); + const std::string &FS, const NVPTXTargetMachine &TM, + bool is64Bit); const TargetFrameLowering *getFrameLowering() const override { return &FrameLowering; @@ -106,7 +107,7 @@ public: bool is64Bit() const { return Is64Bit; } unsigned int getSmVersion() const { return SmVersion; } - NVPTX::DrvInterface getDrvInterface() const { return drvInterface; } + NVPTX::DrvInterface getDrvInterface() const; std::string getTargetName() const { return TargetName; } unsigned getPTXVersion() const { return PTXVersion; } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 56f1cb46ea2..7c6ecc387be 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -86,10 +86,13 @@ NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL, bool is64bit) - : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), + : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), is64bit(is64bit), TLOF(make_unique()), - DL(computeDataLayout(is64bit)), - Subtarget(TT, CPU, FS, *this, is64bit) { + DL(computeDataLayout(is64bit)), Subtarget(TT, CPU, FS, *this, is64bit) { + if (Triple(TT).getOS() == Triple::NVCL) + drvInterface = NVPTX::NVCL; + else + drvInterface = NVPTX::CUDA; initAsmInfo(); } diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.h b/lib/Target/NVPTX/NVPTXTargetMachine.h index 42e7a9257cb..a81abfeaf7d 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.h +++ b/lib/Target/NVPTX/NVPTXTargetMachine.h @@ -25,8 +25,10 @@ namespace llvm { /// NVPTXTargetMachine /// class NVPTXTargetMachine : public LLVMTargetMachine { + bool is64bit; std::unique_ptr TLOF; const DataLayout DL; // Calculates type size & alignment + NVPTX::DrvInterface drvInterface; NVPTXSubtarget Subtarget; // Hold Strings that can be free'd all together with NVPTXTargetMachine @@ -40,7 +42,8 @@ public: ~NVPTXTargetMachine() override; const DataLayout *getDataLayout() const override { return &DL; } const NVPTXSubtarget *getSubtargetImpl() const override { return &Subtarget; } - + bool is64Bit() const { return is64bit; } + NVPTX::DrvInterface getDrvInterface() const { return drvInterface; } ManagedStringPool *getManagedStrPool() const { return const_cast(&ManagedStrPool); }