diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index 7da2fed4cd5..735ca9b6b76 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -23,6 +23,7 @@ set(NVPTXCodeGen_sources NVPTXAsmPrinter.cpp NVPTXUtilities.cpp NVVMReflect.cpp + NVPTXGenericToNVVM.cpp ) add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) diff --git a/lib/Target/NVPTX/NVPTX.h b/lib/Target/NVPTX/NVPTX.h index 6a53a443bfb..fa29de8e564 100644 --- a/lib/Target/NVPTX/NVPTX.h +++ b/lib/Target/NVPTX/NVPTX.h @@ -62,6 +62,7 @@ createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel); FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &); FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &); +ModulePass *createGenericToNVVMPass(); bool isImageOrSamplerVal(const Value *, const Module *); diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp index e2832b02b53..88b9aa06e53 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.cpp +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.cpp @@ -68,11 +68,12 @@ InterleaveSrc("nvptx-emit-src", cl::ZeroOrMore, namespace { /// DiscoverDependentGlobals - Return a set of GlobalVariables on which \p V /// depends. -void DiscoverDependentGlobals(Value *V, DenseSet &Globals) { - if (GlobalVariable *GV = dyn_cast(V)) +void DiscoverDependentGlobals(const Value *V, + DenseSet &Globals) { + if (const GlobalVariable *GV = dyn_cast(V)) Globals.insert(GV); else { - if (User *U = dyn_cast(V)) { + if (const User *U = dyn_cast(V)) { for (unsigned i = 0, e = U->getNumOperands(); i != e; ++i) { DiscoverDependentGlobals(U->getOperand(i), Globals); } @@ -84,8 +85,9 @@ void DiscoverDependentGlobals(Value *V, DenseSet &Globals) { /// instances to be emitted, but only after any dependents have been added /// first. 
void VisitGlobalVariableForEmission( - GlobalVariable *GV, SmallVectorImpl &Order, - DenseSet &Visited, DenseSet &Visiting) { + const GlobalVariable *GV, SmallVectorImpl &Order, + DenseSet &Visited, + DenseSet &Visiting) { // Have we already visited this one? if (Visited.count(GV)) return; @@ -98,12 +100,12 @@ void VisitGlobalVariableForEmission( Visiting.insert(GV); // Make sure we visit all dependents first - DenseSet Others; + DenseSet Others; for (unsigned i = 0, e = GV->getNumOperands(); i != e; ++i) DiscoverDependentGlobals(GV->getOperand(i), Others); - for (DenseSet::iterator I = Others.begin(), - E = Others.end(); + for (DenseSet::iterator I = Others.begin(), + E = Others.end(); I != E; ++I) VisitGlobalVariableForEmission(*I, Order, Visited, Visiting); @@ -405,6 +407,11 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() { SmallString<128> Str; raw_svector_ostream O(Str); + if (!GlobalsEmitted) { + emitGlobals(*MF->getFunction()->getParent()); + GlobalsEmitted = true; + } + // Set up MRI = &MF->getRegInfo(); F = MF->getFunction(); @@ -795,7 +802,7 @@ static bool useFuncSeen(const Constant *C, return false; } -void NVPTXAsmPrinter::emitDeclarations(Module &M, raw_ostream &O) { +void NVPTXAsmPrinter::emitDeclarations(const Module &M, raw_ostream &O) { llvm::DenseMap seenMap; for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI) { const Function *F = FI; @@ -921,6 +928,12 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { if (nvptxSubtarget.getDrvInterface() == NVPTX::CUDA) recordAndEmitFilenames(M); + GlobalsEmitted = false; + + return false; // success +} + +void NVPTXAsmPrinter::emitGlobals(const Module &M) { SmallString<128> Str2; raw_svector_ostream OS2(Str2); @@ -931,13 +944,13 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { // global variable in order, and ensure that we emit it *after* its dependent // globals. 
We use a little extra memory maintaining both a set and a list to // have fast searches while maintaining a strict ordering. - SmallVector Globals; - DenseSet GVVisited; - DenseSet GVVisiting; + SmallVector Globals; + DenseSet GVVisited; + DenseSet GVVisiting; // Visit each global variable, in order - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; - ++I) + for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); + I != E; ++I) VisitGlobalVariableForEmission(I, Globals, GVVisited, GVVisiting); assert(GVVisited.size() == M.getGlobalList().size() && @@ -951,7 +964,6 @@ bool NVPTXAsmPrinter::doInitialization(Module &M) { OS2 << '\n'; OutStreamer.EmitRawText(OS2.str()); - return false; // success } void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { @@ -989,6 +1001,14 @@ void NVPTXAsmPrinter::emitHeader(Module &M, raw_ostream &O) { } bool NVPTXAsmPrinter::doFinalization(Module &M) { + + // If we did not emit any functions, then the global declarations have not + // yet been emitted. + if (!GlobalsEmitted) { + emitGlobals(M); + GlobalsEmitted = true; + } + // XXX Temproarily remove global variables so that doFinalization() will not // emit them again (global variables are emitted at beginning). 
@@ -1063,7 +1083,8 @@ void NVPTXAsmPrinter::emitLinkageDirective(const GlobalValue *V, } } -void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, +void NVPTXAsmPrinter::printModuleLevelGV(const GlobalVariable *GVar, + raw_ostream &O, bool processDemoted) { // Skip meta data @@ -1107,10 +1128,10 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, if (llvm::isSampler(*GVar)) { O << ".global .samplerref " << llvm::getSamplerName(*GVar); - Constant *Initializer = NULL; + const Constant *Initializer = NULL; if (GVar->hasInitializer()) Initializer = GVar->getInitializer(); - ConstantInt *CI = NULL; + const ConstantInt *CI = NULL; if (Initializer) CI = dyn_cast(Initializer); if (CI) { @@ -1183,7 +1204,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, if (localDecls.find(demotedFunc) != localDecls.end()) localDecls[demotedFunc].push_back(GVar); else { - std::vector temp; + std::vector temp; temp.push_back(GVar); localDecls[demotedFunc] = temp; } @@ -1213,7 +1234,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && GVar->hasInitializer()) { - Constant *Initializer = GVar->getInitializer(); + const Constant *Initializer = GVar->getInitializer(); if (!Initializer->isNullValue()) { O << " = "; printScalarConstant(Initializer, O); @@ -1237,7 +1258,7 @@ void NVPTXAsmPrinter::printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST_NOT_GEN) || (PTy->getAddressSpace() == llvm::ADDRESS_SPACE_CONST)) && GVar->hasInitializer()) { - Constant *Initializer = GVar->getInitializer(); + const Constant *Initializer = GVar->getInitializer(); if (!isa(Initializer) && !Initializer->isNullValue()) { AggBuffer aggBuffer(ElementSize, O, *this); bufferAggregateConstant(Initializer, &aggBuffer); @@ -1287,7 
+1308,7 @@ void NVPTXAsmPrinter::emitDemotedVars(const Function *f, raw_ostream &O) { if (localDecls.find(f) == localDecls.end()) return; - std::vector &gvars = localDecls[f]; + std::vector &gvars = localDecls[f]; for (unsigned i = 0, e = gvars.size(); i != e; ++i) { O << "\t// demoted variable\n\t"; @@ -1761,12 +1782,12 @@ void NVPTXAsmPrinter::printFPConstant(const ConstantFP *Fp, raw_ostream &O) { O << utohexstr(API.getZExtValue()); } -void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { - if (ConstantInt *CI = dyn_cast(CPV)) { +void NVPTXAsmPrinter::printScalarConstant(const Constant *CPV, raw_ostream &O) { + if (const ConstantInt *CI = dyn_cast(CPV)) { O << CI->getValue(); return; } - if (ConstantFP *CFP = dyn_cast(CPV)) { + if (const ConstantFP *CFP = dyn_cast(CPV)) { printFPConstant(CFP, O); return; } @@ -1774,13 +1795,13 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { O << "0"; return; } - if (GlobalValue *GVar = dyn_cast(CPV)) { + if (const GlobalValue *GVar = dyn_cast(CPV)) { O << *Mang->getSymbol(GVar); return; } - if (ConstantExpr *Cexpr = dyn_cast(CPV)) { - Value *v = Cexpr->stripPointerCasts(); - if (GlobalValue *GVar = dyn_cast(v)) { + if (const ConstantExpr *Cexpr = dyn_cast(CPV)) { + const Value *v = Cexpr->stripPointerCasts(); + if (const GlobalValue *GVar = dyn_cast(v)) { O << *Mang->getSymbol(GVar); return; } else { @@ -1791,7 +1812,7 @@ void NVPTXAsmPrinter::printScalarConstant(Constant *CPV, raw_ostream &O) { llvm_unreachable("Not scalar type found in printScalarConstant()"); } -void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, +void NVPTXAsmPrinter::bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer) { const DataLayout *TD = TM.getDataLayout(); @@ -1819,13 +1840,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, ptr = (unsigned char *)&int16; aggBuffer->addBytes(ptr, 2, Bytes); } else if (ETy == Type::getInt32Ty(CPV->getContext())) { - if 
(ConstantInt *constInt = dyn_cast(CPV)) { + if (const ConstantInt *constInt = dyn_cast(CPV)) { int int32 = (int)(constInt->getZExtValue()); ptr = (unsigned char *)&int32; aggBuffer->addBytes(ptr, 4, Bytes); break; - } else if (ConstantExpr *Cexpr = dyn_cast(CPV)) { - if (ConstantInt *constInt = dyn_cast( + } else if (const ConstantExpr *Cexpr = dyn_cast(CPV)) { + if (const ConstantInt *constInt = dyn_cast( ConstantFoldConstantExpression(Cexpr, TD))) { int int32 = (int)(constInt->getZExtValue()); ptr = (unsigned char *)&int32; @@ -1841,13 +1862,13 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, } llvm_unreachable("unsupported integer const type"); } else if (ETy == Type::getInt64Ty(CPV->getContext())) { - if (ConstantInt *constInt = dyn_cast(CPV)) { + if (const ConstantInt *constInt = dyn_cast(CPV)) { long long int64 = (long long)(constInt->getZExtValue()); ptr = (unsigned char *)&int64; aggBuffer->addBytes(ptr, 8, Bytes); break; - } else if (ConstantExpr *Cexpr = dyn_cast(CPV)) { - if (ConstantInt *constInt = dyn_cast( + } else if (const ConstantExpr *Cexpr = dyn_cast(CPV)) { + if (const ConstantInt *constInt = dyn_cast( ConstantFoldConstantExpression(Cexpr, TD))) { long long int64 = (long long)(constInt->getZExtValue()); ptr = (unsigned char *)&int64; @@ -1868,7 +1889,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, } case Type::FloatTyID: case Type::DoubleTyID: { - ConstantFP *CFP = dyn_cast(CPV); + const ConstantFP *CFP = dyn_cast(CPV); const Type *Ty = CFP->getType(); if (Ty == Type::getFloatTy(CPV->getContext())) { float float32 = (float) CFP->getValueAPF().convertToFloat(); @@ -1884,10 +1905,10 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, break; } case Type::PointerTyID: { - if (GlobalValue *GVar = dyn_cast(CPV)) { + if (const GlobalValue *GVar = dyn_cast(CPV)) { aggBuffer->addSymbol(GVar); - } else if (ConstantExpr *Cexpr = dyn_cast(CPV)) { - Value *v = Cexpr->stripPointerCasts(); + } else if (const 
ConstantExpr *Cexpr = dyn_cast(CPV)) { + const Value *v = Cexpr->stripPointerCasts(); aggBuffer->addSymbol(v); } unsigned int s = TD->getTypeAllocSize(CPV->getType()); @@ -1916,7 +1937,7 @@ void NVPTXAsmPrinter::bufferLEByte(Constant *CPV, int Bytes, } } -void NVPTXAsmPrinter::bufferAggregateConstant(Constant *CPV, +void NVPTXAsmPrinter::bufferAggregateConstant(const Constant *CPV, AggBuffer *aggBuffer) { const DataLayout *TD = TM.getDataLayout(); int Bytes; diff --git a/lib/Target/NVPTX/NVPTXAsmPrinter.h b/lib/Target/NVPTX/NVPTXAsmPrinter.h index 6dc9fc0ffef..7faa6b265b9 100644 --- a/lib/Target/NVPTX/NVPTXAsmPrinter.h +++ b/lib/Target/NVPTX/NVPTXAsmPrinter.h @@ -91,7 +91,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { unsigned char *buffer; // the buffer unsigned numSymbols; // number of symbol addresses SmallVector symbolPosInBuffer; - SmallVector Symbols; + SmallVector Symbols; private: unsigned curpos; @@ -128,7 +128,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { } return curpos; } - void addSymbol(Value *GVar) { + void addSymbol(const Value *GVar) { symbolPosInBuffer.push_back(curpos); Symbols.push_back(GVar); numSymbols++; @@ -153,11 +153,11 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { if (pos) O << ", "; if (pos == nextSymbolPos) { - Value *v = Symbols[nSym]; - if (GlobalValue *GVar = dyn_cast(v)) { + const Value *v = Symbols[nSym]; + if (const GlobalValue *GVar = dyn_cast(v)) { MCSymbol *Name = AP.Mang->getSymbol(GVar); O << *Name; - } else if (ConstantExpr *Cexpr = dyn_cast(v)) { + } else if (const ConstantExpr *Cexpr = dyn_cast(v)) { O << *nvptx::LowerConstant(Cexpr, AP); } else llvm_unreachable("symbol type unknown"); @@ -205,10 +205,12 @@ private: void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const; // definition autogenerated. 
void printInstruction(const MachineInstr *MI, raw_ostream &O); - void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false); + void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O, + bool = false); void printParamName(int paramIndex, raw_ostream &O); void printParamName(Function::const_arg_iterator I, int paramIndex, raw_ostream &O); + void emitGlobals(const Module &M); void emitHeader(Module &M, raw_ostream &O); void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const; void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O); @@ -234,6 +236,8 @@ protected: private: std::string CurrentBankselLabelInBasicBlock; + bool GlobalsEmitted; + // This is specific per MachineFunction. const MachineRegisterInfo *MRI; // The contents are specific for each @@ -247,7 +251,7 @@ private: std::map TypeNameMap; // List of variables demoted to a function scope. - std::map > localDecls; + std::map > localDecls; // To record filename to ID mapping std::map filenameMap; @@ -256,15 +260,15 @@ private: void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O); void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const; std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const; - void printScalarConstant(Constant *CPV, raw_ostream &O); + void printScalarConstant(const Constant *CPV, raw_ostream &O); void printFPConstant(const ConstantFP *Fp, raw_ostream &O); - void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer); - void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer); + void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer); + void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer); void printOperandProper(const MachineOperand &MO); void emitLinkageDirective(const GlobalValue *V, raw_ostream &O); - void emitDeclarations(Module &, raw_ostream &O); + void emitDeclarations(const Module &, raw_ostream &O); void emitDeclaration(const 
Function *, raw_ostream &O); static const char *getRegisterName(unsigned RegNo); diff --git a/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp new file mode 100644 index 00000000000..1077c46fb40 --- /dev/null +++ b/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp @@ -0,0 +1,436 @@ +//===-- GenericToNVVM.cpp - Convert generic module to NVVM module - C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Convert generic global variables into either .global or .const access based +// on the variable's "constant" qualifier. +// +//===----------------------------------------------------------------------===// + +#include "NVPTX.h" +#include "NVPTXUtilities.h" +#include "MCTargetDesc/NVPTXBaseInfo.h" + +#include "llvm/PassManager.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/ADT/ValueMap.h" +#include "llvm/CodeGen/MachineFunctionAnalysis.h" +#include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/IRBuilder.h" + +using namespace llvm; + +namespace llvm { +void initializeGenericToNVVMPass(PassRegistry &); +} + +namespace { +class GenericToNVVM : public ModulePass { +public: + static char ID; + + GenericToNVVM() : ModulePass(ID) {} + + virtual bool runOnModule(Module &M); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + } + +private: + Value *getOrInsertCVTA(Module *M, Function *F, GlobalVariable *GV, + IRBuilder<> &Builder); + Value *remapConstant(Module *M, Function *F, Constant *C, + IRBuilder<> &Builder); + Value *remapConstantVectorOrConstantAggregate(Module *M, Function *F, + Constant *C, + IRBuilder<> &Builder); + Value *remapConstantExpr(Module *M, 
Function *F, ConstantExpr *C, + IRBuilder<> &Builder); + void remapNamedMDNode(Module *M, NamedMDNode *N); + MDNode *remapMDNode(Module *M, MDNode *N); + + typedef ValueMap GVMapTy; + typedef ValueMap ConstantToValueMapTy; + GVMapTy GVMap; + ConstantToValueMapTy ConstantToValueMap; +}; +} + +char GenericToNVVM::ID = 0; + +ModulePass *llvm::createGenericToNVVMPass() { return new GenericToNVVM(); } + +INITIALIZE_PASS( + GenericToNVVM, "generic-to-nvvm", + "Ensure that the global variables are in the global address space", false, + false) + +bool GenericToNVVM::runOnModule(Module &M) { + // Create a clone of each global variable that has the default address space. + // The clone is created with the global address space specifier, and the pair + // of original global variable and its clone is placed in the GVMap for later + // use. + + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + I != E;) { + GlobalVariable *GV = I++; + if (GV->getType()->getAddressSpace() == llvm::ADDRESS_SPACE_GENERIC && + !llvm::isTexture(*GV) && !llvm::isSurface(*GV) && + !GV->getName().startswith("llvm.")) { + GlobalVariable *NewGV = new GlobalVariable( + M, GV->getType()->getElementType(), GV->isConstant(), + GV->getLinkage(), GV->hasInitializer() ? GV->getInitializer() : NULL, + "", GV, GV->getThreadLocalMode(), llvm::ADDRESS_SPACE_GLOBAL); + NewGV->copyAttributesFrom(GV); + GVMap[GV] = NewGV; + } + } + + // Return immediately, if every global variable has a specific address space + // specifier. + if (GVMap.empty()) { + return false; + } + + // Walk through the instructions in function definitions, and replace any use + // of original global variables in GVMap with a use of the corresponding + // copies in GVMap. If necessary, promote constants to instructions. 
+ for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + if (I->isDeclaration()) { + continue; + } + IRBuilder<> Builder(I->getEntryBlock().getFirstNonPHIOrDbg()); + for (Function::iterator BBI = I->begin(), BBE = I->end(); BBI != BBE; + ++BBI) { + for (BasicBlock::iterator II = BBI->begin(), IE = BBI->end(); II != IE; + ++II) { + for (unsigned i = 0, e = II->getNumOperands(); i < e; ++i) { + Value *Operand = II->getOperand(i); + if (isa(Operand)) { + II->setOperand( + i, remapConstant(&M, I, cast(Operand), Builder)); + } + } + } + } + ConstantToValueMap.clear(); + } + + // Walk through the metadata section and update the debug information + // associated with the global variables in the default address space. + for (Module::named_metadata_iterator I = M.named_metadata_begin(), + E = M.named_metadata_end(); + I != E; I++) { + remapNamedMDNode(&M, I); + } + + // Walk through the global variable initializers, and replace any use of + // original global variables in GVMap with a use of the corresponding copies + // in GVMap. The copies need to be bitcast to the original global variable + // types, as we cannot use cvta in global variable initializers. + for (GVMapTy::iterator I = GVMap.begin(), E = GVMap.end(); I != E;) { + GlobalVariable *GV = I->first; + GlobalVariable *NewGV = I->second; + ++I; + Constant *BitCastNewGV = ConstantExpr::getBitCast(NewGV, GV->getType()); + // At this point, the remaining uses of GV should be found only in global + // variable initializers, as other uses have already been removed + // while walking through the instructions in function definitions. 
+ for (Value::use_iterator UI = GV->use_begin(), UE = GV->use_end(); + UI != UE;) { + Use &U = (UI++).getUse(); + U.set(BitCastNewGV); + } + std::string Name = GV->getName(); + GV->removeDeadConstantUsers(); + GV->eraseFromParent(); + NewGV->setName(Name); + } + GVMap.clear(); + + return true; +} + +Value *GenericToNVVM::getOrInsertCVTA(Module *M, Function *F, + GlobalVariable *GV, + IRBuilder<> &Builder) { + PointerType *GVType = GV->getType(); + Value *CVTA = NULL; + + // See if the address space conversion requires the operand to be bitcast + // to i8 addrspace(n)* first. + EVT ExtendedGVType = EVT::getEVT(GVType->getElementType(), true); + if (!ExtendedGVType.isInteger() && !ExtendedGVType.isFloatingPoint()) { + // A bitcast to i8 addrspace(n)* on the operand is needed. + LLVMContext &Context = M->getContext(); + unsigned int AddrSpace = GVType->getAddressSpace(); + Type *DestTy = PointerType::get(Type::getInt8Ty(Context), AddrSpace); + CVTA = Builder.CreateBitCast(GV, DestTy, "cvta"); + // Insert the address space conversion. + Type *ResultType = + PointerType::get(Type::getInt8Ty(Context), llvm::ADDRESS_SPACE_GENERIC); + SmallVector ParamTypes; + ParamTypes.push_back(ResultType); + ParamTypes.push_back(DestTy); + Function *CVTAFunction = Intrinsic::getDeclaration( + M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes); + CVTA = Builder.CreateCall(CVTAFunction, CVTA, "cvta"); + // Another bitcast from i8 * to * is + // required. + DestTy = + PointerType::get(GVType->getElementType(), llvm::ADDRESS_SPACE_GENERIC); + CVTA = Builder.CreateBitCast(CVTA, DestTy, "cvta"); + } else { + // A simple CVTA is enough. 
+ SmallVector ParamTypes; + ParamTypes.push_back(PointerType::get(GVType->getElementType(), + llvm::ADDRESS_SPACE_GENERIC)); + ParamTypes.push_back(GVType); + Function *CVTAFunction = Intrinsic::getDeclaration( + M, Intrinsic::nvvm_ptr_global_to_gen, ParamTypes); + CVTA = Builder.CreateCall(CVTAFunction, GV, "cvta"); + } + + return CVTA; +} + +Value *GenericToNVVM::remapConstant(Module *M, Function *F, Constant *C, + IRBuilder<> &Builder) { + // If the constant C has been converted already in the given function F, just + // return the converted value. + ConstantToValueMapTy::iterator CTII = ConstantToValueMap.find(C); + if (CTII != ConstantToValueMap.end()) { + return CTII->second; + } + + Value *NewValue = C; + if (isa(C)) { + // If the constant C is a global variable and is found in GVMap, generate a + // set of instructions that convert the clone of C with the global + // address space specifier to a generic pointer. + // The constant C cannot be used here, as it will be erased from the + // module eventually. And the clone of C with the global address space + // specifier cannot be used here either, as it will affect the types of + // other instructions in the function. Hence, this address space conversion + // is required. + GVMapTy::iterator I = GVMap.find(cast(C)); + if (I != GVMap.end()) { + NewValue = getOrInsertCVTA(M, F, I->second, Builder); + } + } else if (isa(C) || isa(C) || + isa(C)) { + // If any element in the constant vector or aggregate C is or uses a global + // variable in GVMap, the constant C needs to be reconstructed, using a set + // of instructions. + NewValue = remapConstantVectorOrConstantAggregate(M, F, C, Builder); + } else if (isa(C)) { + // If any operand in the constant expression C is or uses a global variable + // in GVMap, the constant expression C needs to be reconstructed, using a + // set of instructions. 
+ NewValue = remapConstantExpr(M, F, cast(C), Builder); + } + + ConstantToValueMap[C] = NewValue; + return NewValue; +} + +Value *GenericToNVVM::remapConstantVectorOrConstantAggregate( + Module *M, Function *F, Constant *C, IRBuilder<> &Builder) { + bool OperandChanged = false; + SmallVector NewOperands; + unsigned NumOperands = C->getNumOperands(); + + // Check if any element is or uses a global variable in GVMap, and thus + // converted to another value. + for (unsigned i = 0; i < NumOperands; ++i) { + Value *Operand = C->getOperand(i); + Value *NewOperand = remapConstant(M, F, cast(Operand), Builder); + OperandChanged |= Operand != NewOperand; + NewOperands.push_back(NewOperand); + } + + // If none of the elements has been modified, return C as it is. + if (!OperandChanged) { + return C; + } + + // If any of the elements has been modified, construct the equivalent + // vector or aggregate value with a set of instructions and the converted + // elements. + Value *NewValue = UndefValue::get(C->getType()); + if (isa(C)) { + for (unsigned i = 0; i < NumOperands; ++i) { + Value *Idx = ConstantInt::get(Type::getInt32Ty(M->getContext()), i); + NewValue = Builder.CreateInsertElement(NewValue, NewOperands[i], Idx); + } + } else { + for (unsigned i = 0; i < NumOperands; ++i) { + NewValue = + Builder.CreateInsertValue(NewValue, NewOperands[i], makeArrayRef(i)); + } + } + + return NewValue; +} + +Value *GenericToNVVM::remapConstantExpr(Module *M, Function *F, ConstantExpr *C, + IRBuilder<> &Builder) { + bool OperandChanged = false; + SmallVector NewOperands; + unsigned NumOperands = C->getNumOperands(); + + // Check if any operand is or uses a global variable in GVMap, and thus + // converted to another value. 
+ for (unsigned i = 0; i < NumOperands; ++i) { + Value *Operand = C->getOperand(i); + Value *NewOperand = remapConstant(M, F, cast(Operand), Builder); + OperandChanged |= Operand != NewOperand; + NewOperands.push_back(NewOperand); + } + + // If none of the operands has been modified, return C as it is. + if (!OperandChanged) { + return C; + } + + // If any of the operands has been modified, construct the instruction with + // the converted operands. + unsigned Opcode = C->getOpcode(); + switch (Opcode) { + case Instruction::ICmp: + // CompareConstantExpr (icmp) + return Builder.CreateICmp(CmpInst::Predicate(C->getPredicate()), + NewOperands[0], NewOperands[1]); + case Instruction::FCmp: + // CompareConstantExpr (fcmp) + assert(false && "Address space conversion should have no effect " + "on float point CompareConstantExpr (fcmp)!"); + return C; + case Instruction::ExtractElement: + // ExtractElementConstantExpr + return Builder.CreateExtractElement(NewOperands[0], NewOperands[1]); + case Instruction::InsertElement: + // InsertElementConstantExpr + return Builder.CreateInsertElement(NewOperands[0], NewOperands[1], + NewOperands[2]); + case Instruction::ShuffleVector: + // ShuffleVector + return Builder.CreateShuffleVector(NewOperands[0], NewOperands[1], + NewOperands[2]); + case Instruction::ExtractValue: + // ExtractValueConstantExpr + return Builder.CreateExtractValue(NewOperands[0], C->getIndices()); + case Instruction::InsertValue: + // InsertValueConstantExpr + return Builder.CreateInsertValue(NewOperands[0], NewOperands[1], + C->getIndices()); + case Instruction::GetElementPtr: + // GetElementPtrConstantExpr + return cast(C)->isInBounds() + ? 
Builder.CreateInBoundsGEP( + NewOperands[0], + makeArrayRef(&NewOperands[1], NumOperands - 1)) + : Builder.CreateGEP( + NewOperands[0], + makeArrayRef(&NewOperands[1], NumOperands - 1)); + case Instruction::Select: + // SelectConstantExpr + return Builder.CreateSelect(NewOperands[0], NewOperands[1], NewOperands[2]); + default: + // BinaryConstantExpr + if (Instruction::isBinaryOp(Opcode)) { + return Builder.CreateBinOp(Instruction::BinaryOps(C->getOpcode()), + NewOperands[0], NewOperands[1]); + } + // UnaryConstantExpr + if (Instruction::isCast(Opcode)) { + return Builder.CreateCast(Instruction::CastOps(C->getOpcode()), + NewOperands[0], C->getType()); + } + assert(false && "GenericToNVVM encountered an unsupported ConstantExpr"); + return C; + } +} + +void GenericToNVVM::remapNamedMDNode(Module *M, NamedMDNode *N) { + + bool OperandChanged = false; + SmallVector NewOperands; + unsigned NumOperands = N->getNumOperands(); + + // Check if any operand is or contains a global variable in GVMap, and thus + // converted to another value. + for (unsigned i = 0; i < NumOperands; ++i) { + MDNode *Operand = N->getOperand(i); + MDNode *NewOperand = remapMDNode(M, Operand); + OperandChanged |= Operand != NewOperand; + NewOperands.push_back(NewOperand); + } + + // If none of the operands has been modified, return immediately. + if (!OperandChanged) { + return; + } + + // Replace the old operands with the new operands. + N->dropAllReferences(); + for (SmallVector::iterator I = NewOperands.begin(), + E = NewOperands.end(); + I != E; ++I) { + N->addOperand(*I); + } +} + +MDNode *GenericToNVVM::remapMDNode(Module *M, MDNode *N) { + + bool OperandChanged = false; + SmallVector NewOperands; + unsigned NumOperands = N->getNumOperands(); + + // Check if any operand is or contains a global variable in GVMap, and thus + // converted to another value. 
+ for (unsigned i = 0; i < NumOperands; ++i) { + Value *Operand = N->getOperand(i); + Value *NewOperand = Operand; + if (Operand) { + if (isa(Operand)) { + GVMapTy::iterator I = GVMap.find(cast(Operand)); + if (I != GVMap.end()) { + NewOperand = I->second; + if (++i < NumOperands) { + NewOperands.push_back(NewOperand); + // Address space of the global variable follows the global variable + // in the global variable debug info (see createGlobalVariable in + // lib/Analysis/DIBuilder.cpp). + NewOperand = + ConstantInt::get(Type::getInt32Ty(M->getContext()), + I->second->getType()->getAddressSpace()); + } + } + } else if (isa(Operand)) { + NewOperand = remapMDNode(M, cast(Operand)); + } + } + OperandChanged |= Operand != NewOperand; + NewOperands.push_back(NewOperand); + } + + // If none of the operands has been modified, return N as it is. + if (!OperandChanged) { + return N; + } + + // If any of the operands has been modified, create a new MDNode with the new + // operands. + return MDNode::get(M->getContext(), makeArrayRef(NewOperands)); +} diff --git a/lib/Target/NVPTX/NVPTXIntrinsics.td b/lib/Target/NVPTX/NVPTXIntrinsics.td index 49e2568dfa2..2780ef40365 100644 --- a/lib/Target/NVPTX/NVPTXIntrinsics.td +++ b/lib/Target/NVPTX/NVPTXIntrinsics.td @@ -1510,38 +1510,12 @@ multiclass G_TO_NG { defm cvta_local : NG_TO_G<"local", int_nvvm_ptr_local_to_gen>; defm cvta_shared : NG_TO_G<"shared", int_nvvm_ptr_shared_to_gen>; defm cvta_global : NG_TO_G<"global", int_nvvm_ptr_global_to_gen>; +defm cvta_const : NG_TO_G<"const", int_nvvm_ptr_constant_to_gen>; defm cvta_to_local : G_TO_NG<"local", int_nvvm_ptr_gen_to_local>; defm cvta_to_shared : G_TO_NG<"shared", int_nvvm_ptr_gen_to_shared>; defm cvta_to_global : G_TO_NG<"global", int_nvvm_ptr_gen_to_global>; - -def cvta_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - "mov.u32 \t$result, $src;", - [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen Int32Regs:$src))]>; -def cvta_const_64 : NVPTXInst<(outs 
Int64Regs:$result), (ins Int64Regs:$src), - "mov.u64 \t$result, $src;", - [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen Int64Regs:$src))]>; - - - -// @TODO: Revisit this. There is a type -// contradiction between iPTRAny and iPTR for the def. -/*def cvta_const_addr : NVPTXInst<(outs Int32Regs:$result), (ins imemAny:$src), - "mov.u32 \t$result, $src;", - [(set Int32Regs:$result, (int_nvvm_ptr_constant_to_gen - (Wrapper tglobaladdr:$src)))]>; -def cvta_const_addr_64 : NVPTXInst<(outs Int64Regs:$result), (ins imemAny:$src), - "mov.u64 \t$result, $src;", - [(set Int64Regs:$result, (int_nvvm_ptr_constant_to_gen - (Wrapper tglobaladdr:$src)))]>;*/ - - -def cvta_to_const : NVPTXInst<(outs Int32Regs:$result), (ins Int32Regs:$src), - "mov.u32 \t$result, $src;", - [(set Int32Regs:$result, (int_nvvm_ptr_gen_to_constant Int32Regs:$src))]>; -def cvta_to_const_64 : NVPTXInst<(outs Int64Regs:$result), (ins Int64Regs:$src), - "mov.u64 \t$result, $src;", - [(set Int64Regs:$result, (int_nvvm_ptr_gen_to_constant Int64Regs:$src))]>; +defm cvta_to_const : G_TO_NG<"const", int_nvvm_ptr_gen_to_constant>; // nvvm.ptr.gen.to.param diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 5f35edf2191..507fcb5d6af 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -49,6 +49,7 @@ using namespace llvm; namespace llvm { void initializeNVVMReflectPass(PassRegistry&); +void initializeGenericToNVVMPass(PassRegistry&); } extern "C" void LLVMInitializeNVPTXTarget() { @@ -62,6 +63,7 @@ extern "C" void LLVMInitializeNVPTXTarget() { // FIXME: This pass is really intended to be invoked during IR optimization, // but it's very NVPTX-specific. 
initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); + initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); } NVPTXTargetMachine::NVPTXTargetMachine( @@ -102,6 +104,7 @@ public: return getTM(); } + virtual void addIRPasses(); virtual bool addInstSelector(); virtual bool addPreRegAlloc(); }; @@ -112,6 +115,11 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { return PassConfig; } +void NVPTXPassConfig::addIRPasses() { + TargetPassConfig::addIRPasses(); + addPass(createGenericToNVVMPass()); +} + bool NVPTXPassConfig::addInstSelector() { addPass(createLowerAggrCopies()); addPass(createSplitBBatBarPass()); diff --git a/test/CodeGen/NVPTX/generic-to-nvvm.ll b/test/CodeGen/NVPTX/generic-to-nvvm.ll new file mode 100644 index 00000000000..c9cb2f71f42 --- /dev/null +++ b/test/CodeGen/NVPTX/generic-to-nvvm.ll @@ -0,0 +1,25 @@ +; RUN: llc < %s -march=nvptx -mcpu=sm_20 -drvcuda | FileCheck %s + +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" + +; Ensure global variables in address space 0 are promoted to address space 1 + +; CHECK: .global .align 4 .u32 myglobal = 42; +@myglobal = internal global i32 42, align 4 +; CHECK: .global .align 4 .u32 myconst = 42; +@myconst = internal constant i32 42, align 4 + + +define void @foo(i32* %a, i32* %b) { +; CHECK: cvta.global.u32 + %ld1 = load i32* @myglobal +; CHECK: cvta.global.u32 + %ld2 = load i32* @myconst + store i32 %ld1, i32* %a + store i32 %ld2, i32* %b + ret void +} + + +!nvvm.annotations = !{!0} +!0 = metadata !{void (i32*, i32*)* @foo, metadata !"kernel", i32 1}