diff --git a/lib/Target/NVPTX/CMakeLists.txt b/lib/Target/NVPTX/CMakeLists.txt index 47baef66961..7da2fed4cd5 100644 --- a/lib/Target/NVPTX/CMakeLists.txt +++ b/lib/Target/NVPTX/CMakeLists.txt @@ -22,6 +22,7 @@ set(NVPTXCodeGen_sources NVPTXAllocaHoisting.cpp NVPTXAsmPrinter.cpp NVPTXUtilities.cpp + NVVMReflect.cpp ) add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources}) diff --git a/lib/Target/NVPTX/NVPTXTargetMachine.cpp b/lib/Target/NVPTX/NVPTXTargetMachine.cpp index 17cd9b75ba7..67ca6b58e5a 100644 --- a/lib/Target/NVPTX/NVPTXTargetMachine.cpp +++ b/lib/Target/NVPTX/NVPTXTargetMachine.cpp @@ -47,6 +47,10 @@ using namespace llvm; +namespace llvm { +void initializeNVVMReflectPass(PassRegistry&); +} + extern "C" void LLVMInitializeNVPTXTarget() { // Register the target. RegisterTargetMachine X(TheNVPTXTarget32); @@ -55,6 +59,9 @@ extern "C" void LLVMInitializeNVPTXTarget() { RegisterMCAsmInfo A(TheNVPTXTarget32); RegisterMCAsmInfo B(TheNVPTXTarget64); + // FIXME: This pass is really intended to be invoked during IR optimization, + // but it's very NVPTX-specific. + initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); } NVPTXTargetMachine::NVPTXTargetMachine( diff --git a/lib/Target/NVPTX/NVVMReflect.cpp b/lib/Target/NVPTX/NVVMReflect.cpp new file mode 100644 index 00000000000..3bbd1a13da0 --- /dev/null +++ b/lib/Target/NVPTX/NVVMReflect.cpp @@ -0,0 +1,193 @@ +//===- NVVMReflect.cpp - NVVM Emulate conditional compilation -------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This pass replaces occurences of __nvvm_reflect("string") with an +// integer based on -nvvm-reflect-list string= option given to this pass. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/StringMap.h" +#include "llvm/Pass.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_os_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include +#include +#include +#include + +#define NVVM_REFLECT_FUNCTION "__nvvm_reflect" + +using namespace llvm; + +namespace llvm { void initializeNVVMReflectPass(PassRegistry &); } + +namespace { +class LLVM_LIBRARY_VISIBILITY NVVMReflect : public ModulePass { +private: + //std::map VarMap; + StringMap VarMap; + typedef std::map::iterator VarMapIter; + Function *reflectFunction; + +public: + static char ID; + NVVMReflect() : ModulePass(ID) { + VarMap.clear(); + reflectFunction = 0; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); } + virtual bool runOnModule(Module &); + + void setVarMap(); +}; +} + +static cl::opt +NVVMReflectEnabled("nvvm-reflect-enable", cl::init(true), + cl::desc("NVVM reflection, enabled by default")); + +char NVVMReflect::ID = 0; +INITIALIZE_PASS(NVVMReflect, "nvvm-reflect", + "Replace occurences of __nvvm_reflect() calls with 0/1", false, + false) + +static cl::list +ReflectList("nvvm-reflect-list", cl::value_desc("name=0/1"), + cl::desc("A list of string=num assignments, where num=0 or 1"), + cl::ValueRequired); + +/// This function does the same operation as perl's split. +/// For example, calling this with ("a=1,b=2,c=0", ",") will +/// return ["a=1", "b=2", "c=0"] in the return std::vector. +static std::vector +Tokenize(const std::string &str, const std::string &delim) { + std::vector tokens; + + size_t p0 = 0, p1 = std::string::npos; + while (p0 != std::string::npos) { + p1 = str.find_first_of(delim, p0); + if (p1 != p0) { + std::string token = str.substr(p0, p1 - p0); + tokens.push_back(token); + } + p0 = str.find_first_not_of(delim, p1); + } + + return tokens; +} + +/// The command line can look as follows : +/// -R a=1,b=2 -R c=3,d=0 -R e=2 +/// The strings "a=1,b=2", "c=3,d=0", "e=2" are available in the +/// ReflectList vector. First, each of ReflectList[i] is 'split' +/// using "," as the delimiter. Then each of this part is split +/// using "=" as the delimiter. +void NVVMReflect::setVarMap() { + for (unsigned i = 0, e = ReflectList.size(); i != e; ++i) { + // DEBUG(dbgs() << "Option : " << ReflectList[i] << std::endl); + std::vector nameValList = Tokenize(ReflectList[i], ","); + for (unsigned j = 0, ej = nameValList.size(); j != ej; ++j) { + std::vector nameValPair = Tokenize(nameValList[j], "="); + assert(nameValPair.size() == 2 && "name=val expected"); + std::stringstream valstream(nameValPair[1]); + int val; + valstream >> val; + assert((!(valstream.fail())) && "integer value expected"); + VarMap[nameValPair[0]] = val; + } + } +} + +bool NVVMReflect::runOnModule(Module &M) { + if (!NVVMReflectEnabled) + return false; + + setVarMap(); + + reflectFunction = M.getFunction(NVVM_REFLECT_FUNCTION); + + // If reflect function is not used, then there will be + // no entry in the module. + if (reflectFunction == 0) { + return false; + } + + // Validate _reflect function + assert(reflectFunction->isDeclaration() && + "_reflect function should not have a body"); + assert(reflectFunction->getReturnType()->isIntegerTy() && + "_reflect's return type should be integer"); + + std::vector toRemove; + + // Go through the uses of reflectFunction in this Function. + // Each of them should a CallInst with a ConstantArray argument. + // First validate that. If the c-string corresponding to the + // ConstantArray can be found successfully, see if it can be + // found in VarMap. If so, replace the uses of CallInst with the + // value found in VarMap. If not, replace the use with value 0. + for (Value::use_iterator iter = reflectFunction->use_begin(), + iterEnd = reflectFunction->use_end(); + iter != iterEnd; ++iter) { + assert(isa(*iter) && "Only a call instruction can use _reflect"); + CallInst *reflect = cast(*iter); + + assert((reflect->getNumOperands() == 2) && + "Only one operand expect for _reflect function"); + // In cuda, we will have an extra constant-to-generic conversion of + // the string. + const Value *conv = reflect->getArgOperand(0); + assert(isa(conv) && "Expected a const-to-gen conversion"); + const CallInst *convcall = cast(conv); + const Value *str = convcall->getArgOperand(0); + assert(isa(str) && + "Format of _reflect function not recognized"); + const ConstantExpr *gep = cast(str); + + const Value *sym = gep->getOperand(0); + assert(isa(sym) && "Format of _reflect function not recognized"); + + const Constant *symstr = cast(sym); + + assert(isa(symstr->getOperand(0)) && + "Format of _reflect function not recognized"); + + assert(cast(symstr->getOperand(0))->isCString() && + "Format of _reflect function not recognized"); + + std::string reflectArg = + cast(symstr->getOperand(0))->getAsString(); + + reflectArg = reflectArg.substr(0, reflectArg.size() - 1); + // DEBUG(dbgs() << "Arg of _reflect : " << reflectArg << std::endl); + + int reflectVal = 0; // The default value is 0 + if (VarMap.find(reflectArg) != VarMap.end()) { + reflectVal = VarMap[reflectArg]; + } + reflect->replaceAllUsesWith( + ConstantInt::get(reflect->getType(), reflectVal)); + toRemove.push_back(reflect); + } + if (toRemove.size() == 0) + return false; + + for (unsigned i = 0, e = toRemove.size(); i != e; ++i) + toRemove[i]->eraseFromParent(); + return true; +} diff --git a/test/CodeGen/NVPTX/nvvm-reflect.ll b/test/CodeGen/NVPTX/nvvm-reflect.ll new file mode 100644 index 00000000000..0d02194651e --- /dev/null +++ b/test/CodeGen/NVPTX/nvvm-reflect.ll @@ -0,0 +1,34 @@ +; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=0 -O2 | FileCheck %s --check-prefix=USE_MUL_0 +; RUN: opt < %s -S -nvvm-reflect -nvvm-reflect-list USE_MUL=1 -O2 | FileCheck %s --check-prefix=USE_MUL_1 + +@str = private addrspace(4) unnamed_addr constant [8 x i8] c"USE_MUL\00" + +declare i32 @__nvvm_reflect(i8*) +declare i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)*) + +define float @foo(float %a, float %b) { +; USE_MUL_0: define float @foo +; USE_MUL_0-NOT: call i32 @__nvvm_reflect +; USE_MUL_1: define float @foo +; USE_MUL_1-NOT: call i32 @__nvvm_reflect + %ptr = tail call i8* @llvm.nvvm.ptr.constant.to.gen.p0i8.p4i8(i8 addrspace(4)* getelementptr inbounds ([8 x i8] addrspace(4)* @str, i32 0, i32 0)) + %reflect = tail call i32 @__nvvm_reflect(i8* %ptr) + %cmp = icmp ugt i32 %reflect, 0 + br i1 %cmp, label %use_mul, label %use_add + +use_mul: +; USE_MUL_1: fmul float %a, %b +; USE_MUL_0-NOT: fadd float %a, %b + %ret1 = fmul float %a, %b + br label %exit + +use_add: +; USE_MUL_0: fadd float %a, %b +; USE_MUL_1-NOT: fmul float %a, %b + %ret2 = fadd float %a, %b + br label %exit + +exit: + %ret = phi float [%ret1, %use_mul], [%ret2, %use_add] + ret float %ret +}