[NVPTX] Emit .pragma "nounroll" for loops marked with nounroll

Summary:
The CUDA driver can unroll loops when JIT-compiling PTX. To prevent the CUDA
driver from unrolling a loop marked with llvm.loop.unroll.disable, we need
to emit .pragma "nounroll" at the header of that loop.

This patch also extracts getting unroll metadata from loop ID metadata
into a shared helper function.

Test Plan: test/CodeGen/NVPTX/nounroll.ll

Reviewers: eliben, meheff, jholewinski

Reviewed By: jholewinski

Subscribers: jholewinski, llvm-commits

Differential Revision: http://reviews.llvm.org/D7041

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227703 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jingyue Wu 2015-02-01 02:27:45 +00:00
parent 58e57f1604
commit f15b696b79
7 changed files with 124 additions and 26 deletions

View File

@ -238,10 +238,6 @@ public:
/// ///
void EmitAlignment(unsigned NumBits, const GlobalObject *GO = nullptr) const; void EmitAlignment(unsigned NumBits, const GlobalObject *GO = nullptr) const;
/// This method prints the label for the specified MachineBasicBlock, an
/// alignment (if present) and a comment describing it if appropriate.
void EmitBasicBlockStart(const MachineBasicBlock &MBB) const;
/// Lower the specified LLVM Constant to an MCExpr. /// Lower the specified LLVM Constant to an MCExpr.
const MCExpr *lowerConstant(const Constant *CV); const MCExpr *lowerConstant(const Constant *CV);
@ -271,6 +267,12 @@ public:
/// function. /// function.
virtual void EmitFunctionBodyEnd() {} virtual void EmitFunctionBodyEnd() {}
/// Targets can override this to emit stuff at the start of a basic block.
/// By default, this method prints the label for the specified
/// MachineBasicBlock, an alignment (if present) and a comment describing it
/// if appropriate.
virtual void EmitBasicBlockStart(const MachineBasicBlock &MBB) const;
/// Targets can override this to emit stuff at the end of a basic block. /// Targets can override this to emit stuff at the end of a basic block.
virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB) {} virtual void EmitBasicBlockEnd(const MachineBasicBlock &MBB) {}

View File

@ -16,12 +16,15 @@
#ifndef LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H #ifndef LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H
#define LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H #define LLVM_TRANSFORMS_UTILS_UNROLLLOOP_H
#include "llvm/ADT/StringRef.h"
namespace llvm { namespace llvm {
class AssumptionCache; class AssumptionCache;
class Loop; class Loop;
class LoopInfo; class LoopInfo;
class LPPassManager; class LPPassManager;
class MDNode;
class Pass; class Pass;
bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime, bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime,
@ -30,6 +33,8 @@ bool UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, bool AllowRuntime,
bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI, bool UnrollRuntimeLoopProlog(Loop *L, unsigned Count, LoopInfo *LI,
LPPassManager* LPM); LPPassManager* LPM);
/// Given an llvm.loop loop id metadata node, returns the loop hint metadata
/// node with the given name (for example, "llvm.loop.unroll.count"), or
/// nullptr if no such metadata node exists.
const MDNode *GetUnrollMetadata(const MDNode *LoopID, StringRef Name);
} }
#endif #endif

View File

@ -27,6 +27,7 @@
#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ConstantFolding.h"
#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/IR/DebugInfo.h" #include "llvm/IR/DebugInfo.h"
@ -45,6 +46,7 @@
#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TimeValue.h" #include "llvm/Support/TimeValue.h"
#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Transforms/Utils/UnrollLoop.h"
#include <sstream> #include <sstream>
using namespace llvm; using namespace llvm;
@ -418,6 +420,44 @@ void NVPTXAsmPrinter::printReturnValStr(const MachineFunction &MF,
printReturnValStr(F, O); printReturnValStr(F, O);
} }
// Return true if MBB is the header of a loop marked with
// llvm.loop.unroll.disable.
// TODO(jingyue): consider "#pragma unroll 1" which is equivalent to "#pragma
// nounroll".
bool NVPTXAsmPrinter::isLoopHeaderOfNoUnroll(
const MachineBasicBlock &MBB) const {
MachineLoopInfo &LI = getAnalysis<MachineLoopInfo>();
// TODO(jingyue): isLoopHeader() should take "const MachineBasicBlock *".
// We insert .pragma "nounroll" only to the loop header.
if (!LI.isLoopHeader(const_cast<MachineBasicBlock *>(&MBB)))
return false;
// llvm.loop.unroll.disable is marked on the back edges of a loop. Therefore,
// we iterate through each back edge of the loop with header MBB, and check
// whether its metadata contains llvm.loop.unroll.disable.
for (auto I = MBB.pred_begin(); I != MBB.pred_end(); ++I) {
const MachineBasicBlock *PMBB = *I;
if (LI.getLoopFor(PMBB) != LI.getLoopFor(&MBB)) {
// Edges from other loops to MBB are not back edges.
continue;
}
if (const BasicBlock *PBB = PMBB->getBasicBlock()) {
if (const MDNode *LoopID =
PBB->getTerminator()->getMetadata("llvm.loop")) {
if (GetUnrollMetadata(LoopID, "llvm.loop.unroll.disable"))
return true;
}
}
}
return false;
}
// Emits the generic basic-block prologue via AsmPrinter and, when MBB heads a
// loop marked llvm.loop.unroll.disable, follows it with .pragma "nounroll".
void NVPTXAsmPrinter::EmitBasicBlockStart(const MachineBasicBlock &MBB) const {
  AsmPrinter::EmitBasicBlockStart(MBB);

  // Nothing more to emit unless this block is the header of a nounroll loop.
  if (!isLoopHeaderOfNoUnroll(MBB))
    return;

  OutStreamer.EmitRawText(StringRef("\t.pragma \"nounroll\";\n"));
}
void NVPTXAsmPrinter::EmitFunctionEntryLabel() { void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
SmallString<128> Str; SmallString<128> Str;
raw_svector_ostream O(Str); raw_svector_ostream O(Str);

View File

@ -187,6 +187,7 @@ private:
const Function *F; const Function *F;
std::string CurrentFnName; std::string CurrentFnName;
void EmitBasicBlockStart(const MachineBasicBlock &MBB) const override;
void EmitFunctionEntryLabel() override; void EmitFunctionEntryLabel() override;
void EmitFunctionBodyStart() override; void EmitFunctionBodyStart() override;
void EmitFunctionBodyEnd() override; void EmitFunctionBodyEnd() override;
@ -281,6 +282,8 @@ private:
MCOperand &MCOp); MCOperand &MCOp);
void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp); void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp);
bool isLoopHeaderOfNoUnroll(const MachineBasicBlock &MBB) const;
LineReader *reader; LineReader *reader;
LineReader *getReader(std::string); LineReader *getReader(std::string);
@ -311,6 +314,11 @@ public:
delete reader; delete reader;
} }
// Registers the analyses this printer needs: MachineLoopInfo is marked as
// required (isLoopHeaderOfNoUnroll fetches it with getAnalysis), and then the
// base AsmPrinter gets to register its own requirements.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<MachineLoopInfo>();
AsmPrinter::getAnalysisUsage(AU);
}
bool ignoreLoc(const MachineInstr &); bool ignoreLoc(const MachineInstr &);
std::string getVirtualRegisterName(unsigned) const; std::string getVirtualRegisterName(unsigned) const;

View File

@ -234,44 +234,27 @@ static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
// Returns the loop hint metadata node with the given name (for example, // Returns the loop hint metadata node with the given name (for example,
// "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is // "llvm.loop.unroll.count"). If no such metadata node exists, then nullptr is
// returned. // returned.
static const MDNode *GetUnrollMetadata(const Loop *L, StringRef Name) { static const MDNode *GetUnrollMetadataForLoop(const Loop *L, StringRef Name) {
MDNode *LoopID = L->getLoopID(); MDNode *LoopID = L->getLoopID();
if (!LoopID) if (!LoopID)
return nullptr; return nullptr;
return GetUnrollMetadata(LoopID, Name);
// First operand should refer to the loop id itself.
assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
if (!MD)
continue;
const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
if (!S)
continue;
if (Name.equals(S->getString()))
return MD;
}
return nullptr;
} }
// Returns true if the loop has an unroll(full) pragma. // Returns true if the loop has an unroll(full) pragma.
static bool HasUnrollFullPragma(const Loop *L) { static bool HasUnrollFullPragma(const Loop *L) {
return GetUnrollMetadata(L, "llvm.loop.unroll.full"); return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.full");
} }
// Returns true if the loop has an unroll(disable) pragma. // Returns true if the loop has an unroll(disable) pragma.
static bool HasUnrollDisablePragma(const Loop *L) { static bool HasUnrollDisablePragma(const Loop *L) {
return GetUnrollMetadata(L, "llvm.loop.unroll.disable"); return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable");
} }
// If loop has an unroll_count pragma return the (necessarily // If loop has an unroll_count pragma return the (necessarily
// positive) value from the pragma. Otherwise return 0. // positive) value from the pragma. Otherwise return 0.
static unsigned UnrollCountPragmaValue(const Loop *L) { static unsigned UnrollCountPragmaValue(const Loop *L) {
const MDNode *MD = GetUnrollMetadata(L, "llvm.loop.unroll.count"); const MDNode *MD = GetUnrollMetadataForLoop(L, "llvm.loop.unroll.count");
if (MD) { if (MD) {
assert(MD->getNumOperands() == 2 && assert(MD->getNumOperands() == 2 &&
"Unroll count hint metadata should have two operands."); "Unroll count hint metadata should have two operands.");

View File

@ -549,3 +549,26 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount,
return true; return true;
} }
/// Given an llvm.loop loop id metadata node, returns the loop hint metadata
/// node with the given name (for example, "llvm.loop.unroll.count"). If no
/// such metadata node exists, then nullptr is returned.
///
/// \param LoopID the loop id node; it is dereferenced unconditionally, and its
///               first operand is asserted to be the node itself.
/// \param Name   the hint name compared against the leading string operand of
///               each hint node.
/// \returns the first operand of \p LoopID (after the self reference) that is
///          an MDNode whose first operand is an MDString equal to \p Name, or
///          nullptr when there is none.
const MDNode *llvm::GetUnrollMetadata(const MDNode *LoopID, StringRef Name) {
  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
    const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
    // Skip operands that are not nodes, and nodes without any operand: reading
    // operand 0 of an empty node would be an out-of-bounds access.
    if (!MD || MD->getNumOperands() == 0)
      continue;

    // A hint is identified by a leading string operand; ignore anything else.
    const MDString *S = dyn_cast<MDString>(MD->getOperand(0));
    if (!S)
      continue;

    if (Name.equals(S->getString()))
      return MD;
  }
  return nullptr;
}

View File

@ -0,0 +1,37 @@
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
target triple = "nvptx64-unknown-unknown"
; Compiled from the following CUDA code:
;
; #pragma nounroll
; for (int i = 0; i < 2; ++i)
; output[i] = input[i];
; Two-iteration copy loop whose back edge is tagged with loop id !0; the
; directives below expect the pragma to be printed at the top of the loop.
define void @nounroll(float* %input, float* %output) {
; CHECK-LABEL: .visible .func nounroll(
entry:
br label %for.body
; for.body is the whole loop: it is entered from entry and branches back to
; itself, so it is both the header and the source of the back edge.
for.body:
; CHECK: .pragma "nounroll"
%i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%idxprom = sext i32 %i.06 to i64
%arrayidx = getelementptr inbounds float* %input, i64 %idxprom
%0 = load float* %arrayidx, align 4
; CHECK: ld.f32
%arrayidx2 = getelementptr inbounds float* %output, i64 %idxprom
store float %0, float* %arrayidx2, align 4
; CHECK: st.f32
%inc = add nuw nsw i32 %i.06, 1
%exitcond = icmp eq i32 %inc, 2
; Back edge to for.body; it carries the !llvm.loop !0 attachment.
br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
; No further f32 load or store may follow the pair matched above.
; CHECK-NOT: ld.f32
; CHECK-NOT: st.f32
for.end:
ret void
}
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.unroll.disable"}