Add a new attribute, 'noduplicate'. If a function contains a noduplicate call, the call cannot be duplicated: jump threading, loop unrolling, loop unswitching, and loop rotation are inhibited if they would duplicate the call.

Similarly, inlining of the function is inhibited if that would duplicate the call (in particular, inlining is still allowed when there is only one call site and the function has internal linkage).



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@170704 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
James Molloy 2012-12-20 16:04:27 +00:00
parent 6af228a92a
commit 67ae135759
21 changed files with 280 additions and 16 deletions

View File

@ -845,6 +845,17 @@ example:
show that no exceptions passes by it. This is normally the case for
the ELF x86-64 abi, but it can be disabled for some compilation
units.
``noduplicate``
This attribute indicates that calls to the function cannot be
duplicated. A call to a ``noduplicate`` function may be moved
within its parent function, but may not be duplicated within
its parent function.
A function containing a ``noduplicate`` call may still
be an inlining candidate, provided that the call is not
duplicated by inlining. That implies that the function has
internal linkage and only has one call site, so the original
call is dead after inlining.
.. _moduleasm:

View File

@ -46,8 +46,11 @@ namespace llvm {
/// \brief True if this function calls itself.
bool isRecursive;
/// \brief True if this function contains one or more indirect branches.
bool containsIndirectBr;
/// \brief True if this function cannot be duplicated.
///
/// True if this function contains one or more indirect branches, or it contains
/// one or more 'noduplicate' instructions.
bool notDuplicatable;
/// \brief True if this function calls alloca (in the C sense).
bool usesDynamicAlloca;
@ -79,7 +82,7 @@ namespace llvm {
unsigned NumRets;
CodeMetrics() : exposesReturnsTwice(false), isRecursive(false),
containsIndirectBr(false), usesDynamicAlloca(false),
notDuplicatable(false), usesDynamicAlloca(false),
NumInsts(0), NumBlocks(0), NumCalls(0),
NumInlineCandidates(0), NumVectorInsts(0),
NumRets(0) {}

View File

@ -67,6 +67,7 @@ public:
Nest, ///< Nested function static chain
NoAlias, ///< Considered to not alias after call
NoCapture, ///< Function creates no aliases of pointer
NoDuplicate, ///< Call cannot be duplicated
NoImplicitFloat, ///< Disable implicit floating point insts
NoInline, ///< inline=never
NonLazyBind, ///< Function is called early and/or
@ -223,7 +224,8 @@ public:
.removeAttribute(Attribute::NonLazyBind)
.removeAttribute(Attribute::ReturnsTwice)
.removeAttribute(Attribute::AddressSafety)
.removeAttribute(Attribute::MinSize);
.removeAttribute(Attribute::MinSize)
.removeAttribute(Attribute::NoDuplicate);
}
uint64_t Raw() const { return Bits; }

View File

@ -250,6 +250,14 @@ public:
addFnAttr(Attribute::NoUnwind);
}
/// @brief Report whether this function carries the NoDuplicate attribute,
/// i.e. whether calls to it must not be duplicated.
bool cannotDuplicate() const {
  const bool HasNoDuplicate =
      getFnAttributes().hasAttribute(Attribute::NoDuplicate);
  return HasNoDuplicate;
}
/// @brief Mark this function as non-duplicatable by adding the NoDuplicate
/// function attribute to it.
void setCannotDuplicate() {
addFnAttr(Attribute::NoDuplicate);
}
/// @brief True if the ABI mandates (or the user requested) that this
/// function be in a unwind table.
bool hasUWTable() const {

View File

@ -1336,6 +1336,13 @@ public:
Attribute::get(getContext(), Attribute::NoUnwind));
}
/// \brief Report whether this call has the NoDuplicate function attribute
/// and therefore must not be duplicated.
bool cannotDuplicate() const {
  const bool IsNoDuplicate = hasFnAttr(Attribute::NoDuplicate);
  return IsNoDuplicate;
}
void setCannotDuplicate() {
addAttribute(AttributeSet::FunctionIndex,
Attribute::get(getContext(), Attribute::NoDuplicate));
}
/// \brief Determine if the call returns a structure through first
/// pointer argument.
bool hasStructRetAttr() const {

View File

@ -165,6 +165,14 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
++NumVectorInsts;
if (const CallInst *CI = dyn_cast<CallInst>(II))
if (CI->hasFnAttr(Attribute::NoDuplicate))
notDuplicatable = true;
if (const InvokeInst *InvI = dyn_cast<InvokeInst>(II))
if (InvI->hasFnAttr(Attribute::NoDuplicate))
notDuplicatable = true;
++NumInsts;
}
@ -182,8 +190,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
// if someone is using a blockaddress without an indirectbr, and that
// reference somehow ends up in another function or global, we probably
// don't want to inline this function.
if (isa<IndirectBrInst>(BB->getTerminator()))
containsIndirectBr = true;
notDuplicatable |= isa<IndirectBrInst>(BB->getTerminator());
// Remember NumInsts for this BB.
NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB;

View File

@ -54,6 +54,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
bool IsRecursiveCall;
bool ExposesReturnsTwice;
bool HasDynamicAlloca;
bool ContainsNoDuplicateCall;
/// Number of bytes allocated statically by the callee.
uint64_t AllocatedSize;
unsigned NumInstructions, NumVectorInstructions;
@ -128,8 +130,8 @@ public:
CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold)
: TD(TD), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0),
NumInstructions(0), NumVectorInstructions(0),
ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false),
AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0),
FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
NumConstantPtrCmps(0), NumConstantPtrDiffs(0),
@ -615,6 +617,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
ExposesReturnsTwice = true;
return false;
}
if (CS.isCall() &&
cast<CallInst>(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate))
ContainsNoDuplicateCall = true;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CS.getInstruction())) {
switch (II->getIntrinsicID()) {
@ -842,7 +847,9 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// If there is only one call of the function, and it has internal linkage,
// the cost of inlining it drops dramatically.
if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction())
bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() &&
&F == CS.getCalledFunction();
if (OnlyOneCallAndLocalLinkage)
Cost += InlineConstants::LastCallToStaticBonus;
// If the instruction after the call, or if the normal destination of the
@ -1008,6 +1015,12 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
}
}
// If the callee contains a noduplicate call, we can still inline as long as
// inlining this would cause the removal of the callee (so the instruction
// is not actually duplicated, just moved).
if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall)
return false;
Threshold += VectorBonus;
return Cost < Threshold;
@ -1025,6 +1038,7 @@ void CallAnalyzer::dump() {
DEBUG_PRINT_STAT(NumInstructionsSimplified);
DEBUG_PRINT_STAT(SROACostSavings);
DEBUG_PRINT_STAT(SROACostSavingsLost);
DEBUG_PRINT_STAT(ContainsNoDuplicateCall);
#undef DEBUG_PRINT_STAT
}
#endif

View File

@ -213,10 +213,22 @@ bool Loop::isLoopSimplifyForm() const {
/// isSafeToClone - Return true if the loop body is safe to clone in practice.
/// Routines that reform the loop CFG and split edges often fail on indirectbr.
/// A loop is also reported as unsafe to clone when any of its blocks holds a
/// call or invoke carrying the NoDuplicate attribute.
bool Loop::isSafeToClone() const {
// Return false if any loop blocks contain indirectbrs.
// Return false if any loop blocks contain indirectbrs, or there are any calls
// to noduplicate functions.
for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) {
if (isa<IndirectBrInst>((*I)->getTerminator()))
if (isa<IndirectBrInst>((*I)->getTerminator())) {
return false;
} else if (const InvokeInst *II = dyn_cast<InvokeInst>((*I)->getTerminator())) {
// The invoke is inspected through the block's terminator; the
// instruction scan below only looks for CallInst.
if (II->hasFnAttr(Attribute::NoDuplicate))
return false;
}
// Walk every instruction in the block looking for a NoDuplicate call.
for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) {
if (const CallInst *CI = dyn_cast<CallInst>(BI)) {
if (CI->hasFnAttr(Attribute::NoDuplicate))
return false;
}
}
}
// No block tripped any of the checks above.
return true;
}

View File

@ -564,6 +564,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(nonlazybind);
KEYWORD(address_safety);
KEYWORD(minsize);
KEYWORD(noduplicate);
KEYWORD(type);
KEYWORD(opaque);

View File

@ -957,6 +957,7 @@ bool LLParser::ParseOptionalFuncAttrs(AttrBuilder &B) {
case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break;
case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break;
case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break;
case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break;
// Error handling.
case lltok::kw_zeroext:
@ -1042,6 +1043,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_byval: case lltok::kw_nest:
HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute");
break;
case lltok::kw_noreturn: case lltok::kw_nounwind:
case lltok::kw_uwtable: case lltok::kw_returns_twice:
case lltok::kw_noinline: case lltok::kw_readnone:
@ -1052,6 +1054,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
case lltok::kw_naked: case lltok::kw_nonlazybind:
case lltok::kw_address_safety: case lltok::kw_minsize:
case lltok::kw_alignstack: case lltok::kw_align:
case lltok::kw_noduplicate:
HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute");
break;
}

View File

@ -116,6 +116,7 @@ namespace lltok {
kw_nonlazybind,
kw_address_safety,
kw_minsize,
kw_noduplicate,
kw_type,
kw_opaque,

View File

@ -249,7 +249,11 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB,
// as having cost of 2 total, and if they are a vector intrinsic, we model
// them as having cost 1.
if (const CallInst *CI = dyn_cast<CallInst>(I)) {
if (!isa<IntrinsicInst>(CI))
if (CI->hasFnAttr(Attribute::NoDuplicate))
// Blocks with NoDuplicate are modelled as having infinite cost, so they
// are never duplicated.
return ~0U;
else if (!isa<IntrinsicInst>(CI))
Size += 3;
else if (!CI->getType()->isVectorTy())
Size += 1;

View File

@ -274,10 +274,16 @@ bool LoopRotate::rotateLoop(Loop *L) {
if (OrigLatch == 0 || L->isLoopExiting(OrigLatch))
return false;
// Check size of original header and reject loop if it is very big.
// Check size of original header and reject loop if it is very big or we can't
// duplicate blocks inside it.
{
CodeMetrics Metrics;
Metrics.analyzeBasicBlock(OrigHeader);
if (Metrics.notDuplicatable) {
DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non duplicatable"
<< " instructions: "; L->dump());
return false;
}
if (Metrics.NumInsts > MAX_HEADER_SIZE)
return false;
}

View File

@ -113,12 +113,13 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) {
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
const DataLayout *TD) {
bool &NotDuplicatable, const DataLayout *TD) {
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
Metrics.analyzeBasicBlock(*I, TD);
NumCalls = Metrics.NumInlineCandidates;
NotDuplicatable = Metrics.notDuplicatable;
unsigned LoopSize = Metrics.NumInsts;
@ -181,8 +182,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
if (Threshold != NoThreshold) {
const DataLayout *TD = getAnalysisIfAvailable<DataLayout>();
unsigned NumInlineCandidates;
unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, TD);
bool notDuplicatable;
unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates,
notDuplicatable, TD);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
if (notDuplicatable) {
DEBUG(dbgs() << " Not unrolling loop which contains non duplicatable"
<< " instructions.\n");
return false;
}
if (NumInlineCandidates != 0) {
DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n");
return false;

View File

@ -248,6 +248,13 @@ bool LUAnalysisCache::countLoop(const Loop* L) {
Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount;
if (Metrics.notDuplicatable) {
DEBUG(dbgs() << "NOT unswitching loop %"
<< L->getHeader()->getName() << ", contents cannot be "
<< "duplicated!\n");
return false;
}
}
if (!Props.CanBeUnswitchedCount) {

View File

@ -213,6 +213,8 @@ std::string Attribute::getAsString() const {
Result += utostr(getAlignment());
Result += " ";
}
if (hasAttribute(Attribute::NoDuplicate))
Result += "noduplicate ";
// Trim the trailing space.
assert(!Result.empty() && "Unknown attribute!");
Result.erase(Result.end()-1);
@ -327,6 +329,7 @@ uint64_t AttributeImpl::getAttrMask(uint64_t Val) {
case Attribute::NonLazyBind: return 1U << 31;
case Attribute::AddressSafety: return 1ULL << 32;
case Attribute::MinSize: return 1ULL << 33;
case Attribute::NoDuplicate: return 1ULL << 34;
}
llvm_unreachable("Unsupported attribute type");
}

View File

@ -45,3 +45,48 @@ define i32 @test2(i1 %cond) {
; CHECK-NOT: = alloca
; CHECK: ret i32
}
declare void @barrier() noduplicate
define internal i32 @f() {
call void @barrier() noduplicate
ret i32 1
}
define i32 @g() {
call void @barrier() noduplicate
ret i32 2
}
define internal i32 @h() {
call void @barrier() noduplicate
ret i32 3
}
define i32 @test3() {
%b = call i32 @f()
ret i32 %b
}
; The call to @f cannot be inlined as there is another callsite
; calling @f, and @f contains a noduplicate call.
;
; The call to @g cannot be inlined as it has external linkage.
;
; The call to @h *can* be inlined.
; CHECK: @test
define i32 @test() {
; CHECK: call i32 @f()
%a = call i32 @f()
; CHECK: call i32 @g()
%b = call i32 @g()
; CHECK-NOT: call i32 @h()
%c = call i32 @h()
%d = add i32 %a, %b
%e = add i32 %d, %c
ret i32 %e
; CHECK: }
}

View File

@ -476,3 +476,39 @@ exit1:
; CHECK: }
}
; In this test we check that block duplication is inhibited by the presence
; of a function with the 'noduplicate' attribute.
declare void @g()
declare void @j()
declare void @k()
; CHECK: define void @h(i32 %p) {
define void @h(i32 %p) {
%x = icmp ult i32 %p, 5
br i1 %x, label %l1, label %l2
l1:
call void @j()
br label %l3
l2:
call void @k()
br label %l3
l3:
; CHECK: call void @g() noduplicate
; CHECK-NOT: call void @g() noduplicate
call void @g() noduplicate
%y = icmp ult i32 %p, 5
br i1 %y, label %l4, label %l5
l4:
call void @j()
ret void
l5:
call void @k()
ret void
; CHECK: }
}

View File

@ -33,3 +33,29 @@ for.end: ; preds = %for.cond
declare void @g(i32*)
; CHECK: @test2
define void @test2() nounwind ssp {
entry:
%array = alloca [20 x i32], align 16
br label %for.cond
for.cond: ; preds = %for.body, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%cmp = icmp slt i32 %i.0, 100
; CHECK: call void @f
; CHECK-NOT: call void @f
call void @f() noduplicate
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
%inc = add nsw i32 %i.0, 1
call void @h()
br label %for.cond
for.end: ; preds = %for.cond
ret void
; CHECK: }
}
declare void @f() noduplicate
declare void @h()

View File

@ -22,3 +22,26 @@ l1: ; preds = %l1, %entry
l2: ; preds = %l1
ret i32 0
}
; This should not unroll since the call is 'noduplicate'.
; CHECK: @test2
define i32 @test2(i8** %P) nounwind ssp {
entry:
br label %l1
l1: ; preds = %l1, %entry
%x.0 = phi i32 [ 0, %entry ], [ %inc, %l1 ]
; CHECK: call void @f()
; CHECK-NOT: call void @f()
call void @f() noduplicate
%inc = add nsw i32 %x.0, 1
%exitcond = icmp eq i32 %inc, 3
br i1 %exitcond, label %l2, label %l1
l2: ; preds = %l1
ret i32 0
; CHECK: }
}
declare void @f()

View File

@ -1,4 +1,4 @@
; RUN: opt < %s -loop-unswitch -disable-output
; RUN: opt < %s -loop-unswitch -verify-loop-info -S < %s 2>&1 | FileCheck %s
define i32 @test(i32* %A, i1 %C) {
entry:
@ -29,3 +29,40 @@ return: ; preds = %endif, %then
ret i32 %tmp.13
}
; This simple test would normally unswitch, but should be inhibited by the presence of
; the noduplicate call.
; CHECK: @test2
define i32 @test2(i32* %var) {
%mem = alloca i32
store i32 2, i32* %mem
%c = load i32* %mem
br label %loop_begin
loop_begin:
%var_val = load i32* %var
switch i32 %c, label %default [
i32 1, label %inc
i32 2, label %dec
]
inc:
call void @incf() noreturn nounwind
br label %loop_begin
dec:
; CHECK: call void @decf()
; CHECK-NOT: call void @decf()
call void @decf() noreturn nounwind noduplicate
br label %loop_begin
default:
br label %loop_exit
loop_exit:
ret i32 0
; CHECK: }
}
declare void @incf() noreturn
declare void @decf() noreturn