diff --git a/docs/LangRef.rst b/docs/LangRef.rst index 1ea475dee65..3c49ed0a877 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -845,6 +845,17 @@ example: show that no exceptions passes by it. This is normally the case for the ELF x86-64 abi, but it can be disabled for some compilation units. +``noduplicate`` + This attribute indicates that calls to the function cannot be + duplicated. A call to a ``noduplicate`` function may be moved + within its parent function, but may not be duplicated within + its parent function. + + A function containing a ``noduplicate`` call may still + be an inlining candidate, provided that the call is not + duplicated by inlining. That implies that the function has + internal linkage and only has one call site, so the original + call is dead after inlining. .. _moduleasm: diff --git a/include/llvm/Analysis/CodeMetrics.h b/include/llvm/Analysis/CodeMetrics.h index 4398faa20a7..35eabec6eec 100644 --- a/include/llvm/Analysis/CodeMetrics.h +++ b/include/llvm/Analysis/CodeMetrics.h @@ -46,8 +46,11 @@ namespace llvm { /// \brief True if this function calls itself. bool isRecursive; - /// \brief True if this function contains one or more indirect branches. - bool containsIndirectBr; + /// \brief True if this function cannot be duplicated. + /// + /// True if this function contains one or more indirect branches, or it contains + /// one or more 'noduplicate' instructions. + bool notDuplicatable; /// \brief True if this function calls alloca (in the C sense). bool usesDynamicAlloca; @@ -79,7 +82,7 @@ namespace llvm { unsigned NumRets; CodeMetrics() : exposesReturnsTwice(false), isRecursive(false), - containsIndirectBr(false), usesDynamicAlloca(false), + notDuplicatable(false), usesDynamicAlloca(false), NumInsts(0), NumBlocks(0), NumCalls(0), NumInlineCandidates(0), NumVectorInsts(0), NumRets(0) {} diff --git a/include/llvm/Attributes.h b/include/llvm/Attributes.h index e158d7e8f29..ab4735fe367 100644 --- a/include/llvm/Attributes.h +++ b/include/llvm/Attributes.h @@ -67,6 +67,7 @@ public: Nest, ///< Nested function static chain NoAlias, ///< Considered to not alias after call NoCapture, ///< Function creates no aliases of pointer + NoDuplicate, ///< Call cannot be duplicated NoImplicitFloat, ///< Disable implicit floating point insts NoInline, ///< inline=never NonLazyBind, ///< Function is called early and/or @@ -223,7 +224,8 @@ public: .removeAttribute(Attribute::NonLazyBind) .removeAttribute(Attribute::ReturnsTwice) .removeAttribute(Attribute::AddressSafety) - .removeAttribute(Attribute::MinSize); + .removeAttribute(Attribute::MinSize) + .removeAttribute(Attribute::NoDuplicate); } uint64_t Raw() const { return Bits; } diff --git a/include/llvm/Function.h b/include/llvm/Function.h index 2f555e89d8e..f665db2a042 100644 --- a/include/llvm/Function.h +++ b/include/llvm/Function.h @@ -250,6 +250,14 @@ public: addFnAttr(Attribute::NoUnwind); } + /// @brief Determine if the call cannot be duplicated. + bool cannotDuplicate() const { + return getFnAttributes().hasAttribute(Attribute::NoDuplicate); + } + void setCannotDuplicate() { + addFnAttr(Attribute::NoDuplicate); + } + /// @brief True if the ABI mandates (or the user requested) that this /// function be in a unwind table. bool hasUWTable() const { diff --git a/include/llvm/Instructions.h b/include/llvm/Instructions.h index be865a10476..b8ce3a3b781 100644 --- a/include/llvm/Instructions.h +++ b/include/llvm/Instructions.h @@ -1336,6 +1336,13 @@ public: Attribute::get(getContext(), Attribute::NoUnwind)); } + /// \brief Determine if the call cannot be duplicated. + bool cannotDuplicate() const {return hasFnAttr(Attribute::NoDuplicate); } + void setCannotDuplicate() { + addAttribute(AttributeSet::FunctionIndex, + Attribute::get(getContext(), Attribute::NoDuplicate)); + } + /// \brief Determine if the call returns a structure through first /// pointer argument. bool hasStructRetAttr() const { diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index 84b3266c7be..401c1beaa42 100644 --- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -165,6 +165,14 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, if (isa(II) || II->getType()->isVectorTy()) ++NumVectorInsts; + if (const CallInst *CI = dyn_cast(II)) + if (CI->hasFnAttr(Attribute::NoDuplicate)) + notDuplicatable = true; + + if (const InvokeInst *InvI = dyn_cast(II)) + if (InvI->hasFnAttr(Attribute::NoDuplicate)) + notDuplicatable = true; + ++NumInsts; } @@ -182,8 +190,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, // if someone is using a blockaddress without an indirectbr, and that // reference somehow ends up in another function or global, we probably // don't want to inline this function. - if (isa(BB->getTerminator())) - containsIndirectBr = true; + notDuplicatable |= isa(BB->getTerminator()); // Remember NumInsts for this BB. NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index f88cabb6a4a..de89fea1193 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -54,6 +54,8 @@ class CallAnalyzer : public InstVisitor { bool IsRecursiveCall; bool ExposesReturnsTwice; bool HasDynamicAlloca; + bool ContainsNoDuplicateCall; + /// Number of bytes allocated statically by the callee. uint64_t AllocatedSize; unsigned NumInstructions, NumVectorInstructions; @@ -128,8 +130,8 @@ public: CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold) : TD(TD), F(Callee), Threshold(Threshold), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), - ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0), - NumInstructions(0), NumVectorInstructions(0), + ExposesReturnsTwice(false), HasDynamicAlloca(false), ContainsNoDuplicateCall(false), + AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), NumConstantPtrDiffs(0), @@ -615,6 +617,9 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { ExposesReturnsTwice = true; return false; } + if (CS.isCall() && + cast(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate)) + ContainsNoDuplicateCall = true; if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { switch (II->getIntrinsicID()) { @@ -842,7 +847,9 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { // If there is only one call of the function, and it has internal linkage, // the cost of inlining it drops dramatically. - if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction()) + bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && + &F == CS.getCalledFunction(); + if (OnlyOneCallAndLocalLinkage) Cost += InlineConstants::LastCallToStaticBonus; // If the instruction after the call, or if the normal destination of the @@ -1008,6 +1015,12 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { } } + // If this is a noduplicate call, we can still inline as long as + // inlining this would cause the removal of the caller (so the instruction + // is not actually duplicated, just moved). + if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) + return false; + Threshold += VectorBonus; return Cost < Threshold; @@ -1025,6 +1038,7 @@ void CallAnalyzer::dump() { DEBUG_PRINT_STAT(NumInstructionsSimplified); DEBUG_PRINT_STAT(SROACostSavings); DEBUG_PRINT_STAT(SROACostSavingsLost); + DEBUG_PRINT_STAT(ContainsNoDuplicateCall); #undef DEBUG_PRINT_STAT } #endif diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 1457bea0140..d88f8a8093b 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -213,10 +213,22 @@ bool Loop::isLoopSimplifyForm() const { /// isSafeToClone - Return true if the loop body is safe to clone in practice. /// Routines that reform the loop CFG and split edges often fail on indirectbr. bool Loop::isSafeToClone() const { - // Return false if any loop blocks contain indirectbrs. + // Return false if any loop blocks contain indirectbrs, or there are any calls + // to noduplicate functions. for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) { - if (isa((*I)->getTerminator())) + if (isa((*I)->getTerminator())) { return false; + } else if (const InvokeInst *II = dyn_cast((*I)->getTerminator())) { + if (II->hasFnAttr(Attribute::NoDuplicate)) + return false; + } + + for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) { + if (const CallInst *CI = dyn_cast(BI)) { + if (CI->hasFnAttr(Attribute::NoDuplicate)) + return false; + } + } } return true; } diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 350eef2b194..5300f9276b6 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -564,6 +564,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(nonlazybind); KEYWORD(address_safety); KEYWORD(minsize); + KEYWORD(noduplicate); KEYWORD(type); KEYWORD(opaque); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 7ba32b83f9a..c45ad0a083e 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -957,6 +957,7 @@ bool LLParser::ParseOptionalFuncAttrs(AttrBuilder &B) { case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break; case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break; case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break; + case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break; // Error handling. case lltok::kw_zeroext: @@ -1042,6 +1043,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_byval: case lltok::kw_nest: HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute"); break; + case lltok::kw_noreturn: case lltok::kw_nounwind: case lltok::kw_uwtable: case lltok::kw_returns_twice: case lltok::kw_noinline: case lltok::kw_readnone: @@ -1052,6 +1054,7 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_naked: case lltok::kw_nonlazybind: case lltok::kw_address_safety: case lltok::kw_minsize: case lltok::kw_alignstack: case lltok::kw_align: + case lltok::kw_noduplicate: HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); break; } diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 2ed2c221a87..5b4d415d8ed 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -116,6 +116,7 @@ namespace lltok { kw_nonlazybind, kw_address_safety, kw_minsize, + kw_noduplicate, kw_type, kw_opaque, diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 4a4cd705e21..c004c9a1a7d 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -249,7 +249,11 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB, // as having cost of 2 total, and if they are a vector intrinsic, we model // them as having cost 1. if (const CallInst *CI = dyn_cast(I)) { - if (!isa(CI)) + if (CI->hasFnAttr(Attribute::NoDuplicate)) + // Blocks with NoDuplicate are modelled as having infinite cost, so they + // are never duplicated. + return ~0U; + else if (!isa(CI)) Size += 3; else if (!CI->getType()->isVectorTy()) Size += 1; diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp index 249baf51645..83e049b94c2 100644 --- a/lib/Transforms/Scalar/LoopRotation.cpp +++ b/lib/Transforms/Scalar/LoopRotation.cpp @@ -274,10 +274,16 @@ bool LoopRotate::rotateLoop(Loop *L) { if (OrigLatch == 0 || L->isLoopExiting(OrigLatch)) return false; - // Check size of original header and reject loop if it is very big. + // Check size of original header and reject loop if it is very big or we can't + // duplicate blocks inside it. { CodeMetrics Metrics; Metrics.analyzeBasicBlock(OrigHeader); + if (Metrics.notDuplicatable) { + DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non duplicatable" + << " instructions: "; L->dump()); + return false; + } if (Metrics.NumInsts > MAX_HEADER_SIZE) return false; } diff --git a/lib/Transforms/Scalar/LoopUnrollPass.cpp b/lib/Transforms/Scalar/LoopUnrollPass.cpp index de6b9952db7..2a04af33d99 100644 --- a/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -113,12 +113,13 @@ Pass *llvm::createLoopUnrollPass(int Threshold, int Count, int AllowPartial) { /// ApproximateLoopSize - Approximate the size of the loop. static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls, - const DataLayout *TD) { + bool &NotDuplicatable, const DataLayout *TD) { CodeMetrics Metrics; for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); I != E; ++I) Metrics.analyzeBasicBlock(*I, TD); NumCalls = Metrics.NumInlineCandidates; + NotDuplicatable = Metrics.notDuplicatable; unsigned LoopSize = Metrics.NumInsts; @@ -181,8 +182,15 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { if (Threshold != NoThreshold) { const DataLayout *TD = getAnalysisIfAvailable(); unsigned NumInlineCandidates; - unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, TD); + bool notDuplicatable; + unsigned LoopSize = ApproximateLoopSize(L, NumInlineCandidates, + notDuplicatable, TD); DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n"); + if (notDuplicatable) { + DEBUG(dbgs() << " Not unrolling loop which contains non duplicatable" + << " instructions.\n"); + return false; + } if (NumInlineCandidates != 0) { DEBUG(dbgs() << " Not unrolling loop with inlinable calls.\n"); return false; diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp index 17ffda60fdf..e2e42e5054f 100644 --- a/lib/Transforms/Scalar/LoopUnswitch.cpp +++ b/lib/Transforms/Scalar/LoopUnswitch.cpp @@ -248,6 +248,13 @@ bool LUAnalysisCache::countLoop(const Loop* L) { Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5); Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation); MaxSize -= Props.SizeEstimation * Props.CanBeUnswitchedCount; + + if (Metrics.notDuplicatable) { + DEBUG(dbgs() << "NOT unswitching loop %" + << L->getHeader()->getName() << ", contents cannot be " + << "duplicated!\n"); + return false; + } } if (!Props.CanBeUnswitchedCount) { diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp index 2782455995a..5bde5638a1e 100644 --- a/lib/VMCore/Attributes.cpp +++ b/lib/VMCore/Attributes.cpp @@ -213,6 +213,8 @@ std::string Attribute::getAsString() const { Result += utostr(getAlignment()); Result += " "; } + if (hasAttribute(Attribute::NoDuplicate)) + Result += "noduplicate "; // Trim the trailing space. assert(!Result.empty() && "Unknown attribute!"); Result.erase(Result.end()-1); @@ -327,6 +329,7 @@ uint64_t AttributeImpl::getAttrMask(uint64_t Val) { case Attribute::NonLazyBind: return 1U << 31; case Attribute::AddressSafety: return 1ULL << 32; case Attribute::MinSize: return 1ULL << 33; + case Attribute::NoDuplicate: return 1ULL << 34; } llvm_unreachable("Unsupported attribute type"); } diff --git a/test/Transforms/Inline/basictest.ll b/test/Transforms/Inline/basictest.ll index 609a3d4e153..39e25cb5d62 100644 --- a/test/Transforms/Inline/basictest.ll +++ b/test/Transforms/Inline/basictest.ll @@ -45,3 +45,48 @@ define i32 @test2(i1 %cond) { ; CHECK-NOT: = alloca ; CHECK: ret i32 } + +declare void @barrier() noduplicate + +define internal i32 @f() { + call void @barrier() noduplicate + ret i32 1 +} + +define i32 @g() { + call void @barrier() noduplicate + ret i32 2 +} + +define internal i32 @h() { + call void @barrier() noduplicate + ret i32 3 +} + +define i32 @test3() { + %b = call i32 @f() + ret i32 %b +} + +; The call to @f cannot be inlined as there is another callsite +; calling @f, and @f contains a noduplicate call. +; +; The call to @g cannot be inlined as it has external linkage. +; +; The call to @h *can* be inlined. + +; CHECK: @test +define i32 @test() { +; CHECK: call i32 @f() + %a = call i32 @f() +; CHECK: call i32 @g() + %b = call i32 @g() +; CHECK-NOT: call i32 @h() + %c = call i32 @h() + + %d = add i32 %a, %b + %e = add i32 %d, %c + + ret i32 %e +; CHECK: } +} diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll index 46271379bd0..f889c02c6f3 100644 --- a/test/Transforms/JumpThreading/basic.ll +++ b/test/Transforms/JumpThreading/basic.ll @@ -476,3 +476,39 @@ exit1: ; CHECK: } } +; In this test we check that block duplication is inhibited by the presence +; of a function with the 'noduplicate' attribute. + +declare void @g() +declare void @j() +declare void @k() + +; CHECK: define void @h(i32 %p) { +define void @h(i32 %p) { + %x = icmp ult i32 %p, 5 + br i1 %x, label %l1, label %l2 + +l1: + call void @j() + br label %l3 + +l2: + call void @k() + br label %l3 + +l3: +; CHECK: call void @g() noduplicate +; CHECK-NOT: call void @g() noduplicate + call void @g() noduplicate + %y = icmp ult i32 %p, 5 + br i1 %y, label %l4, label %l5 + +l4: + call void @j() + ret void + +l5: + call void @k() + ret void +; CHECK: } +} diff --git a/test/Transforms/LoopRotate/basic.ll b/test/Transforms/LoopRotate/basic.ll index b7bcb21d56f..cb5d3f9be8a 100644 --- a/test/Transforms/LoopRotate/basic.ll +++ b/test/Transforms/LoopRotate/basic.ll @@ -33,3 +33,29 @@ for.end: ; preds = %for.cond declare void @g(i32*) +; CHECK: @test2 +define void @test2() nounwind ssp { +entry: + %array = alloca [20 x i32], align 16 + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %cmp = icmp slt i32 %i.0, 100 +; CHECK: call void @f +; CHECK-NOT: call void @f + call void @f() noduplicate + br i1 %cmp, label %for.body, label %for.end + +for.body: ; preds = %for.cond + %inc = add nsw i32 %i.0, 1 + call void @h() + br label %for.cond + +for.end: ; preds = %for.cond + ret void +; CHECK: } +} + +declare void @f() noduplicate +declare void @h() diff --git a/test/Transforms/LoopUnroll/basic.ll b/test/Transforms/LoopUnroll/basic.ll index eeb3e9a57b0..ab5bc568ede 100644 --- a/test/Transforms/LoopUnroll/basic.ll +++ b/test/Transforms/LoopUnroll/basic.ll @@ -22,3 +22,26 @@ l1: ; preds = %l1, %entry l2: ; preds = %l1 ret i32 0 } + +; This should not unroll since the call is 'noduplicate'. + +; CHECK: @test2 +define i32 @test2(i8** %P) nounwind ssp { +entry: + br label %l1 + +l1: ; preds = %l1, %entry + %x.0 = phi i32 [ 0, %entry ], [ %inc, %l1 ] +; CHECK: call void @f() +; CHECK-NOT: call void @f() + call void @f() noduplicate + %inc = add nsw i32 %x.0, 1 + %exitcond = icmp eq i32 %inc, 3 + br i1 %exitcond, label %l2, label %l1 + +l2: ; preds = %l1 + ret i32 0 +; CHECK: } +} + +declare void @f() diff --git a/test/Transforms/LoopUnswitch/basictest.ll b/test/Transforms/LoopUnswitch/basictest.ll index 1e6f2cf15ee..e98d82b6522 100644 --- a/test/Transforms/LoopUnswitch/basictest.ll +++ b/test/Transforms/LoopUnswitch/basictest.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -loop-unswitch -disable-output +; RUN: opt < %s -loop-unswitch -verify-loop-info -S < %s 2>&1 | FileCheck %s define i32 @test(i32* %A, i1 %C) { entry: @@ -29,3 +29,40 @@ return: ; preds = %endif, %then ret i32 %tmp.13 } +; This simple test would normally unswitch, but should be inhibited by the presence of +; the noduplicate call. + +; CHECK: @test2 +define i32 @test2(i32* %var) { + %mem = alloca i32 + store i32 2, i32* %mem + %c = load i32* %mem + + br label %loop_begin + +loop_begin: + + %var_val = load i32* %var + + switch i32 %c, label %default [ + i32 1, label %inc + i32 2, label %dec + ] + +inc: + call void @incf() noreturn nounwind + br label %loop_begin +dec: +; CHECK: call void @decf() +; CHECK-NOT: call void @decf() + call void @decf() noreturn nounwind noduplicate + br label %loop_begin +default: + br label %loop_exit +loop_exit: + ret i32 0 +; CHECK: } +} + +declare void @incf() noreturn +declare void @decf() noreturn