Add functions for finding ephemeral values

This adds a set of utility functions for collecting 'ephemeral' values. These
are LLVM IR values that are used only by @llvm.assume intrinsics (directly or
indirectly), and thus will be removed prior to code generation, implying that
they should be considered free for certain purposes (like inlining). The
inliner's cost analysis, and a few other passes, have been updated to account
for ephemeral values using the provided functionality.

This functionality is important for the usability of @llvm.assume, because it
limits the "non-local" side-effects of adding llvm.assume on inlining, loop
unrolling, etc. (these are hints, and do not generate code, so they should not
directly contribute to estimates of execution cost).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217335 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2014-09-07 13:49:57 +00:00
parent 3666e7f4c1
commit 3d03d60ca8
9 changed files with 222 additions and 17 deletions

View File

@ -16,10 +16,13 @@
#define LLVM_ANALYSIS_CODEMETRICS_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/CallSite.h"
namespace llvm {
class AssumptionTracker;
class BasicBlock;
class Loop;
class Function;
class Instruction;
class DataLayout;
@ -85,7 +88,18 @@ struct CodeMetrics {
NumInlineCandidates(0), NumVectorInsts(0), NumRets(0) {}
/// \brief Add information about a block to the current state.
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI);
void analyzeBasicBlock(const BasicBlock *BB, const TargetTransformInfo &TTI,
SmallPtrSetImpl<const Value*> &EphValues);
/// \brief Collect a loop's ephemeral values (those used only by an assume
/// or similar intrinsics in the loop).
static void collectEphemeralValues(const Loop *L, AssumptionTracker *AT,
SmallPtrSetImpl<const Value*> &EphValues);
/// \brief Collect a functions's ephemeral values (those used only by an
/// assume or similar intrinsics in the function).
static void collectEphemeralValues(const Function *L, AssumptionTracker *AT,
SmallPtrSetImpl<const Value*> &EphValues);
};
}

View File

@ -19,6 +19,7 @@
#include <climits>
namespace llvm {
class AssumptionTracker;
class CallSite;
class DataLayout;
class Function;
@ -100,6 +101,7 @@ public:
/// \brief Cost analyzer used by inliner.
class InlineCostAnalysis : public CallGraphSCCPass {
const TargetTransformInfo *TTI;
AssumptionTracker *AT;
public:
static char ID;

View File

@ -11,23 +11,99 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/Support/Debug.h"
#define DEBUG_TYPE "code-metrics"
using namespace llvm;
static void completeEphemeralValues(SmallVector<const Value *, 16> &WorkSet,
SmallPtrSetImpl<const Value*> &EphValues) {
SmallPtrSet<const Value *, 32> Visited;
// Make sure that all of the items in WorkSet are in our EphValues set.
EphValues.insert(WorkSet.begin(), WorkSet.end());
// Note: We don't speculate PHIs here, so we'll miss instruction chains kept
// alive only by ephemeral values.
while (!WorkSet.empty()) {
const Value *V = WorkSet.pop_back_val();
if (!Visited.insert(V))
continue;
// If all uses of this value are ephemeral, then so is this value.
bool FoundNEUse = false;
for (const User *I : V->users())
if (!EphValues.count(I)) {
FoundNEUse = true;
break;
}
if (FoundNEUse)
continue;
EphValues.insert(V);
DEBUG(dbgs() << "Ephemeral Value: " << *V << "\n");
if (const User *U = dyn_cast<User>(V))
for (const Value *J : U->operands()) {
if (isSafeToSpeculativelyExecute(J))
WorkSet.push_back(J);
}
}
}
// Find all ephemeral values.
void CodeMetrics::collectEphemeralValues(const Loop *L, AssumptionTracker *AT,
SmallPtrSetImpl<const Value*> &EphValues) {
SmallVector<const Value *, 16> WorkSet;
for (auto &I : AT->assumptions(L->getHeader()->getParent())) {
// Filter out call sites outside of the loop so we don't to a function's
// worth of work for each of its loops (and, in the common case, ephemeral
// values in the loop are likely due to @llvm.assume calls in the loop).
if (!L->contains(I->getParent()))
continue;
WorkSet.push_back(I);
}
completeEphemeralValues(WorkSet, EphValues);
}
void CodeMetrics::collectEphemeralValues(const Function *F, AssumptionTracker *AT,
SmallPtrSetImpl<const Value*> &EphValues) {
SmallVector<const Value *, 16> WorkSet;
for (auto &I : AT->assumptions(const_cast<Function*>(F)))
WorkSet.push_back(I);
completeEphemeralValues(WorkSet, EphValues);
}
/// analyzeBasicBlock - Fill in the current structure with information gleaned
/// from the specified block.
void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB,
const TargetTransformInfo &TTI) {
const TargetTransformInfo &TTI,
SmallPtrSetImpl<const Value*> &EphValues) {
++NumBlocks;
unsigned NumInstsBeforeThisBB = NumInsts;
for (BasicBlock::const_iterator II = BB->begin(), E = BB->end();
II != E; ++II) {
// Skip ephemeral values.
if (EphValues.count(II))
continue;
// Special handling for calls.
if (isa<CallInst>(II) || isa<InvokeInst>(II)) {
ImmutableCallSite CS(cast<Instruction>(II));

View File

@ -17,7 +17,9 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/CallSite.h"
@ -49,6 +51,9 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
/// The TargetTransformInfo available for this compilation.
const TargetTransformInfo &TTI;
/// The cache of @llvm.assume intrinsics.
AssumptionTracker *AT;
// The called function.
Function &F;
@ -104,7 +109,7 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V);
// Custom analysis routines.
bool analyzeBlock(BasicBlock *BB);
bool analyzeBlock(BasicBlock *BB, SmallPtrSetImpl<const Value *> &EphValues);
// Disable several entry points to the visitor so we don't accidentally use
// them by declaring but not defining them here.
@ -141,8 +146,8 @@ class CallAnalyzer : public InstVisitor<CallAnalyzer, bool> {
public:
CallAnalyzer(const DataLayout *DL, const TargetTransformInfo &TTI,
Function &Callee, int Threshold)
: DL(DL), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0),
AssumptionTracker *AT, Function &Callee, int Threshold)
: DL(DL), TTI(TTI), AT(AT), F(Callee), Threshold(Threshold), Cost(0),
IsCallerRecursive(false), IsRecursiveCall(false),
ExposesReturnsTwice(false), HasDynamicAlloca(false),
ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false),
@ -778,7 +783,7 @@ bool CallAnalyzer::visitCallSite(CallSite CS) {
// during devirtualization and so we want to give it a hefty bonus for
// inlining, but cap that bonus in the event that inlining wouldn't pan
// out. Pretend to inline the function, with a custom threshold.
CallAnalyzer CA(DL, TTI, *F, InlineConstants::IndirectCallThreshold);
CallAnalyzer CA(DL, TTI, AT, *F, InlineConstants::IndirectCallThreshold);
if (CA.analyzeCall(CS)) {
// We were able to inline the indirect call! Subtract the cost from the
// bonus we want to apply, but don't go below zero.
@ -881,7 +886,8 @@ bool CallAnalyzer::visitInstruction(Instruction &I) {
/// aborts early if the threshold has been exceeded or an impossible to inline
/// construct has been detected. It returns false if inlining is no longer
/// viable, and true if inlining remains viable.
bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
bool CallAnalyzer::analyzeBlock(BasicBlock *BB,
SmallPtrSetImpl<const Value *> &EphValues) {
for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
// FIXME: Currently, the number of instructions in a function regardless of
// our ability to simplify them during inline to constants or dead code,
@ -893,6 +899,10 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) {
if (isa<DbgInfoIntrinsic>(I))
continue;
// Skip ephemeral values.
if (EphValues.count(I))
continue;
++NumInstructions;
if (isa<ExtractElementInst>(I) || I->getType()->isVectorTy())
++NumVectorInstructions;
@ -1096,6 +1106,12 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size();
NumAllocaArgs = SROAArgValues.size();
// FIXME: If a caller has multiple calls to a callee, we end up recomputing
// the ephemeral values multiple times (and they're completely determined by
// the callee, so this is purely duplicate work).
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(&F, AT, EphValues);
// The worklist of live basic blocks in the callee *after* inlining. We avoid
// adding basic blocks of the callee which can be proven to be dead for this
// particular call site in order to get more accurate cost estimates. This
@ -1129,7 +1145,7 @@ bool CallAnalyzer::analyzeCall(CallSite CS) {
// Analyze the cost of this block. If we blow through the threshold, this
// returns false, and we can bail on out.
if (!analyzeBlock(BB)) {
if (!analyzeBlock(BB, EphValues)) {
if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca ||
HasIndirectBr)
return false;
@ -1217,6 +1233,7 @@ void CallAnalyzer::dump() {
INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis",
true, true)
@ -1228,12 +1245,14 @@ InlineCostAnalysis::~InlineCostAnalysis() {}
void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<AssumptionTracker>();
AU.addRequired<TargetTransformInfo>();
CallGraphSCCPass::getAnalysisUsage(AU);
}
bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) {
TTI = &getAnalysis<TargetTransformInfo>();
AT = &getAnalysis<AssumptionTracker>();
return false;
}
@ -1290,7 +1309,7 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee,
DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName()
<< "...\n");
CallAnalyzer CA(Callee->getDataLayout(), *TTI, *Callee, Threshold);
CallAnalyzer CA(Callee->getDataLayout(), *TTI, AT, *Callee, Threshold);
bool ShouldInline = CA.analyzeCall(CS);
DEBUG(CA.dump());

View File

@ -13,6 +13,7 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionTracker.h"
#include "llvm/Analysis/CodeMetrics.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopPass.h"
@ -53,6 +54,7 @@ namespace {
// LCSSA form makes instruction renaming easier.
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<AssumptionTracker>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
@ -72,12 +74,14 @@ namespace {
unsigned MaxHeaderSize;
LoopInfo *LI;
const TargetTransformInfo *TTI;
AssumptionTracker *AT;
};
}
char LoopRotate::ID = 0;
INITIALIZE_PASS_BEGIN(LoopRotate, "loop-rotate", "Rotate Loops", false, false)
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_DEPENDENCY(LCSSA)
@ -98,6 +102,7 @@ bool LoopRotate::runOnLoop(Loop *L, LPPassManager &LPM) {
LI = &getAnalysis<LoopInfo>();
TTI = &getAnalysis<TargetTransformInfo>();
AT = &getAnalysis<AssumptionTracker>();
// Simplify the loop latch before attempting to rotate the header
// upward. Rotation may not be needed if the loop tail can be folded into the
@ -323,8 +328,11 @@ bool LoopRotate::rotateLoop(Loop *L, bool SimplifiedLatch) {
// Check size of original header and reject loop if it is very big or we can't
// duplicate blocks inside it.
{
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, AT, EphValues);
CodeMetrics Metrics;
Metrics.analyzeBasicBlock(OrigHeader, *TTI);
Metrics.analyzeBasicBlock(OrigHeader, *TTI, EphValues);
if (Metrics.notDuplicatable) {
DEBUG(dbgs() << "LoopRotation: NOT rotating - contains non-duplicatable"
<< " instructions: "; L->dump());

View File

@ -203,11 +203,15 @@ Pass *llvm::createSimpleLoopUnrollPass() {
/// ApproximateLoopSize - Approximate the size of the loop.
static unsigned ApproximateLoopSize(const Loop *L, unsigned &NumCalls,
bool &NotDuplicatable,
const TargetTransformInfo &TTI) {
const TargetTransformInfo &TTI,
AssumptionTracker *AT) {
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, AT, EphValues);
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
Metrics.analyzeBasicBlock(*I, TTI);
Metrics.analyzeBasicBlock(*I, TTI, EphValues);
NumCalls = Metrics.NumInlineCandidates;
NotDuplicatable = Metrics.notDuplicatable;
@ -391,7 +395,7 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
unsigned NumInlineCandidates;
bool notDuplicatable;
unsigned LoopSize =
ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI);
ApproximateLoopSize(L, NumInlineCandidates, notDuplicatable, TTI, AT);
DEBUG(dbgs() << " Loop Size = " << LoopSize << "\n");
uint64_t UnrolledSize = (uint64_t)LoopSize * Count;
if (notDuplicatable) {

View File

@ -104,7 +104,8 @@ namespace {
// Analyze loop. Check its size, calculate is it possible to unswitch
// it. Returns true if we can unswitch this loop.
bool countLoop(const Loop *L, const TargetTransformInfo &TTI);
bool countLoop(const Loop *L, const TargetTransformInfo &TTI,
AssumptionTracker *AT);
// Clean all data related to given loop.
void forgetLoop(const Loop *L);
@ -215,7 +216,8 @@ namespace {
// Analyze loop. Check its size, calculate is it possible to unswitch
// it. Returns true if we can unswitch this loop.
bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI,
AssumptionTracker *AT) {
LoopPropsMapIt PropsIt;
bool Inserted;
@ -232,13 +234,16 @@ bool LUAnalysisCache::countLoop(const Loop *L, const TargetTransformInfo &TTI) {
// large numbers of branches which cause loop unswitching to go crazy.
// This is a very ad-hoc heuristic.
SmallPtrSet<const Value *, 32> EphValues;
CodeMetrics::collectEphemeralValues(L, AT, EphValues);
// FIXME: This is overly conservative because it does not take into
// consideration code simplification opportunities and code that can
// be shared by the resultant unswitched loops.
CodeMetrics Metrics;
for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
I != E; ++I)
Metrics.analyzeBasicBlock(*I, TTI);
Metrics.analyzeBasicBlock(*I, TTI, EphValues);
Props.SizeEstimation = std::min(Metrics.NumInsts, Metrics.NumBlocks * 5);
Props.CanBeUnswitchedCount = MaxSize / (Props.SizeEstimation);
@ -426,7 +431,8 @@ bool LoopUnswitch::processCurrentLoop() {
// Probably we reach the quota of branches for this loop. If so
// stop unswitching.
if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>()))
if (!BranchesInfo.countLoop(currentLoop, getAnalysis<TargetTransformInfo>(),
AT))
return false;
// Loop over all of the basic blocks in the loop. If we find an interior

View File

@ -0,0 +1,32 @@
; RUN: opt -S -Oz %s | FileCheck %s
@a = global i32 4
define i1 @inner() {
%a1 = load volatile i32* @a
%x1 = add i32 %a1, %a1
%c = icmp eq i32 %x1, 0
; Here are enough instructions to prevent inlining, but because they are used
; only by the @llvm.assume intrinsic, they're free (and, thus, inlining will
; still happen).
%a2 = mul i32 %a1, %a1
%a3 = sub i32 %a1, 5
%a4 = udiv i32 %a3, -13
%a5 = mul i32 %a4, %a4
%a6 = add i32 %a5, %x1
%ca = icmp sgt i32 %a6, -7
tail call void @llvm.assume(i1 %ca)
ret i1 %c
}
; @inner() should be inlined for -Oz.
; CHECK-NOT: call i1 @inner
define i1 @outer() optsize {
%r = call i1 @inner()
ret i1 %r
}
declare void @llvm.assume(i1) nounwind

View File

@ -0,0 +1,44 @@
; RUN: opt < %s -S -loop-unroll -unroll-threshold=50 | FileCheck %s
; Make sure this loop is completely unrolled...
; CHECK-LABEL: @test1
; CHECK: for.body:
; CHECK-NOT: for.end:
define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
entry:
br label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
%sum.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
%arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
%0 = load i32* %arrayidx, align 4
; This loop will be completely unrolled, even with these extra instructions,
; but only because they're ephemeral (and, thus, free).
%1 = add nsw i32 %0, 2
%2 = add nsw i32 %1, 4
%3 = add nsw i32 %2, 4
%4 = add nsw i32 %3, 4
%5 = add nsw i32 %4, 4
%6 = add nsw i32 %5, 4
%7 = add nsw i32 %6, 4
%8 = add nsw i32 %7, 4
%9 = add nsw i32 %8, 4
%10 = add nsw i32 %9, 4
%ca = icmp sgt i32 %10, -7
call void @llvm.assume(i1 %ca)
%add = add nsw i32 %0, %sum.01
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, 5
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body
ret i32 %add
}
declare void @llvm.assume(i1) nounwind