mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-03-01 01:30:36 +00:00
[LoopVectorize] Ignore @llvm.assume for cost estimates and legality
A few minor changes to prevent @llvm.assume from interfering with loop vectorization. First, treat @llvm.assume like the lifetime intrinsics, which are scalarized (but don't otherwise interfere with the legality checking). Second, ignore the cost of ephemeral instructions in the loop (these will go away anyway during CodeGen).

Alignment assumptions and other uses of @llvm.assume can often end up inside loops that should be vectorized (this is not uncommon for assumptions generated by __attribute__((align_value(n))), for example).

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@219741 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
3a1045d8db
commit
75277b9f70
@ -99,7 +99,7 @@ getIntrinsicIDForCall(CallInst *CI, const TargetLibraryInfo *TLI) {
|
|||||||
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
|
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(CI)) {
|
||||||
Intrinsic::ID ID = II->getIntrinsicID();
|
Intrinsic::ID ID = II->getIntrinsicID();
|
||||||
if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
|
if (isTriviallyVectorizable(ID) || ID == Intrinsic::lifetime_start ||
|
||||||
ID == Intrinsic::lifetime_end)
|
ID == Intrinsic::lifetime_end || ID == Intrinsic::assume)
|
||||||
return ID;
|
return ID;
|
||||||
else
|
else
|
||||||
return Intrinsic::not_intrinsic;
|
return Intrinsic::not_intrinsic;
|
||||||
|
@ -55,7 +55,9 @@
|
|||||||
#include "llvm/ADT/StringExtras.h"
|
#include "llvm/ADT/StringExtras.h"
|
||||||
#include "llvm/Analysis/AliasAnalysis.h"
|
#include "llvm/Analysis/AliasAnalysis.h"
|
||||||
#include "llvm/Analysis/AliasSetTracker.h"
|
#include "llvm/Analysis/AliasSetTracker.h"
|
||||||
|
#include "llvm/Analysis/AssumptionTracker.h"
|
||||||
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
#include "llvm/Analysis/BlockFrequencyInfo.h"
|
||||||
|
#include "llvm/Analysis/CodeMetrics.h"
|
||||||
#include "llvm/Analysis/LoopInfo.h"
|
#include "llvm/Analysis/LoopInfo.h"
|
||||||
#include "llvm/Analysis/LoopIterator.h"
|
#include "llvm/Analysis/LoopIterator.h"
|
||||||
#include "llvm/Analysis/LoopPass.h"
|
#include "llvm/Analysis/LoopPass.h"
|
||||||
@ -884,8 +886,12 @@ public:
|
|||||||
LoopVectorizationLegality *Legal,
|
LoopVectorizationLegality *Legal,
|
||||||
const TargetTransformInfo &TTI,
|
const TargetTransformInfo &TTI,
|
||||||
const DataLayout *DL, const TargetLibraryInfo *TLI,
|
const DataLayout *DL, const TargetLibraryInfo *TLI,
|
||||||
const Function *F, const LoopVectorizeHints *Hints)
|
AssumptionTracker *AT, const Function *F,
|
||||||
: TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI), TheFunction(F), Hints(Hints) {}
|
const LoopVectorizeHints *Hints)
|
||||||
|
: TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), DL(DL), TLI(TLI),
|
||||||
|
AT(AT), TheFunction(F), Hints(Hints) {
|
||||||
|
CodeMetrics::collectEphemeralValues(L, AT, EphValues);
|
||||||
|
}
|
||||||
|
|
||||||
/// Information about vectorization costs
|
/// Information about vectorization costs
|
||||||
struct VectorizationFactor {
|
struct VectorizationFactor {
|
||||||
@ -954,6 +960,9 @@ private:
|
|||||||
*TheFunction, DL, Message.str());
|
*TheFunction, DL, Message.str());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Values used only by @llvm.assume calls.
|
||||||
|
SmallPtrSet<const Value *, 32> EphValues;
|
||||||
|
|
||||||
/// The loop that we evaluate.
|
/// The loop that we evaluate.
|
||||||
Loop *TheLoop;
|
Loop *TheLoop;
|
||||||
/// Scev analysis.
|
/// Scev analysis.
|
||||||
@ -968,6 +977,8 @@ private:
|
|||||||
const DataLayout *DL;
|
const DataLayout *DL;
|
||||||
/// Target Library Info.
|
/// Target Library Info.
|
||||||
const TargetLibraryInfo *TLI;
|
const TargetLibraryInfo *TLI;
|
||||||
|
/// Tracker for @llvm.assume.
|
||||||
|
AssumptionTracker *AT;
|
||||||
const Function *TheFunction;
|
const Function *TheFunction;
|
||||||
// Loop Vectorize Hint.
|
// Loop Vectorize Hint.
|
||||||
const LoopVectorizeHints *Hints;
|
const LoopVectorizeHints *Hints;
|
||||||
@ -1251,6 +1262,7 @@ struct LoopVectorize : public FunctionPass {
|
|||||||
BlockFrequencyInfo *BFI;
|
BlockFrequencyInfo *BFI;
|
||||||
TargetLibraryInfo *TLI;
|
TargetLibraryInfo *TLI;
|
||||||
AliasAnalysis *AA;
|
AliasAnalysis *AA;
|
||||||
|
AssumptionTracker *AT;
|
||||||
bool DisableUnrolling;
|
bool DisableUnrolling;
|
||||||
bool AlwaysVectorize;
|
bool AlwaysVectorize;
|
||||||
|
|
||||||
@ -1266,6 +1278,7 @@ struct LoopVectorize : public FunctionPass {
|
|||||||
BFI = &getAnalysis<BlockFrequencyInfo>();
|
BFI = &getAnalysis<BlockFrequencyInfo>();
|
||||||
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
|
TLI = getAnalysisIfAvailable<TargetLibraryInfo>();
|
||||||
AA = &getAnalysis<AliasAnalysis>();
|
AA = &getAnalysis<AliasAnalysis>();
|
||||||
|
AT = &getAnalysis<AssumptionTracker>();
|
||||||
|
|
||||||
// Compute some weights outside of the loop over the loops. Compute this
|
// Compute some weights outside of the loop over the loops. Compute this
|
||||||
// using a BranchProbability to re-use its scaling math.
|
// using a BranchProbability to re-use its scaling math.
|
||||||
@ -1384,7 +1397,8 @@ struct LoopVectorize : public FunctionPass {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Use the cost model.
|
// Use the cost model.
|
||||||
LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, F, &Hints);
|
LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, DL, TLI, AT, F,
|
||||||
|
&Hints);
|
||||||
|
|
||||||
// Check the function attributes to find out if this function should be
|
// Check the function attributes to find out if this function should be
|
||||||
// optimized for size.
|
// optimized for size.
|
||||||
@ -1471,6 +1485,7 @@ struct LoopVectorize : public FunctionPass {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||||
|
AU.addRequired<AssumptionTracker>();
|
||||||
AU.addRequiredID(LoopSimplifyID);
|
AU.addRequiredID(LoopSimplifyID);
|
||||||
AU.addRequiredID(LCSSAID);
|
AU.addRequiredID(LCSSAID);
|
||||||
AU.addRequired<BlockFrequencyInfo>();
|
AU.addRequired<BlockFrequencyInfo>();
|
||||||
@ -3361,6 +3376,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
|
|||||||
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
|
Intrinsic::ID ID = getIntrinsicIDForCall(CI, TLI);
|
||||||
assert(ID && "Not an intrinsic call!");
|
assert(ID && "Not an intrinsic call!");
|
||||||
switch (ID) {
|
switch (ID) {
|
||||||
|
case Intrinsic::assume:
|
||||||
case Intrinsic::lifetime_end:
|
case Intrinsic::lifetime_end:
|
||||||
case Intrinsic::lifetime_start:
|
case Intrinsic::lifetime_start:
|
||||||
scalarizeInstruction(it);
|
scalarizeInstruction(it);
|
||||||
@ -5457,6 +5473,10 @@ unsigned LoopVectorizationCostModel::getWidestType() {
|
|||||||
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
|
for (BasicBlock::iterator it = BB->begin(), e = BB->end(); it != e; ++it) {
|
||||||
Type *T = it->getType();
|
Type *T = it->getType();
|
||||||
|
|
||||||
|
// Ignore ephemeral values.
|
||||||
|
if (EphValues.count(it))
|
||||||
|
continue;
|
||||||
|
|
||||||
// Only examine Loads, Stores and PHINodes.
|
// Only examine Loads, Stores and PHINodes.
|
||||||
if (!isa<LoadInst>(it) && !isa<StoreInst>(it) && !isa<PHINode>(it))
|
if (!isa<LoadInst>(it) && !isa<StoreInst>(it) && !isa<PHINode>(it))
|
||||||
continue;
|
continue;
|
||||||
@ -5719,6 +5739,10 @@ LoopVectorizationCostModel::calculateRegisterUsage() {
|
|||||||
// Ignore instructions that are never used within the loop.
|
// Ignore instructions that are never used within the loop.
|
||||||
if (!Ends.count(I)) continue;
|
if (!Ends.count(I)) continue;
|
||||||
|
|
||||||
|
// Ignore ephemeral values.
|
||||||
|
if (EphValues.count(I))
|
||||||
|
continue;
|
||||||
|
|
||||||
// Remove all of the instructions that end at this location.
|
// Remove all of the instructions that end at this location.
|
||||||
InstrList &List = TransposeEnds[i];
|
InstrList &List = TransposeEnds[i];
|
||||||
for (unsigned int j=0, e = List.size(); j < e; ++j)
|
for (unsigned int j=0, e = List.size(); j < e; ++j)
|
||||||
@ -5759,6 +5783,10 @@ unsigned LoopVectorizationCostModel::expectedCost(unsigned VF) {
|
|||||||
if (isa<DbgInfoIntrinsic>(it))
|
if (isa<DbgInfoIntrinsic>(it))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
// Ignore ephemeral values.
|
||||||
|
if (EphValues.count(it))
|
||||||
|
continue;
|
||||||
|
|
||||||
unsigned C = getInstructionCost(it, VF);
|
unsigned C = getInstructionCost(it, VF);
|
||||||
|
|
||||||
// Check if we should override the cost.
|
// Check if we should override the cost.
|
||||||
@ -6059,6 +6087,7 @@ static const char lv_name[] = "Loop Vectorization";
|
|||||||
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
|
INITIALIZE_PASS_BEGIN(LoopVectorize, LV_NAME, lv_name, false, false)
|
||||||
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
|
INITIALIZE_AG_DEPENDENCY(TargetTransformInfo)
|
||||||
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
|
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
|
||||||
|
INITIALIZE_PASS_DEPENDENCY(AssumptionTracker)
|
||||||
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo)
|
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfo)
|
||||||
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
|
||||||
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
|
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
|
||||||
|
100
test/Transforms/LoopVectorize/X86/assume.ll
Normal file
100
test/Transforms/LoopVectorize/X86/assume.ll
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -S | FileCheck %s
|
||||||
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||||
|
target triple = "x86_64-unknown-linux-gnu"
|
||||||
|
|
||||||
|
; Function Attrs: nounwind uwtable
|
||||||
|
define void @test1(float* noalias nocapture %a, float* noalias nocapture readonly %b) #0 {
|
||||||
|
entry:
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
; CHECK-LABEL: @test1
|
||||||
|
; CHECK: vector.body:
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: for.body:
|
||||||
|
; CHECK: ret void
|
||||||
|
|
||||||
|
for.body: ; preds = %for.body, %entry
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
%arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
|
||||||
|
%0 = load float* %arrayidx, align 4
|
||||||
|
%cmp1 = fcmp ogt float %0, 1.000000e+02
|
||||||
|
tail call void @llvm.assume(i1 %cmp1)
|
||||||
|
%add = fadd float %0, 1.000000e+00
|
||||||
|
%arrayidx5 = getelementptr inbounds float* %a, i64 %indvars.iv
|
||||||
|
store float %add, float* %arrayidx5, align 4
|
||||||
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv, 1599
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end: ; preds = %for.body
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
; Function Attrs: nounwind
|
||||||
|
declare void @llvm.assume(i1) #1
|
||||||
|
|
||||||
|
attributes #0 = { nounwind uwtable }
|
||||||
|
attributes #1 = { nounwind }
|
||||||
|
|
||||||
|
%struct.data = type { float*, float* }
|
||||||
|
|
||||||
|
; Function Attrs: nounwind uwtable
|
||||||
|
define void @test2(%struct.data* nocapture readonly %d) #0 {
|
||||||
|
entry:
|
||||||
|
%b = getelementptr inbounds %struct.data* %d, i64 0, i32 1
|
||||||
|
%0 = load float** %b, align 8
|
||||||
|
%ptrint = ptrtoint float* %0 to i64
|
||||||
|
%maskedptr = and i64 %ptrint, 31
|
||||||
|
%maskcond = icmp eq i64 %maskedptr, 0
|
||||||
|
%a = getelementptr inbounds %struct.data* %d, i64 0, i32 0
|
||||||
|
%1 = load float** %a, align 8
|
||||||
|
%ptrint2 = ptrtoint float* %1 to i64
|
||||||
|
%maskedptr3 = and i64 %ptrint2, 31
|
||||||
|
%maskcond4 = icmp eq i64 %maskedptr3, 0
|
||||||
|
br label %for.body
|
||||||
|
|
||||||
|
; CHECK-LABEL: @test2
|
||||||
|
; CHECK: vector.body:
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: @llvm.assume
|
||||||
|
; CHECK: for.body:
|
||||||
|
; CHECK: ret void
|
||||||
|
|
||||||
|
for.body: ; preds = %for.body, %entry
|
||||||
|
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
|
||||||
|
tail call void @llvm.assume(i1 %maskcond)
|
||||||
|
%arrayidx = getelementptr inbounds float* %0, i64 %indvars.iv
|
||||||
|
%2 = load float* %arrayidx, align 4
|
||||||
|
%add = fadd float %2, 1.000000e+00
|
||||||
|
tail call void @llvm.assume(i1 %maskcond4)
|
||||||
|
%arrayidx5 = getelementptr inbounds float* %1, i64 %indvars.iv
|
||||||
|
store float %add, float* %arrayidx5, align 4
|
||||||
|
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||||
|
%exitcond = icmp eq i64 %indvars.iv, 1599
|
||||||
|
br i1 %exitcond, label %for.end, label %for.body
|
||||||
|
|
||||||
|
for.end: ; preds = %for.body
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user