mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-30 17:33:24 +00:00
SLPVectorizer: Change the order in which new instructions are added to the function.
We are not working on a DAG and I ran into a number of problems when I enabled the vectorizations of 'diamond-trees' (trees that share leafs). * Imroved the numbering API. * Changed the placement of new instructions to the last root. * Fixed a bug with external tree users with non-zero lane. * Fixed a bug in the placement of in-tree users. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182508 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
23d1d5eb56
commit
3f75c6cfb5
@ -20,6 +20,7 @@
|
||||
|
||||
#include "VecUtils.h"
|
||||
#include "llvm/Transforms/Vectorize.h"
|
||||
#include "llvm/ADT/MapVector.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Analysis/ScalarEvolution.h"
|
||||
#include "llvm/Analysis/TargetTransformInfo.h"
|
||||
@ -47,7 +48,7 @@ namespace {
|
||||
|
||||
/// The SLPVectorizer Pass.
|
||||
struct SLPVectorizer : public FunctionPass {
|
||||
typedef std::map<Value*, BoUpSLP::StoreList> StoreListMap;
|
||||
typedef MapVector<Value*, BoUpSLP::StoreList> StoreListMap;
|
||||
|
||||
/// Pass identification, replacement for typeid
|
||||
static char ID;
|
||||
|
@ -46,7 +46,7 @@ namespace llvm {
|
||||
|
||||
BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
|
||||
TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
|
||||
BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
|
||||
Builder(S->getContext()), BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
|
||||
numberInstructions();
|
||||
}
|
||||
|
||||
@ -121,6 +121,7 @@ bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
|
||||
DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
|
||||
if (Cost < CostThreshold) {
|
||||
DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
|
||||
Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Operands,VF)));
|
||||
vectorizeTree(Operands, VF);
|
||||
i += VF - 1;
|
||||
Changed = true;
|
||||
@ -131,7 +132,7 @@ bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
|
||||
}
|
||||
|
||||
bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
|
||||
ValueSet Heads, Tails;
|
||||
SetVector<Value*> Heads, Tails;
|
||||
SmallDenseMap<Value*, Value*> ConsecutiveChain;
|
||||
|
||||
// We may run into multiple chains that merge into a single chain. We mark the
|
||||
@ -152,7 +153,8 @@ bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
|
||||
}
|
||||
|
||||
// For stores that start but don't end a link in the chain:
|
||||
for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
|
||||
for (SetVector<Value*>::iterator it = Heads.begin(), e = Heads.end();
|
||||
it != e; ++it) {
|
||||
if (Tails.count(*it)) continue;
|
||||
|
||||
// We found a store instr that starts a chain. Now follow the chain and try
|
||||
@ -224,9 +226,14 @@ Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
|
||||
}
|
||||
|
||||
void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
|
||||
int LastIdx = getLastIndex(Operands, Operands.size());
|
||||
Instruction *Loc = getInsertionPoint(LastIdx);
|
||||
Builder.SetInsertPoint(Loc);
|
||||
|
||||
assert(getFirstUserIndex(Operands, Operands.size()) > LastIdx &&
|
||||
"Vectorizing with in-tree users");
|
||||
|
||||
Value *Vec = vectorizeTree(Operands, Operands.size());
|
||||
BasicBlock::iterator Loc = cast<Instruction>(Vec);
|
||||
IRBuilder<> Builder(++Loc);
|
||||
// After vectorizing the operands we need to generate extractelement
|
||||
// instructions and replace all of the uses of the scalar values with
|
||||
// the values that we extracted from the vectorized tree.
|
||||
@ -246,7 +253,13 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
|
||||
MustExtract.clear();
|
||||
|
||||
// Find the location of the last root.
|
||||
unsigned LastRootIndex = InstrIdx[GetLastInstr(VL, VL.size())];
|
||||
int LastRootIndex = getLastIndex(VL, VL.size());
|
||||
int FirstUserIndex = getFirstUserIndex(VL, VL.size());
|
||||
|
||||
// Don't vectorize if there are users of the tree roots inside the tree
|
||||
// itself.
|
||||
if (LastRootIndex > FirstUserIndex)
|
||||
return max_cost;
|
||||
|
||||
// Scan the tree and find which value is used by which lane, and which values
|
||||
// must be scalarized.
|
||||
@ -254,7 +267,7 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
|
||||
|
||||
// Check that instructions with multiple users can be vectorized. Mark unsafe
|
||||
// instructions.
|
||||
for (ValueSet::iterator it = MultiUserVals.begin(),
|
||||
for (SetVector<Value*>::iterator it = MultiUserVals.begin(),
|
||||
e = MultiUserVals.end(); it != e; ++it) {
|
||||
// Check that all of the users of this instr are within the tree
|
||||
// and that they are all from the same lane.
|
||||
@ -267,18 +280,21 @@ int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
|
||||
// We don't have an ordering problem if the user is not in this basic
|
||||
// block.
|
||||
Instruction *Inst = cast<Instruction>(*I);
|
||||
if (Inst->getParent() == BB) {
|
||||
// We don't have an ordering problem if the user is after the last
|
||||
// root.
|
||||
unsigned Idx = InstrIdx[Inst];
|
||||
if (Idx < LastRootIndex) {
|
||||
MustScalarize.insert(*it);
|
||||
DEBUG(dbgs()<<"SLP: Adding to MustScalarize "
|
||||
"because of an unsafe out of tree usage.\n");
|
||||
break;
|
||||
}
|
||||
if (Inst->getParent() != BB) {
|
||||
MustExtract.insert(*it);
|
||||
continue;
|
||||
}
|
||||
|
||||
// We don't have an ordering problem if the user is after the last root.
|
||||
int Idx = InstrIdx[Inst];
|
||||
if (Idx < LastRootIndex) {
|
||||
MustScalarize.insert(*it);
|
||||
DEBUG(dbgs()<<"SLP: Adding to MustScalarize "
|
||||
"because of an unsafe out of tree usage.\n");
|
||||
break;
|
||||
}
|
||||
|
||||
|
||||
DEBUG(dbgs()<<"SLP: Adding to MustExtract "
|
||||
"because of a safe out of tree usage.\n");
|
||||
MustExtract.insert(*it);
|
||||
@ -332,14 +348,6 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
||||
if (!I || Opcode != I->getOpcode()) return;
|
||||
}
|
||||
|
||||
// Mark instructions with multiple users.
|
||||
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
|
||||
Instruction *I = dyn_cast<Instruction>(VL[i]);
|
||||
// Remember to check if all of the users of this instr are vectorized
|
||||
// within our tree.
|
||||
if (I && I->getNumUses() > 1) MultiUserVals.insert(I);
|
||||
}
|
||||
|
||||
for (int i = 0, e = VL.size(); i < e; ++i) {
|
||||
// Check that the instruction is only used within
|
||||
// one lane.
|
||||
@ -348,6 +356,19 @@ void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
||||
LaneMap[VL[i]] = i;
|
||||
}
|
||||
|
||||
// Mark instructions with multiple users.
|
||||
for (unsigned i = 0, e = VL.size(); i < e; ++i) {
|
||||
Instruction *I = dyn_cast<Instruction>(VL[i]);
|
||||
// Remember to check if all of the users of this instr are vectorized
|
||||
// within our tree. At depth zero we have no local users, only external
|
||||
// users that we don't care about.
|
||||
if (Depth && I && I->getNumUses() > 1) {
|
||||
DEBUG(dbgs()<<"SLP: Adding to MultiUserVals "
|
||||
"because it has multiple users:" << *I << " \n");
|
||||
MultiUserVals.insert(I);
|
||||
}
|
||||
}
|
||||
|
||||
switch (Opcode) {
|
||||
case Instruction::ZExt:
|
||||
case Instruction::SExt:
|
||||
@ -461,11 +482,9 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
||||
|
||||
// Check if it is safe to sink the loads or the stores.
|
||||
if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
|
||||
int MaxIdx = InstrIdx[VL0];
|
||||
for (unsigned i = 1, e = VL.size(); i < e; ++i )
|
||||
MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
|
||||
|
||||
int MaxIdx = getLastIndex(VL, VL.size());
|
||||
Instruction *Last = InstrVec[MaxIdx];
|
||||
|
||||
for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
|
||||
if (VL[i] == Last) continue;
|
||||
Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
|
||||
@ -592,15 +611,40 @@ int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
|
||||
}
|
||||
}
|
||||
|
||||
Instruction *BoUpSLP::GetLastInstr(ArrayRef<Value *> VL, unsigned VF) {
|
||||
int BoUpSLP::getLastIndex(ArrayRef<Value *> VL, unsigned VF) {
|
||||
int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
|
||||
for (unsigned i = 0; i < VF; ++i )
|
||||
MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
|
||||
return InstrVec[MaxIdx + 1];
|
||||
return MaxIdx;
|
||||
}
|
||||
|
||||
int BoUpSLP::getFirstUserIndex(ArrayRef<Value *> VL, unsigned VF) {
|
||||
// Find the first user of the values.
|
||||
int FirstUser = InstrVec.size();
|
||||
for (unsigned i = 0; i < VF; ++i) {
|
||||
for (Value::use_iterator U = VL[i]->use_begin(), UE = VL[i]->use_end();
|
||||
U != UE; ++U) {
|
||||
Instruction *Instr = dyn_cast<Instruction>(*U);
|
||||
if (!Instr || Instr->getParent() != BB)
|
||||
continue;
|
||||
|
||||
FirstUser = std::min(FirstUser, InstrIdx[Instr]);
|
||||
}
|
||||
}
|
||||
return FirstUser;
|
||||
}
|
||||
|
||||
int BoUpSLP::getLastIndex(Instruction *I, Instruction *J) {
|
||||
assert(I->getParent() == BB && "Invalid parent for instruction I");
|
||||
assert(J->getParent() == BB && "Invalid parent for instruction J");
|
||||
return std::max(InstrIdx[I],InstrIdx[J]);
|
||||
}
|
||||
|
||||
Instruction *BoUpSLP::getInsertionPoint(unsigned Index) {
|
||||
return InstrVec[Index + 1];
|
||||
}
|
||||
|
||||
Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
|
||||
IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
|
||||
Value *Vec = UndefValue::get(Ty);
|
||||
for (unsigned i=0; i < Ty->getNumElements(); ++i) {
|
||||
// Generate the 'InsertElement' instruction.
|
||||
@ -620,18 +664,26 @@ Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
|
||||
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
|
||||
Value *V = vectorizeTree_rec(VL, VF);
|
||||
|
||||
Instruction *LastInstr = GetLastInstr(VL, VL.size());
|
||||
int LastInstrIdx = InstrIdx[LastInstr];
|
||||
IRBuilder<> Builder(LastInstr);
|
||||
for (ValueSet::iterator it = MustExtract.begin(), e = MustExtract.end();
|
||||
it != e; ++it) {
|
||||
int LastInstrIdx = getLastIndex(VL, VL.size());
|
||||
for (SetVector<Value*>::iterator it = MustExtract.begin(),
|
||||
e = MustExtract.end(); it != e; ++it) {
|
||||
Instruction *I = cast<Instruction>(*it);
|
||||
|
||||
// This is a scalarized value, so we can use the original value.
|
||||
// No need to extract from the vector.
|
||||
if (!LaneMap.count(I))
|
||||
continue;
|
||||
|
||||
Value *Vec = VectorizedValues[I];
|
||||
assert(LaneMap.count(I) && "Unable to find the lane for the external use");
|
||||
// We decided not to vectorize I because one of its users was not
|
||||
// vectorizerd. This is okay.
|
||||
if (!Vec)
|
||||
continue;
|
||||
|
||||
Value *Idx = Builder.getInt32(LaneMap[I]);
|
||||
Value *Extract = Builder.CreateExtractElement(Vec, Idx);
|
||||
bool Replaced = false;
|
||||
for (Value::use_iterator U = I->use_begin(), UE = U->use_end(); U != UE;
|
||||
for (Value::use_iterator U = I->use_begin(), UE = I->use_end(); U != UE;
|
||||
++U) {
|
||||
Instruction *UI = cast<Instruction>(*U);
|
||||
if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx)
|
||||
@ -670,19 +722,27 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
|
||||
}
|
||||
|
||||
// Check that this is a simple vector constant.
|
||||
if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
|
||||
if (AllConst || AllSameScalar)
|
||||
return Scalarize(VL, VecTy);
|
||||
|
||||
// Scalarize unknown structures.
|
||||
Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
|
||||
if (!VL0) return Scalarize(VL, VecTy);
|
||||
if (!VL0)
|
||||
return Scalarize(VL, VecTy);
|
||||
|
||||
if (VectorizedValues.count(VL0)) return VectorizedValues[VL0];
|
||||
if (VectorizedValues.count(VL0)) {
|
||||
Value * Vec = VectorizedValues[VL0];
|
||||
for (int i = 0; i < VF; ++i)
|
||||
VectorizedValues[VL[i]] = Vec;
|
||||
return Vec;
|
||||
}
|
||||
|
||||
unsigned Opcode = VL0->getOpcode();
|
||||
for (unsigned i = 0, e = VF; i < e; ++i) {
|
||||
Instruction *I = dyn_cast<Instruction>(VL[i]);
|
||||
// If not all of the instructions are identical then we have to scalarize.
|
||||
if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
|
||||
if (!I || Opcode != I->getOpcode())
|
||||
return Scalarize(VL, VecTy);
|
||||
}
|
||||
|
||||
switch (Opcode) {
|
||||
@ -702,7 +762,6 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
|
||||
for (int i = 0; i < VF; ++i)
|
||||
INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
|
||||
Value *InVec = vectorizeTree_rec(INVL, VF);
|
||||
IRBuilder<> Builder(GetLastInstr(VL, VF));
|
||||
CastInst *CI = dyn_cast<CastInst>(VL0);
|
||||
Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
|
||||
|
||||
@ -737,7 +796,6 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
|
||||
|
||||
Value *LHS = vectorizeTree_rec(LHSVL, VF);
|
||||
Value *RHS = vectorizeTree_rec(RHSVL, VF);
|
||||
IRBuilder<> Builder(GetLastInstr(VL, VF));
|
||||
BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
|
||||
Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS,RHS);
|
||||
|
||||
@ -755,10 +813,13 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
|
||||
if (!isConsecutiveAccess(VL[i-1], VL[i]))
|
||||
return Scalarize(VL, VecTy);
|
||||
|
||||
IRBuilder<> Builder(GetLastInstr(VL, VF));
|
||||
Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
|
||||
VecTy->getPointerTo());
|
||||
LI = Builder.CreateLoad(VecPtr);
|
||||
// Loads are inserted at the head of the tree because we don't want to sink
|
||||
// them all the way down past store instructions.
|
||||
Instruction *Loc = getInsertionPoint(getLastIndex(VL, VL.size()));
|
||||
IRBuilder<> LoadBuilder(Loc);
|
||||
Value *VecPtr = LoadBuilder.CreateBitCast(LI->getPointerOperand(),
|
||||
VecTy->getPointerTo());
|
||||
LI = LoadBuilder.CreateLoad(VecPtr);
|
||||
LI->setAlignment(Alignment);
|
||||
|
||||
for (int i = 0; i < VF; ++i)
|
||||
@ -775,8 +836,6 @@ Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
|
||||
ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
|
||||
|
||||
Value *VecValue = vectorizeTree_rec(ValueOp, VF);
|
||||
|
||||
IRBuilder<> Builder(GetLastInstr(VL, VF));
|
||||
Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
|
||||
VecTy->getPointerTo());
|
||||
Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
|
||||
|
@ -16,9 +16,11 @@
|
||||
#define LLVM_TRANSFORMS_VECTORIZE_VECUTILS_H
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SetVector.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include <vector>
|
||||
|
||||
namespace llvm {
|
||||
@ -107,9 +109,19 @@ private:
|
||||
/// \returns the pointer to the barrier instruction if we can't sink.
|
||||
Value *isUnsafeToSink(Instruction *Src, Instruction *Dst);
|
||||
|
||||
/// \returns the instruction that appears last in the BB from \p VL.
|
||||
/// \returns the index of the last instrucion in the BB from \p VL.
|
||||
/// Only consider the first \p VF elements.
|
||||
Instruction *GetLastInstr(ArrayRef<Value *> VL, unsigned VF);
|
||||
int getLastIndex(ArrayRef<Value *> VL, unsigned VF);
|
||||
|
||||
/// \returns the index of the first User of \p VL.
|
||||
/// Only consider the first \p VF elements.
|
||||
int getFirstUserIndex(ArrayRef<Value *> VL, unsigned VF);
|
||||
|
||||
/// \returns the instruction \p I or \p Jt hat appears last in the BB .
|
||||
int getLastIndex(Instruction *I, Instruction *J);
|
||||
|
||||
/// \returns the insertion point for \p Index.
|
||||
Instruction *getInsertionPoint(unsigned Index);
|
||||
|
||||
/// \returns a vector from a collection of scalars in \p VL.
|
||||
Value *Scalarize(ArrayRef<Value *> VL, VectorType *Ty);
|
||||
@ -130,17 +142,17 @@ private:
|
||||
/// Contains values that have users outside of the vectorized graph.
|
||||
/// We need to generate extract instructions for these values.
|
||||
/// NOTICE: The vectorization methods also use this set.
|
||||
ValueSet MustExtract;
|
||||
SetVector<Value*> MustExtract;
|
||||
|
||||
/// Contains a list of values that are used outside the current tree. This
|
||||
/// set must be reset between runs.
|
||||
ValueSet MultiUserVals;
|
||||
SetVector<Value*> MultiUserVals;
|
||||
/// Maps values in the tree to the vector lanes that uses them. This map must
|
||||
/// be reset between runs of getCost.
|
||||
std::map<Value*, int> LaneMap;
|
||||
/// A list of instructions to ignore while sinking
|
||||
/// memory instructions. This map must be reset between runs of getCost.
|
||||
SmallPtrSet<Value *, 8> MemBarrierIgnoreList;
|
||||
ValueSet MemBarrierIgnoreList;
|
||||
|
||||
// -- Containers that are used during vectorizeTree -- //
|
||||
|
||||
@ -155,6 +167,9 @@ private:
|
||||
/// Iterating over this list is faster than calling LICM.
|
||||
ValueList GatherInstructions;
|
||||
|
||||
/// Instruction builder to construct the vectorized tree.
|
||||
IRBuilder<> Builder;
|
||||
|
||||
// Analysis and block reference.
|
||||
BasicBlock *BB;
|
||||
ScalarEvolution *SE;
|
||||
|
34
test/Transforms/SLPVectorizer/X86/crash_povray.ll
Normal file
34
test/Transforms/SLPVectorizer/X86/crash_povray.ll
Normal file
@ -0,0 +1,34 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
%struct.hoge = type { double, double, double}
|
||||
|
||||
define void @zot(%struct.hoge* %arg) {
|
||||
bb:
|
||||
%tmp = load double* undef, align 8
|
||||
%tmp1 = fsub double %tmp, undef
|
||||
%tmp2 = load double* undef, align 8
|
||||
%tmp3 = fsub double %tmp2, undef
|
||||
%tmp4 = fmul double %tmp3, undef
|
||||
%tmp5 = fmul double %tmp3, undef
|
||||
%tmp6 = fsub double %tmp5, undef
|
||||
%tmp7 = getelementptr inbounds %struct.hoge* %arg, i64 0, i32 1
|
||||
store double %tmp6, double* %tmp7, align 8
|
||||
%tmp8 = fmul double %tmp1, undef
|
||||
%tmp9 = fsub double %tmp8, undef
|
||||
%tmp10 = getelementptr inbounds %struct.hoge* %arg, i64 0, i32 2
|
||||
store double %tmp9, double* %tmp10, align 8
|
||||
br i1 undef, label %bb11, label %bb12
|
||||
|
||||
bb11: ; preds = %bb
|
||||
br label %bb14
|
||||
|
||||
bb12: ; preds = %bb
|
||||
%tmp13 = fmul double undef, %tmp2
|
||||
br label %bb14
|
||||
|
||||
bb14: ; preds = %bb12, %bb11
|
||||
ret void
|
||||
}
|
50
test/Transforms/SLPVectorizer/X86/in-tree-user.ll
Normal file
50
test/Transforms/SLPVectorizer/X86/in-tree-user.ll
Normal file
@ -0,0 +1,50 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.7.0"
|
||||
|
||||
@.str = private unnamed_addr constant [6 x i8] c"bingo\00", align 1
|
||||
|
||||
; We can't vectorize when the roots are used inside the tree.
|
||||
;CHECK: @in_tree_user
|
||||
;CHECK-NOT: load <2 x double>
|
||||
;CHECK: ret
|
||||
define void @in_tree_user(double* nocapture %A, i32 %n) {
|
||||
entry:
|
||||
%conv = sitofp i32 %n to double
|
||||
br label %for.body
|
||||
|
||||
for.body: ; preds = %for.inc, %entry
|
||||
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
|
||||
%0 = shl nsw i64 %indvars.iv, 1
|
||||
%arrayidx = getelementptr inbounds double* %A, i64 %0
|
||||
%1 = load double* %arrayidx, align 8
|
||||
%mul1 = fmul double %conv, %1
|
||||
%mul2 = fmul double %mul1, 7.000000e+00
|
||||
%add = fadd double %mul2, 5.000000e+00
|
||||
%BadValue = fadd double %add, %add ; <------------------ In tree user.
|
||||
%2 = or i64 %0, 1
|
||||
%arrayidx6 = getelementptr inbounds double* %A, i64 %2
|
||||
%3 = load double* %arrayidx6, align 8
|
||||
%mul8 = fmul double %conv, %3
|
||||
%mul9 = fmul double %mul8, 4.000000e+00
|
||||
%add10 = fadd double %mul9, 9.000000e+00
|
||||
%cmp11 = fcmp ogt double %add, %add10
|
||||
br i1 %cmp11, label %if.then, label %for.inc
|
||||
|
||||
if.then: ; preds = %for.body
|
||||
%call = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @.str, i64 0, i64 0))
|
||||
br label %for.inc
|
||||
|
||||
for.inc: ; preds = %for.body, %if.then
|
||||
%indvars.iv.next = add i64 %indvars.iv, 1
|
||||
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
|
||||
%exitcond = icmp eq i32 %lftr.wideiv, 100
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.inc
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @printf(i8* nocapture, ...)
|
||||
|
@ -12,8 +12,8 @@ target triple = "x86_64-apple-macosx10.7.0"
|
||||
;}
|
||||
|
||||
;CHECK: @foo
|
||||
;CHECK: insertelement <4 x i32>
|
||||
;CHECK: load <4 x i32>
|
||||
;CHECK: insertelement <4 x i32>
|
||||
;CHECK: add <4 x i32>
|
||||
;CHECK: store <4 x i32>
|
||||
;CHECK: ret
|
||||
|
19
test/Transforms/SLPVectorizer/X86/ordering.ll
Normal file
19
test/Transforms/SLPVectorizer/X86/ordering.ll
Normal file
@ -0,0 +1,19 @@
|
||||
; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.8.0"
|
||||
|
||||
define void @updateModelQPFrame(i32 %m_Bits) {
|
||||
entry:
|
||||
%0 = load double* undef, align 8
|
||||
%mul = fmul double undef, %0
|
||||
%mul2 = fmul double undef, %mul
|
||||
%mul4 = fmul double %0, %mul2
|
||||
%mul5 = fmul double undef, 4.000000e+00
|
||||
%mul7 = fmul double undef, %mul5
|
||||
%conv = sitofp i32 %m_Bits to double
|
||||
%mul8 = fmul double %conv, %mul7
|
||||
%add = fadd double %mul4, %mul8
|
||||
%cmp11 = fcmp olt double %add, 0.000000e+00
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user