mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-27 13:30:05 +00:00
Add a Scalarizer pass.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195471 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
a6c0249619
commit
0f778794c8
@ -41,6 +41,9 @@ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM);
|
||||
/** See llvm::createDeadStoreEliminationPass function. */
|
||||
void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM);
|
||||
|
||||
/** See llvm::createScalarizerPass function. */
|
||||
void LLVMAddScalarizerPass(LLVMPassManagerRef PM);
|
||||
|
||||
/** See llvm::createGVNPass function. */
|
||||
void LLVMAddGVNPass(LLVMPassManagerRef PM);
|
||||
|
||||
|
@ -120,6 +120,7 @@ void initializeAddressSanitizerModulePass(PassRegistry&);
|
||||
void initializeMemorySanitizerPass(PassRegistry&);
|
||||
void initializeThreadSanitizerPass(PassRegistry&);
|
||||
void initializeDataFlowSanitizerPass(PassRegistry&);
|
||||
void initializeScalarizerPass(PassRegistry&);
|
||||
void initializeEarlyCSEPass(PassRegistry&);
|
||||
void initializeExpandISelPseudosPass(PassRegistry&);
|
||||
void initializeFindUsedTypesPass(PassRegistry&);
|
||||
|
@ -154,6 +154,7 @@ namespace {
|
||||
(void) llvm::createSLPVectorizerPass();
|
||||
(void) llvm::createBBVectorizePass();
|
||||
(void) llvm::createPartiallyInlineLibCallsPass();
|
||||
(void) llvm::createScalarizerPass();
|
||||
|
||||
(void)new llvm::IntervalPartition();
|
||||
(void)new llvm::FindUsedTypes();
|
||||
|
@ -370,6 +370,12 @@ FunctionPass *createPartiallyInlineLibCallsPass();
|
||||
FunctionPass *createSampleProfileLoaderPass();
|
||||
FunctionPass *createSampleProfileLoaderPass(StringRef Name);
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// ScalarizerPass - Converts vector operations into scalar operations
|
||||
//
|
||||
FunctionPass *createScalarizerPass();
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
||||
|
@ -5,6 +5,7 @@ add_llvm_library(LLVMScalarOpts
|
||||
CorrelatedValuePropagation.cpp
|
||||
DCE.cpp
|
||||
DeadStoreElimination.cpp
|
||||
Scalarizer.cpp
|
||||
EarlyCSE.cpp
|
||||
GlobalMerge.cpp
|
||||
GVN.cpp
|
||||
|
@ -34,6 +34,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
|
||||
initializeCorrelatedValuePropagationPass(Registry);
|
||||
initializeDCEPass(Registry);
|
||||
initializeDeadInstEliminationPass(Registry);
|
||||
initializeScalarizerPass(Registry);
|
||||
initializeDSEPass(Registry);
|
||||
initializeGVNPass(Registry);
|
||||
initializeEarlyCSEPass(Registry);
|
||||
@ -81,6 +82,10 @@ void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
|
||||
unwrap(PM)->add(createDeadStoreEliminationPass());
|
||||
}
|
||||
|
||||
// C binding: append a newly created Scalarizer pass to the pass manager PM.
void LLVMAddScalarizerPass(LLVMPassManagerRef PM) {
  unwrap(PM)->add(createScalarizerPass());
}
|
||||
|
||||
void LLVMAddGVNPass(LLVMPassManagerRef PM) {
|
||||
unwrap(PM)->add(createGVNPass());
|
||||
}
|
||||
|
637
lib/Transforms/Scalar/Scalarizer.cpp
Normal file
637
lib/Transforms/Scalar/Scalarizer.cpp
Normal file
@ -0,0 +1,637 @@
|
||||
//===--- Scalarizer.cpp - Scalarize vector operations ---------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This pass converts vector operations into scalar operations, in order
|
||||
// to expose optimization opportunities on the individual scalar operations.
|
||||
// It is mainly intended for targets that do not have vector units, but it
|
||||
// may also be useful for revectorizing code to different vector widths.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "scalarizer"
|
||||
#include "llvm/ADT/STLExtras.h"
|
||||
#include "llvm/IR/IRBuilder.h"
|
||||
#include "llvm/InstVisitor.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
// Used to store the scattered form of a vector.
|
||||
typedef SmallVector<Value *, 8> ValueVector;
|
||||
|
||||
// Used to map a vector Value to its scattered form. We use std::map
|
||||
// because we want iterators to persist across insertion and because the
|
||||
// values are relatively large.
|
||||
typedef std::map<Value *, ValueVector> ScatterMap;
|
||||
|
||||
// Lists Instructions that have been replaced with scalar implementations,
|
||||
// along with a pointer to their scattered forms.
|
||||
typedef SmallVector<std::pair<Instruction *, ValueVector *>, 16> GatherList;
|
||||
|
||||
// Provides a very limited vector-like interface for lazily accessing one
|
||||
// component of a scattered vector or vector pointer.
|
||||
class Scatterer {
|
||||
public:
|
||||
// Scatter V into Size components. If new instructions are needed,
|
||||
// insert them before BBI in BB. If Cache is nonnull, use it to cache
|
||||
// the results.
|
||||
Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
|
||||
ValueVector *cachePtr = 0);
|
||||
|
||||
// Return component I, creating a new Value for it if necessary.
|
||||
Value *operator[](unsigned I);
|
||||
|
||||
// Return the number of components.
|
||||
unsigned size() const { return Size; }
|
||||
|
||||
private:
|
||||
BasicBlock *BB;
|
||||
BasicBlock::iterator BBI;
|
||||
Value *V;
|
||||
ValueVector *CachePtr;
|
||||
PointerType *PtrTy;
|
||||
ValueVector Tmp;
|
||||
unsigned Size;
|
||||
};
|
||||
|
||||
// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
|
||||
// called Name that compares X and Y in the same way as FCI.
|
||||
struct FCmpSplitter {
|
||||
FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
|
||||
Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
|
||||
const Twine &Name) const {
|
||||
return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
|
||||
}
|
||||
FCmpInst &FCI;
|
||||
};
|
||||
|
||||
// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
|
||||
// called Name that compares X and Y in the same way as ICI.
|
||||
struct ICmpSplitter {
|
||||
ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
|
||||
Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
|
||||
const Twine &Name) const {
|
||||
return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
|
||||
}
|
||||
ICmpInst &ICI;
|
||||
};
|
||||
|
||||
// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
|
||||
// a binary operator like BO called Name with operands X and Y.
|
||||
struct BinarySplitter {
|
||||
BinarySplitter(BinaryOperator &bo) : BO(bo) {}
|
||||
Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
|
||||
const Twine &Name) const {
|
||||
return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
|
||||
}
|
||||
BinaryOperator &BO;
|
||||
};
|
||||
|
||||
// GEPSplitter()(Builder, X, Y, Name) uses Builder to create
|
||||
// a single GEP called Name with operands X and Y.
|
||||
struct GEPSplitter {
|
||||
GEPSplitter() {}
|
||||
Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
|
||||
const Twine &Name) const {
|
||||
return Builder.CreateGEP(Op0, Op1, Name);
|
||||
}
|
||||
};
|
||||
|
||||
// Information about a load or store that we're scalarizing.
|
||||
struct VectorLayout {
|
||||
VectorLayout() : VecTy(0), ElemTy(0), VecAlign(0), ElemSize(0) {}
|
||||
|
||||
// Return the alignment of element I.
|
||||
uint64_t getElemAlign(unsigned I) {
|
||||
return MinAlign(VecAlign, I * ElemSize);
|
||||
}
|
||||
|
||||
// The type of the vector.
|
||||
VectorType *VecTy;
|
||||
|
||||
// The type of each element.
|
||||
Type *ElemTy;
|
||||
|
||||
// The alignment of the vector.
|
||||
uint64_t VecAlign;
|
||||
|
||||
// The size of each element.
|
||||
uint64_t ElemSize;
|
||||
};
|
||||
|
||||
class Scalarizer : public FunctionPass,
|
||||
public InstVisitor<Scalarizer, bool> {
|
||||
public:
|
||||
static char ID;
|
||||
|
||||
Scalarizer() :
|
||||
FunctionPass(ID) {
|
||||
initializeScalarizerPass(*PassRegistry::getPassRegistry());
|
||||
}
|
||||
|
||||
virtual bool doInitialization(Module &M);
|
||||
virtual bool runOnFunction(Function &F);
|
||||
|
||||
// InstVisitor methods. They return true if the instruction was scalarized,
|
||||
// false if nothing changed.
|
||||
bool visitInstruction(Instruction &) { return false; }
|
||||
bool visitSelectInst(SelectInst &SI);
|
||||
bool visitICmpInst(ICmpInst &);
|
||||
bool visitFCmpInst(FCmpInst &);
|
||||
bool visitBinaryOperator(BinaryOperator &);
|
||||
bool visitGetElementPtrInst(GetElementPtrInst &);
|
||||
bool visitCastInst(CastInst &);
|
||||
bool visitBitCastInst(BitCastInst &);
|
||||
bool visitShuffleVectorInst(ShuffleVectorInst &);
|
||||
bool visitPHINode(PHINode &);
|
||||
bool visitLoadInst(LoadInst &);
|
||||
bool visitStoreInst(StoreInst &);
|
||||
|
||||
private:
|
||||
Scatterer scatter(Instruction *, Value *);
|
||||
void gather(Instruction *, const ValueVector &);
|
||||
bool canTransferMetadata(unsigned Kind);
|
||||
void transferMetadata(Instruction *, const ValueVector &);
|
||||
bool getVectorLayout(Type *, unsigned, VectorLayout &);
|
||||
bool finish();
|
||||
|
||||
template<typename T> bool splitBinary(Instruction &, const T &);
|
||||
|
||||
ScatterMap Scattered;
|
||||
GatherList Gathered;
|
||||
unsigned ParallelLoopAccessMDKind;
|
||||
const DataLayout *TDL;
|
||||
};
|
||||
|
||||
char Scalarizer::ID = 0;
|
||||
} // end anonymous namespace
|
||||
|
||||
// This is disabled by default because having separate loads and stores makes
|
||||
// it more likely that the -combiner-alias-analysis limits will be reached.
|
||||
static cl::opt<bool> ScalarizeLoadStore
|
||||
("scalarize-load-store", cl::Hidden, cl::init(false),
|
||||
cl::desc("Allow the scalarizer pass to scalarize loads and store"));
|
||||
|
||||
INITIALIZE_PASS(Scalarizer, "scalarizer", "Scalarize vector operations",
|
||||
false, false)
|
||||
|
||||
// Set up lazy scattering of v, which is either a vector or a pointer to a
// vector.  No instructions are created here; operator[] creates them on
// demand.
Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
                     ValueVector *cachePtr)
  : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
  // For a pointer operand the component count comes from the pointee vector.
  PtrTy = dyn_cast<PointerType>(V->getType());
  Type *VecTy = PtrTy ? PtrTy->getElementType() : V->getType();
  Size = VecTy->getVectorNumElements();

  if (CachePtr) {
    // A shared cache is sized on first use and must agree with us afterwards.
    if (CachePtr->empty())
      CachePtr->resize(Size, 0);
    else
      assert(Size == CachePtr->size() && "Inconsistent vector sizes");
    return;
  }

  // No shared cache: keep the components in private storage.
  Tmp.resize(Size, 0);
}
|
||||
|
||||
// Return component I, creating a new Value for it if necessary.
|
||||
Value *Scatterer::operator[](unsigned I) {
|
||||
ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
|
||||
// Try to reuse a previous value.
|
||||
if (CV[I])
|
||||
return CV[I];
|
||||
IRBuilder<> Builder(BB, BBI);
|
||||
if (PtrTy) {
|
||||
if (!CV[0]) {
|
||||
Type *Ty =
|
||||
PointerType::get(PtrTy->getElementType()->getVectorElementType(),
|
||||
PtrTy->getAddressSpace());
|
||||
CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
|
||||
}
|
||||
if (I != 0)
|
||||
CV[I] = Builder.CreateConstGEP1_32(CV[0], I,
|
||||
V->getName() + ".i" + Twine(I));
|
||||
} else {
|
||||
// Search through a chain of InsertElementInsts looking for element I.
|
||||
// Record other elements in the cache. The new V is still suitable
|
||||
// for all uncached indices.
|
||||
for (;;) {
|
||||
InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
|
||||
if (!Insert)
|
||||
break;
|
||||
ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
|
||||
if (!Idx)
|
||||
break;
|
||||
unsigned J = Idx->getZExtValue();
|
||||
CV[J] = Insert->getOperand(1);
|
||||
V = Insert->getOperand(0);
|
||||
if (I == J)
|
||||
return CV[J];
|
||||
}
|
||||
CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
|
||||
V->getName() + ".i" + Twine(I));
|
||||
}
|
||||
return CV[I];
|
||||
}
|
||||
|
||||
// Look up the metadata kind ID that canTransferMetadata() compares against.
// The module itself is not modified, hence the false return.
bool Scalarizer::doInitialization(Module &M) {
  LLVMContext &Ctx = M.getContext();
  ParallelLoopAccessMDKind = Ctx.getMDKindID("llvm.mem.parallel_loop_access");
  return false;
}
|
||||
|
||||
// Visit every instruction of F, scalarizing where a visit method applies,
// then let finish() clean up.  Returns whatever finish() reports.
bool Scalarizer::runOnFunction(Function &F) {
  TDL = getAnalysisIfAvailable<DataLayout>();

  for (Function::iterator BlockIt = F.begin(), BlockEnd = F.end();
       BlockIt != BlockEnd; ++BlockIt) {
    BasicBlock *Block = BlockIt;
    BasicBlock::iterator Cur = Block->begin();
    BasicBlock::iterator End = Block->end();
    while (Cur != End) {
      Instruction *Inst = Cur;
      bool Scalarized = visit(Inst);
      // Step past Inst before it is possibly erased below.
      ++Cur;
      if (!Scalarized)
        continue;
      // Scalarized instructions with a result are deleted later by finish();
      // void ones (stores) have no users and are removed immediately.
      if (Inst->getType()->isVoidTy())
        Inst->eraseFromParent();
    }
  }
  return finish();
}
|
||||
|
||||
// Return a scattered form of V that can be accessed by Point. V must be a
|
||||
// vector or a pointer to a vector.
|
||||
Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
|
||||
if (Argument *VArg = dyn_cast<Argument>(V)) {
|
||||
// Put the scattered form of arguments in the entry block,
|
||||
// so that it can be used everywhere.
|
||||
Function *F = VArg->getParent();
|
||||
BasicBlock *BB = &F->getEntryBlock();
|
||||
return Scatterer(BB, BB->begin(), V, &Scattered[V]);
|
||||
}
|
||||
if (Instruction *VOp = dyn_cast<Instruction>(V)) {
|
||||
// Put the scattered form of an instruction directly after the
|
||||
// instruction.
|
||||
BasicBlock *BB = VOp->getParent();
|
||||
return Scatterer(BB, llvm::next(BasicBlock::iterator(VOp)),
|
||||
V, &Scattered[V]);
|
||||
}
|
||||
// In the fallback case, just put the scattered before Point and
|
||||
// keep the result local to Point.
|
||||
return Scatterer(Point->getParent(), Point, V);
|
||||
}
|
||||
|
||||
// Replace Op with the gathered form of the components in CV. Defer the
|
||||
// deletion of Op and creation of the gathered form to the end of the pass,
|
||||
// so that we can avoid creating the gathered form if all uses of Op are
|
||||
// replaced with uses of CV.
|
||||
void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
|
||||
// Since we're not deleting Op yet, stub out its operands, so that it
|
||||
// doesn't make anything live unnecessarily.
|
||||
for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
|
||||
Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
|
||||
|
||||
transferMetadata(Op, CV);
|
||||
|
||||
// If we already have a scattered form of Op (created from ExtractElements
|
||||
// of Op itself), replace them with the new form.
|
||||
ValueVector &SV = Scattered[Op];
|
||||
if (!SV.empty()) {
|
||||
for (unsigned I = 0, E = SV.size(); I != E; ++I) {
|
||||
Instruction *Old = cast<Instruction>(SV[I]);
|
||||
CV[I]->takeName(Old);
|
||||
Old->replaceAllUsesWith(CV[I]);
|
||||
Old->eraseFromParent();
|
||||
}
|
||||
}
|
||||
SV = CV;
|
||||
Gathered.push_back(GatherList::value_type(Op, &SV));
|
||||
}
|
||||
|
||||
// Return true if it is safe to transfer the given metadata tag from
|
||||
// vector to scalar instructions.
|
||||
bool Scalarizer::canTransferMetadata(unsigned Tag) {
|
||||
return (Tag == LLVMContext::MD_tbaa
|
||||
|| Tag == LLVMContext::MD_fpmath
|
||||
|| Tag == LLVMContext::MD_tbaa_struct
|
||||
|| Tag == LLVMContext::MD_invariant_load
|
||||
|| Tag == ParallelLoopAccessMDKind);
|
||||
}
|
||||
|
||||
// Transfer metadata from Op to the instructions in CV if it is known
|
||||
// to be safe to do so.
|
||||
void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
|
||||
SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
|
||||
Op->getAllMetadataOtherThanDebugLoc(MDs);
|
||||
for (unsigned I = 0, E = CV.size(); I != E; ++I) {
|
||||
if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
|
||||
for (SmallVectorImpl<std::pair<unsigned, MDNode *> >::iterator
|
||||
MI = MDs.begin(), ME = MDs.end(); MI != ME; ++MI)
|
||||
if (canTransferMetadata(MI->first))
|
||||
New->setMetadata(MI->first, MI->second);
|
||||
New->setDebugLoc(Op->getDebugLoc());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Try to fill in Layout from Ty, returning true on success. Alignment is
|
||||
// the alignment of the vector, or 0 if the ABI default should be used.
|
||||
bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
|
||||
VectorLayout &Layout) {
|
||||
if (!TDL)
|
||||
return false;
|
||||
|
||||
// Make sure we're dealing with a vector.
|
||||
Layout.VecTy = dyn_cast<VectorType>(Ty);
|
||||
if (!Layout.VecTy)
|
||||
return false;
|
||||
|
||||
// Check that we're dealing with full-byte elements.
|
||||
Layout.ElemTy = Layout.VecTy->getElementType();
|
||||
if (TDL->getTypeSizeInBits(Layout.ElemTy) !=
|
||||
TDL->getTypeStoreSizeInBits(Layout.ElemTy))
|
||||
return false;
|
||||
|
||||
if (Alignment)
|
||||
Layout.VecAlign = Alignment;
|
||||
else
|
||||
Layout.VecAlign = TDL->getABITypeAlignment(Layout.VecTy);
|
||||
Layout.ElemSize = TDL->getTypeStoreSize(Layout.ElemTy);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
|
||||
// to create an instruction like I with operands X and Y and name Name.
|
||||
template<typename Splitter>
|
||||
bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
|
||||
VectorType *VT = dyn_cast<VectorType>(I.getType());
|
||||
if (!VT)
|
||||
return false;
|
||||
|
||||
unsigned NumElems = VT->getNumElements();
|
||||
IRBuilder<> Builder(I.getParent(), &I);
|
||||
Scatterer Op0 = scatter(&I, I.getOperand(0));
|
||||
Scatterer Op1 = scatter(&I, I.getOperand(1));
|
||||
assert(Op0.size() == NumElems && "Mismatched binary operation");
|
||||
assert(Op1.size() == NumElems && "Mismatched binary operation");
|
||||
ValueVector Res;
|
||||
Res.resize(NumElems);
|
||||
for (unsigned Elem = 0; Elem < NumElems; ++Elem)
|
||||
Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
|
||||
I.getName() + ".i" + Twine(Elem));
|
||||
gather(&I, Res);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Scalarize a select that produces a vector.  The condition may be a vector
// (one condition per element) or a single scalar shared by all elements.
bool Scalarizer::visitSelectInst(SelectInst &SI) {
  VectorType *VT = dyn_cast<VectorType>(SI.getType());
  if (!VT)
    return false;

  const unsigned NumElems = VT->getNumElements();
  IRBuilder<> Builder(SI.getParent(), &SI);
  Value *Cond = SI.getOperand(0);
  Scatterer TrueVals = scatter(&SI, SI.getOperand(1));
  Scatterer FalseVals = scatter(&SI, SI.getOperand(2));
  assert(TrueVals.size() == NumElems && "Mismatched select");
  assert(FalseVals.size() == NumElems && "Mismatched select");
  ValueVector Res;
  Res.resize(NumElems);

  if (!Cond->getType()->isVectorTy()) {
    // Scalar condition: every element select uses it directly.
    for (unsigned Elem = 0; Elem != NumElems; ++Elem)
      Res[Elem] = Builder.CreateSelect(Cond, TrueVals[Elem], FalseVals[Elem],
                                       SI.getName() + ".i" + Twine(Elem));
  } else {
    // Vector condition: scatter it and pair the elements up.
    Scatterer Conds = scatter(&SI, Cond);
    assert(Conds.size() == NumElems && "Mismatched select");
    for (unsigned Elem = 0; Elem != NumElems; ++Elem)
      Res[Elem] = Builder.CreateSelect(Conds[Elem], TrueVals[Elem],
                                       FalseVals[Elem],
                                       SI.getName() + ".i" + Twine(Elem));
  }
  gather(&SI, Res);
  return true;
}
|
||||
|
||||
// Integer compares are split element by element via ICmpSplitter.
bool Scalarizer::visitICmpInst(ICmpInst &ICI) {
  ICmpSplitter Split(ICI);
  return splitBinary(ICI, Split);
}
|
||||
|
||||
// Floating-point compares are split element by element via FCmpSplitter.
bool Scalarizer::visitFCmpInst(FCmpInst &FCI) {
  FCmpSplitter Split(FCI);
  return splitBinary(FCI, Split);
}
|
||||
|
||||
// Binary operators are split element by element via BinarySplitter.
bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) {
  BinarySplitter Split(BO);
  return splitBinary(BO, Split);
}
|
||||
|
||||
// Scalarize a GEP that produces a vector of pointers.  Every operand (the
// base and all indices) is scattered, so GEPs with more than one index are
// handled instead of having their extra indices dropped, and the inbounds
// flag of the original is carried over to the scalar GEPs.  Returns false,
// leaving the GEP alone, if the result or any operand is not a vector.
bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
  if (!VT)
    return false;

  // Per-element scattering needs every operand to be a vector; bail out
  // before touching anything otherwise.
  const unsigned NumOps = GEPI.getNumOperands();
  for (unsigned I = 0; I != NumOps; ++I)
    if (!GEPI.getOperand(I)->getType()->isVectorTy())
      return false;

  const unsigned NumElems = VT->getNumElements();
  IRBuilder<> Builder(GEPI.getParent(), &GEPI);

  // Ops[0] is the scattered base, Ops[1..] are the scattered indices.
  SmallVector<Scatterer, 8> Ops;
  Ops.reserve(NumOps);
  for (unsigned I = 0; I != NumOps; ++I) {
    Ops.push_back(scatter(&GEPI, GEPI.getOperand(I)));
    assert(Ops.back().size() == NumElems && "Mismatched GEP");
  }

  ValueVector Res;
  Res.resize(NumElems);
  SmallVector<Value *, 8> Indices;
  for (unsigned Elem = 0; Elem != NumElems; ++Elem) {
    Value *Base = Ops[0][Elem];
    Indices.clear();
    for (unsigned J = 1; J != NumOps; ++J)
      Indices.push_back(Ops[J][Elem]);
    Res[Elem] = Builder.CreateGEP(Base, Indices,
                                  GEPI.getName() + ".i" + Twine(Elem));
    // The builder may have folded the GEP to a constant, in which case there
    // is no instruction to flag.
    if (GEPI.isInBounds())
      if (GetElementPtrInst *NewGEP = dyn_cast<GetElementPtrInst>(Res[Elem]))
        NewGEP->setIsInBounds();
  }
  gather(&GEPI, Res);
  return true;
}
|
||||
|
||||
// Scalarize a cast whose destination is a vector by applying the same cast
// opcode to each element of the source.
bool Scalarizer::visitCastInst(CastInst &CI) {
  VectorType *DestVT = dyn_cast<VectorType>(CI.getDestTy());
  if (!DestVT)
    return false;

  const unsigned NumElems = DestVT->getNumElements();
  Type *DestElemTy = DestVT->getElementType();
  const Instruction::CastOps Opcode = CI.getOpcode();
  IRBuilder<> Builder(CI.getParent(), &CI);
  Scatterer Src = scatter(&CI, CI.getOperand(0));
  assert(Src.size() == NumElems && "Mismatched cast");

  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned Elem = 0; Elem != NumElems; ++Elem)
    Res[Elem] = Builder.CreateCast(Opcode, Src[Elem], DestElemTy,
                                   CI.getName() + ".i" + Twine(Elem));
  gather(&CI, Res);
  return true;
}
|
||||
|
||||
// Scalarize a vector-to-vector bitcast.  Three shapes are handled: equal
// element counts, more destination elements than source elements, and fewer
// destination elements than source elements.
bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
  VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
  VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
  if (!(DstVT && SrcVT))
    return false;

  const unsigned DstNumElems = DstVT->getNumElements();
  const unsigned SrcNumElems = SrcVT->getNumElements();
  Type *DstElemTy = DstVT->getElementType();
  IRBuilder<> Builder(BCI.getParent(), &BCI);
  Scatterer Src = scatter(&BCI, BCI.getOperand(0));
  ValueVector Res;
  Res.resize(DstNumElems);

  if (DstNumElems == SrcNumElems) {
    // One-to-one: bitcast each element on its own.
    for (unsigned Elem = 0; Elem != DstNumElems; ++Elem)
      Res[Elem] = Builder.CreateBitCast(Src[Elem], DstElemTy,
                                        BCI.getName() + ".i" + Twine(Elem));
  } else if (DstNumElems > SrcNumElems) {
    // <M x t1> -> <N*M x t2>.  Turn each t1 into an <N x t2> and hand its
    // elements out to the destination.
    const unsigned FanOut = DstNumElems / SrcNumElems;
    Type *MidTy = VectorType::get(DstElemTy, FanOut);
    for (unsigned SrcI = 0; SrcI != SrcNumElems; ++SrcI) {
      // Strip existing bitcasts first; with luck the conversion to
      // <N x t2> then turns out to be a no-op.
      Value *Stripped = Src[SrcI];
      while (Instruction *Def = dyn_cast<Instruction>(Stripped)) {
        if (Def->getOpcode() != Instruction::BitCast)
          break;
        Stripped = Def->getOperand(0);
      }
      Value *Wide = Builder.CreateBitCast(Stripped, MidTy,
                                          Stripped->getName() + ".cast");
      Scatterer Mid = scatter(&BCI, Wide);
      for (unsigned MidI = 0; MidI != FanOut; ++MidI)
        Res[SrcI * FanOut + MidI] = Mid[MidI];
    }
  } else {
    // <N*M x t1> -> <M x t2>.  Assemble each run of N source elements into
    // an <N x t1> and bitcast that to a single t2.
    const unsigned FanIn = SrcNumElems / DstNumElems;
    Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
    for (unsigned DstI = 0; DstI != DstNumElems; ++DstI) {
      Value *Group = UndefValue::get(MidTy);
      for (unsigned MidI = 0; MidI != FanIn; ++MidI) {
        Value *Elt = Src[DstI * FanIn + MidI];
        Group = Builder.CreateInsertElement(Group, Elt, Builder.getInt32(MidI),
                                            BCI.getName() + ".i" + Twine(DstI)
                                            + ".upto" + Twine(MidI));
      }
      Res[DstI] = Builder.CreateBitCast(Group, DstElemTy,
                                        BCI.getName() + ".i" + Twine(DstI));
    }
  }
  gather(&BCI, Res);
  return true;
}
|
||||
|
||||
// Scalarize a shufflevector.  No new instructions are needed for the shuffle
// itself: each result element is an element of one of the two inputs, or
// undef where the mask value is negative.
bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  VectorType *VT = dyn_cast<VectorType>(SVI.getType());
  if (!VT)
    return false;

  const unsigned NumElems = VT->getNumElements();
  Scatterer First = scatter(&SVI, SVI.getOperand(0));
  Scatterer Second = scatter(&SVI, SVI.getOperand(1));
  const unsigned NumFirst = First.size();
  ValueVector Res;
  Res.resize(NumElems);

  for (unsigned Elem = 0; Elem != NumElems; ++Elem) {
    const int Selector = SVI.getMaskValue(Elem);
    if (Selector < 0) {
      Res[Elem] = UndefValue::get(VT->getElementType());
      continue;
    }
    // Mask values index the concatenation of the two inputs.
    const unsigned Lane = Selector;
    if (Lane < NumFirst)
      Res[Elem] = First[Lane];
    else
      Res[Elem] = Second[Lane - NumFirst];
  }
  gather(&SVI, Res);
  return true;
}
|
||||
|
||||
// Scalarize a vector PHI into one scalar PHI per element, each receiving the
// corresponding element of every incoming value.
bool Scalarizer::visitPHINode(PHINode &PHI) {
  VectorType *VT = dyn_cast<VectorType>(PHI.getType());
  if (!VT)
    return false;

  const unsigned NumElems = VT->getNumElements();
  const unsigned NumOps = PHI.getNumOperands();
  Type *ElemTy = VT->getElementType();
  IRBuilder<> Builder(PHI.getParent(), &PHI);

  // Create the empty scalar PHIs first, in front of the vector PHI.
  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned Elem = 0; Elem != NumElems; ++Elem)
    Res[Elem] = Builder.CreatePHI(ElemTy, NumOps,
                                  PHI.getName() + ".i" + Twine(Elem));

  // Then wire up the incoming edges one predecessor at a time.
  for (unsigned OpI = 0; OpI != NumOps; ++OpI) {
    Scatterer Incoming = scatter(&PHI, PHI.getIncomingValue(OpI));
    BasicBlock *Pred = PHI.getIncomingBlock(OpI);
    for (unsigned Elem = 0; Elem != NumElems; ++Elem) {
      PHINode *ScalarPHI = cast<PHINode>(Res[Elem]);
      ScalarPHI->addIncoming(Incoming[Elem], Pred);
    }
  }
  gather(&PHI, Res);
  return true;
}
|
||||
|
||||
// Scalarize a simple vector load into one aligned scalar load per element.
// Only done when -scalarize-load-store is given and a layout for the loaded
// type can be computed.
bool Scalarizer::visitLoadInst(LoadInst &LI) {
  if (!ScalarizeLoadStore || !LI.isSimple())
    return false;

  VectorLayout Layout;
  if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout))
    return false;

  const unsigned NumElems = Layout.VecTy->getNumElements();
  IRBuilder<> Builder(LI.getParent(), &LI);
  Scatterer ElemPtrs = scatter(&LI, LI.getPointerOperand());

  ValueVector Res;
  Res.resize(NumElems);
  for (unsigned Elem = 0; Elem != NumElems; ++Elem)
    Res[Elem] = Builder.CreateAlignedLoad(ElemPtrs[Elem],
                                          Layout.getElemAlign(Elem),
                                          LI.getName() + ".i" + Twine(Elem));
  gather(&LI, Res);
  return true;
}
|
||||
|
||||
// Scalarize a simple vector store into one aligned scalar store per element.
// Only done when -scalarize-load-store is given and a layout for the stored
// type can be computed.  The original store is not gathered; the caller
// erases it when true is returned.
bool Scalarizer::visitStoreInst(StoreInst &SI) {
  if (!ScalarizeLoadStore || !SI.isSimple())
    return false;

  Value *FullValue = SI.getValueOperand();
  VectorLayout Layout;
  if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout))
    return false;

  const unsigned NumElems = Layout.VecTy->getNumElements();
  IRBuilder<> Builder(SI.getParent(), &SI);
  Scatterer ElemPtrs = scatter(&SI, SI.getPointerOperand());
  Scatterer ElemVals = scatter(&SI, FullValue);

  ValueVector Stores;
  Stores.resize(NumElems);
  for (unsigned Elem = 0; Elem != NumElems; ++Elem) {
    unsigned Align = Layout.getElemAlign(Elem);
    Stores[Elem] = Builder.CreateAlignedStore(ElemVals[Elem], ElemPtrs[Elem],
                                              Align);
  }
  transferMetadata(&SI, Stores);
  return true;
}
|
||||
|
||||
// Delete the instructions that we scalarized. If a full vector result
|
||||
// is still needed, recreate it using InsertElements.
|
||||
bool Scalarizer::finish() {
|
||||
if (Gathered.empty())
|
||||
return false;
|
||||
for (GatherList::iterator GMI = Gathered.begin(), GME = Gathered.end();
|
||||
GMI != GME; ++GMI) {
|
||||
Instruction *Op = GMI->first;
|
||||
ValueVector &CV = *GMI->second;
|
||||
if (!Op->use_empty()) {
|
||||
// The value is still needed, so recreate it using a series of
|
||||
// InsertElements.
|
||||
Type *Ty = Op->getType();
|
||||
Value *Res = UndefValue::get(Ty);
|
||||
unsigned Count = Ty->getVectorNumElements();
|
||||
IRBuilder<> Builder(Op->getParent(), Op);
|
||||
for (unsigned I = 0; I < Count; ++I)
|
||||
Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
|
||||
Op->getName() + ".upto" + Twine(I));
|
||||
Res->takeName(Op);
|
||||
Op->replaceAllUsesWith(Res);
|
||||
}
|
||||
Op->eraseFromParent();
|
||||
}
|
||||
Gathered.clear();
|
||||
Scattered.clear();
|
||||
return true;
|
||||
}
|
||||
|
||||
// Public factory: returns a newly allocated Scalarizer pass.
FunctionPass *llvm::createScalarizerPass() {
  Scalarizer *NewPass = new Scalarizer();
  return NewPass;
}
|
390
test/Transforms/Scalarizer/basic.ll
Normal file
390
test/Transforms/Scalarizer/basic.ll
Normal file
@ -0,0 +1,390 @@
|
||||
; Runs the scalarizer with load/store scalarization enabled, followed by
; -dce, and verifies the printed IR with FileCheck.
; RUN: opt %s -scalarizer -scalarize-load-store -dce -S | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
|
||||
; External function that takes and returns a full vector; called from @f1.
declare <4 x float> @ext(<4 x float>)
|
||||
; NOTE(review): @g is not referenced by any function in this test file.
@g = global <4 x float> zeroinitializer
|
||||
|
||||
; Loop over <4 x float> values that mixes a vector phi, load, bitcasts to
; <2 x double>, shufflevectors, fadd, fcmp, select and store with a call to
; @ext.  The expected output has per-element phis, loads, fadds, fcmps,
; selects and stores; the bitcasts and shuffles are gone (the fadd operands
; are taken directly from the %val and %acc elements); %add is rebuilt with
; insertelement because the call still takes a full vector, and the call
; result is split again with extractelement.
define void @f1(<4 x float> %init, <4 x float> *%base, i32 %count) {
|
||||
; CHECK-LABEL: @f1(
|
||||
; CHECK: entry:
|
||||
; CHECK: %init.i0 = extractelement <4 x float> %init, i32 0
|
||||
; CHECK: %init.i1 = extractelement <4 x float> %init, i32 1
|
||||
; CHECK: %init.i2 = extractelement <4 x float> %init, i32 2
|
||||
; CHECK: %init.i3 = extractelement <4 x float> %init, i32 3
|
||||
; CHECK: br label %loop
|
||||
; CHECK: loop:
|
||||
; CHECK: %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
|
||||
; CHECK: %acc.i0 = phi float [ %init.i0, %entry ], [ %sel.i0, %loop ]
|
||||
; CHECK: %acc.i1 = phi float [ %init.i1, %entry ], [ %sel.i1, %loop ]
|
||||
; CHECK: %acc.i2 = phi float [ %init.i2, %entry ], [ %sel.i2, %loop ]
|
||||
; CHECK: %acc.i3 = phi float [ %init.i3, %entry ], [ %sel.i3, %loop ]
|
||||
; CHECK: %nexti = sub i32 %i, 1
|
||||
; CHECK: %ptr = getelementptr <4 x float>* %base, i32 %i
|
||||
; CHECK: %ptr.i0 = bitcast <4 x float>* %ptr to float*
|
||||
; CHECK: %val.i0 = load float* %ptr.i0, align 16
|
||||
; CHECK: %ptr.i1 = getelementptr float* %ptr.i0, i32 1
|
||||
; CHECK: %val.i1 = load float* %ptr.i1, align 4
|
||||
; CHECK: %ptr.i2 = getelementptr float* %ptr.i0, i32 2
|
||||
; CHECK: %val.i2 = load float* %ptr.i2, align 8
|
||||
; CHECK: %ptr.i3 = getelementptr float* %ptr.i0, i32 3
|
||||
; CHECK: %val.i3 = load float* %ptr.i3, align 4
|
||||
; CHECK: %add.i0 = fadd float %val.i0, %val.i2
|
||||
; CHECK: %add.i1 = fadd float %val.i1, %val.i3
|
||||
; CHECK: %add.i2 = fadd float %acc.i0, %acc.i2
|
||||
; CHECK: %add.i3 = fadd float %acc.i1, %acc.i3
|
||||
; CHECK: %add.upto0 = insertelement <4 x float> undef, float %add.i0, i32 0
|
||||
; CHECK: %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1
|
||||
; CHECK: %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2
|
||||
; CHECK: %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3
|
||||
; CHECK: %call = call <4 x float> @ext(<4 x float> %add)
|
||||
; CHECK: %call.i0 = extractelement <4 x float> %call, i32 0
|
||||
; CHECK: %cmp.i0 = fcmp ogt float %call.i0, 1.0
|
||||
; CHECK: %call.i1 = extractelement <4 x float> %call, i32 1
|
||||
; CHECK: %cmp.i1 = fcmp ogt float %call.i1, 2.0
|
||||
; CHECK: %call.i2 = extractelement <4 x float> %call, i32 2
|
||||
; CHECK: %cmp.i2 = fcmp ogt float %call.i2, 3.0
|
||||
; CHECK: %call.i3 = extractelement <4 x float> %call, i32 3
|
||||
; CHECK: %cmp.i3 = fcmp ogt float %call.i3, 4.0
|
||||
; CHECK: %sel.i0 = select i1 %cmp.i0, float %call.i0, float 5.0
|
||||
; CHECK: %sel.i1 = select i1 %cmp.i1, float %call.i1, float 6.0
|
||||
; CHECK: %sel.i2 = select i1 %cmp.i2, float %call.i2, float 7.0
|
||||
; CHECK: %sel.i3 = select i1 %cmp.i3, float %call.i3, float 8.0
|
||||
; CHECK: store float %sel.i0, float* %ptr.i0
|
||||
; CHECK: store float %sel.i1, float* %ptr.i1
|
||||
; CHECK: store float %sel.i2, float* %ptr.i2
|
||||
; CHECK: store float %sel.i3, float* %ptr.i3
|
||||
; CHECK: %test = icmp eq i32 %nexti, 0
|
||||
; CHECK: br i1 %test, label %loop, label %exit
|
||||
; CHECK: exit:
|
||||
; CHECK: ret void
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
|
||||
%acc = phi <4 x float> [ %init, %entry ], [ %sel, %loop ]
|
||||
%nexti = sub i32 %i, 1
|
||||
|
||||
%ptr = getelementptr <4 x float> *%base, i32 %i
|
||||
%val = load <4 x float> *%ptr
|
||||
%dval = bitcast <4 x float> %val to <2 x double>
|
||||
%dacc = bitcast <4 x float> %acc to <2 x double>
|
||||
%shuffle1 = shufflevector <2 x double> %dval, <2 x double> %dacc,
|
||||
<2 x i32> <i32 0, i32 2>
|
||||
%shuffle2 = shufflevector <2 x double> %dval, <2 x double> %dacc,
|
||||
<2 x i32> <i32 1, i32 3>
|
||||
%f1 = bitcast <2 x double> %shuffle1 to <4 x float>
|
||||
%f2 = bitcast <2 x double> %shuffle2 to <4 x float>
|
||||
%add = fadd <4 x float> %f1, %f2
|
||||
%call = call <4 x float> @ext(<4 x float> %add)
|
||||
%cmp = fcmp ogt <4 x float> %call,
|
||||
<float 1.0, float 2.0, float 3.0, float 4.0>
|
||||
%sel = select <4 x i1> %cmp, <4 x float> %call,
|
||||
<4 x float> <float 5.0, float 6.0, float 7.0, float 8.0>
|
||||
store <4 x float> %sel, <4 x float> *%ptr
|
||||
|
||||
%test = icmp eq i32 %nexti, 0
|
||||
br i1 %test, label %loop, label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Integer loop: <4 x i8> load, sext, add with a <4 x i32> phi, icmp, select
; against a splat of %i (built with insertelement + shufflevector), trunc and
; store.  The expected output uses %i directly as the select operand in every
; lane, and gives the i8 element loads and stores alignments 4, 1, 2 and 1.
define void @f2(<4 x i32> %init, <4 x i8> *%base, i32 %count) {
|
||||
; CHECK-LABEL: define void @f2(<4 x i32> %init, <4 x i8>* %base, i32 %count) {
|
||||
; CHECK: entry:
|
||||
; CHECK: %init.i0 = extractelement <4 x i32> %init, i32 0
|
||||
; CHECK: %init.i1 = extractelement <4 x i32> %init, i32 1
|
||||
; CHECK: %init.i2 = extractelement <4 x i32> %init, i32 2
|
||||
; CHECK: %init.i3 = extractelement <4 x i32> %init, i32 3
|
||||
; CHECK: br label %loop
|
||||
; CHECK: loop:
|
||||
; CHECK: %i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
|
||||
; CHECK: %acc.i0 = phi i32 [ %init.i0, %entry ], [ %sel.i0, %loop ]
|
||||
; CHECK: %acc.i1 = phi i32 [ %init.i1, %entry ], [ %sel.i1, %loop ]
|
||||
; CHECK: %acc.i2 = phi i32 [ %init.i2, %entry ], [ %sel.i2, %loop ]
|
||||
; CHECK: %acc.i3 = phi i32 [ %init.i3, %entry ], [ %sel.i3, %loop ]
|
||||
; CHECK: %nexti = sub i32 %i, 1
|
||||
; CHECK: %ptr = getelementptr <4 x i8>* %base, i32 %i
|
||||
; CHECK: %ptr.i0 = bitcast <4 x i8>* %ptr to i8*
|
||||
; CHECK: %val.i0 = load i8* %ptr.i0, align 4
|
||||
; CHECK: %ptr.i1 = getelementptr i8* %ptr.i0, i32 1
|
||||
; CHECK: %val.i1 = load i8* %ptr.i1, align 1
|
||||
; CHECK: %ptr.i2 = getelementptr i8* %ptr.i0, i32 2
|
||||
; CHECK: %val.i2 = load i8* %ptr.i2, align 2
|
||||
; CHECK: %ptr.i3 = getelementptr i8* %ptr.i0, i32 3
|
||||
; CHECK: %val.i3 = load i8* %ptr.i3, align 1
|
||||
; CHECK: %ext.i0 = sext i8 %val.i0 to i32
|
||||
; CHECK: %ext.i1 = sext i8 %val.i1 to i32
|
||||
; CHECK: %ext.i2 = sext i8 %val.i2 to i32
|
||||
; CHECK: %ext.i3 = sext i8 %val.i3 to i32
|
||||
; CHECK: %add.i0 = add i32 %ext.i0, %acc.i0
|
||||
; CHECK: %add.i1 = add i32 %ext.i1, %acc.i1
|
||||
; CHECK: %add.i2 = add i32 %ext.i2, %acc.i2
|
||||
; CHECK: %add.i3 = add i32 %ext.i3, %acc.i3
|
||||
; CHECK: %cmp.i0 = icmp slt i32 %add.i0, -10
|
||||
; CHECK: %cmp.i1 = icmp slt i32 %add.i1, -11
|
||||
; CHECK: %cmp.i2 = icmp slt i32 %add.i2, -12
|
||||
; CHECK: %cmp.i3 = icmp slt i32 %add.i3, -13
|
||||
; CHECK: %sel.i0 = select i1 %cmp.i0, i32 %add.i0, i32 %i
|
||||
; CHECK: %sel.i1 = select i1 %cmp.i1, i32 %add.i1, i32 %i
|
||||
; CHECK: %sel.i2 = select i1 %cmp.i2, i32 %add.i2, i32 %i
|
||||
; CHECK: %sel.i3 = select i1 %cmp.i3, i32 %add.i3, i32 %i
|
||||
; CHECK: %trunc.i0 = trunc i32 %sel.i0 to i8
|
||||
; CHECK: %trunc.i1 = trunc i32 %sel.i1 to i8
|
||||
; CHECK: %trunc.i2 = trunc i32 %sel.i2 to i8
|
||||
; CHECK: %trunc.i3 = trunc i32 %sel.i3 to i8
|
||||
; CHECK: store i8 %trunc.i0, i8* %ptr.i0, align 4
|
||||
; CHECK: store i8 %trunc.i1, i8* %ptr.i1, align 1
|
||||
; CHECK: store i8 %trunc.i2, i8* %ptr.i2, align 2
|
||||
; CHECK: store i8 %trunc.i3, i8* %ptr.i3, align 1
|
||||
; CHECK: %test = icmp eq i32 %nexti, 0
|
||||
; CHECK: br i1 %test, label %loop, label %exit
|
||||
; CHECK: exit:
|
||||
; CHECK: ret void
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%i = phi i32 [ %count, %entry ], [ %nexti, %loop ]
|
||||
%acc = phi <4 x i32> [ %init, %entry ], [ %sel, %loop ]
|
||||
%nexti = sub i32 %i, 1
|
||||
|
||||
%ptr = getelementptr <4 x i8> *%base, i32 %i
|
||||
%val = load <4 x i8> *%ptr
|
||||
%ext = sext <4 x i8> %val to <4 x i32>
|
||||
%add = add <4 x i32> %ext, %acc
|
||||
%cmp = icmp slt <4 x i32> %add, <i32 -10, i32 -11, i32 -12, i32 -13>
|
||||
%single = insertelement <4 x i32> undef, i32 %i, i32 0
|
||||
%limit = shufflevector <4 x i32> %single, <4 x i32> undef,
|
||||
<4 x i32> zeroinitializer
|
||||
%sel = select <4 x i1> %cmp, <4 x i32> %add, <4 x i32> %limit
|
||||
%trunc = trunc <4 x i32> %sel to <4 x i8>
|
||||
store <4 x i8> %trunc, <4 x i8> *%ptr
|
||||
|
||||
%test = icmp eq i32 %nexti, 0
|
||||
br i1 %test, label %loop, label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that !tbaa information is preserved.
; The load is tagged with !1 and the store with !2, which is why the first
; store pattern binds TAG again instead of reusing the load's binding.
|
||||
define void @f3(<4 x i32> *%src, <4 x i32> *%dst) {
|
||||
; CHECK-LABEL: @f3(
|
||||
; CHECK: %val.i0 = load i32* %src.i0, align 16, !tbaa ![[TAG:[0-9]*]]
|
||||
; CHECK: %val.i1 = load i32* %src.i1, align 4, !tbaa ![[TAG]]
|
||||
; CHECK: %val.i2 = load i32* %src.i2, align 8, !tbaa ![[TAG]]
|
||||
; CHECK: %val.i3 = load i32* %src.i3, align 4, !tbaa ![[TAG]]
|
||||
; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa ![[TAG:[0-9]*]]
|
||||
; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa ![[TAG]]
|
||||
; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa ![[TAG]]
|
||||
; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa ![[TAG]]
|
||||
; CHECK: ret void
|
||||
%val = load <4 x i32> *%src, !tbaa !1
|
||||
%add = add <4 x i32> %val, %val
|
||||
store <4 x i32> %add, <4 x i32> *%dst, !tbaa !2
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that !tbaa.struct information is preserved.
; Load and store both carry !tbaa.struct !5, so one TAG binding is reused for
; all eight scalar accesses.
|
||||
define void @f4(<4 x i32> *%src, <4 x i32> *%dst) {
|
||||
; CHECK-LABEL: @f4(
|
||||
; CHECK: %val.i0 = load i32* %src.i0, align 16, !tbaa.struct ![[TAG:[0-9]*]]
|
||||
; CHECK: %val.i1 = load i32* %src.i1, align 4, !tbaa.struct ![[TAG]]
|
||||
; CHECK: %val.i2 = load i32* %src.i2, align 8, !tbaa.struct ![[TAG]]
|
||||
; CHECK: %val.i3 = load i32* %src.i3, align 4, !tbaa.struct ![[TAG]]
|
||||
; CHECK: store i32 %add.i0, i32* %dst.i0, align 16, !tbaa.struct ![[TAG]]
|
||||
; CHECK: store i32 %add.i1, i32* %dst.i1, align 4, !tbaa.struct ![[TAG]]
|
||||
; CHECK: store i32 %add.i2, i32* %dst.i2, align 8, !tbaa.struct ![[TAG]]
|
||||
; CHECK: store i32 %add.i3, i32* %dst.i3, align 4, !tbaa.struct ![[TAG]]
|
||||
; CHECK: ret void
|
||||
%val = load <4 x i32> *%src, !tbaa.struct !5
|
||||
%add = add <4 x i32> %val, %val
|
||||
store <4 x i32> %add, <4 x i32> *%dst, !tbaa.struct !5
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that llvm.mem.parallel_loop_access information is preserved.
; The vector load, the vector store and the loop branch all reference node !3;
; every scalar load and store is expected to carry the same tag.
|
||||
define void @f5(i32 %count, <4 x i32> *%src, <4 x i32> *%dst) {
|
||||
; CHECK-LABEL: @f5(
|
||||
; CHECK: %val.i0 = load i32* %this_src.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG:[0-9]*]]
|
||||
; CHECK: %val.i1 = load i32* %this_src.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
|
||||
; CHECK: %val.i2 = load i32* %this_src.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
|
||||
; CHECK: %val.i3 = load i32* %this_src.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
|
||||
; CHECK: store i32 %add.i0, i32* %this_dst.i0, align 16, !llvm.mem.parallel_loop_access ![[TAG]]
|
||||
; CHECK: store i32 %add.i1, i32* %this_dst.i1, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
|
||||
; CHECK: store i32 %add.i2, i32* %this_dst.i2, align 8, !llvm.mem.parallel_loop_access ![[TAG]]
|
||||
; CHECK: store i32 %add.i3, i32* %this_dst.i3, align 4, !llvm.mem.parallel_loop_access ![[TAG]]
|
||||
; CHECK: ret void
|
||||
entry:
|
||||
br label %loop
|
||||
|
||||
loop:
|
||||
%index = phi i32 [ 0, %entry ], [ %next_index, %loop ]
|
||||
%this_src = getelementptr <4 x i32> *%src, i32 %index
|
||||
%this_dst = getelementptr <4 x i32> *%dst, i32 %index
|
||||
%val = load <4 x i32> *%this_src, !llvm.mem.parallel_loop_access !3
|
||||
%add = add <4 x i32> %val, %val
|
||||
store <4 x i32> %add, <4 x i32> *%this_dst, !llvm.mem.parallel_loop_access !3
|
||||
%next_index = add i32 %index, -1
|
||||
%continue = icmp ne i32 %next_index, %count
|
||||
br i1 %continue, label %loop, label %end, !llvm.loop !3
|
||||
|
||||
end:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that fpmath information is preserved.
; Each scalar fadd is expected to keep the !fpmath tag of the vector fadd.
; The regex after each constant tolerates trailing 'e', '+' and '0'
; characters, i.e. either a plain or an exponent spelling of the literal.
; The result is returned as a vector, so it is rebuilt with insertelement.
|
||||
define <4 x float> @f6(<4 x float> %x) {
|
||||
; CHECK-LABEL: @f6(
|
||||
; CHECK: %x.i0 = extractelement <4 x float> %x, i32 0
|
||||
; CHECK: %res.i0 = fadd float %x.i0, 1.0{{[e+0]*}}, !fpmath ![[TAG:[0-9]*]]
|
||||
; CHECK: %x.i1 = extractelement <4 x float> %x, i32 1
|
||||
; CHECK: %res.i1 = fadd float %x.i1, 2.0{{[e+0]*}}, !fpmath ![[TAG]]
|
||||
; CHECK: %x.i2 = extractelement <4 x float> %x, i32 2
|
||||
; CHECK: %res.i2 = fadd float %x.i2, 3.0{{[e+0]*}}, !fpmath ![[TAG]]
|
||||
; CHECK: %x.i3 = extractelement <4 x float> %x, i32 3
|
||||
; CHECK: %res.i3 = fadd float %x.i3, 4.0{{[e+0]*}}, !fpmath ![[TAG]]
|
||||
; CHECK: %res.upto0 = insertelement <4 x float> undef, float %res.i0, i32 0
|
||||
; CHECK: %res.upto1 = insertelement <4 x float> %res.upto0, float %res.i1, i32 1
|
||||
; CHECK: %res.upto2 = insertelement <4 x float> %res.upto1, float %res.i2, i32 2
|
||||
; CHECK: %res = insertelement <4 x float> %res.upto2, float %res.i3, i32 3
|
||||
; CHECK: ret <4 x float> %res
|
||||
%res = fadd <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0>,
|
||||
!fpmath !4
|
||||
ret <4 x float> %res
|
||||
}
|
||||
|
||||
; Check that random metadata isn't kept.
; !foo is not one of the kinds tested as preserved elsewhere in this file
; (tbaa, tbaa.struct, llvm.mem.parallel_loop_access, fpmath); the output must
; not mention it anywhere between the function label and "ret void".
|
||||
define void @f7(<4 x i32> *%src, <4 x i32> *%dst) {
|
||||
; CHECK-LABEL: @f7(
|
||||
; CHECK-NOT: !foo
|
||||
; CHECK: ret void
|
||||
%val = load <4 x i32> *%src, !foo !5
|
||||
%add = add <4 x i32> %val, %val
|
||||
store <4 x i32> %add, <4 x i32> *%dst, !foo !5
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test GEP with vectors.
; Lanes 0 and 2 of the index vector are overwritten with the constant 100 and
; lane 1 of the pointer vector with %other.  The expected per-lane GEPs use
; those values directly and only extract the lanes still taken from %i0
; (1 and 3) and from %ptr0 (0, 2 and 3).  The pointer-element stores are
; expected with alignments 32, 8, 16 and 8.
|
||||
define void @f8(<4 x float *> *%dest, <4 x float *> %ptr0, <4 x i32> %i0,
|
||||
float *%other) {
|
||||
; CHECK-LABEL: @f8(
|
||||
; CHECK: %dest.i0 = bitcast <4 x float*>* %dest to float**
|
||||
; CHECK: %dest.i1 = getelementptr float** %dest.i0, i32 1
|
||||
; CHECK: %dest.i2 = getelementptr float** %dest.i0, i32 2
|
||||
; CHECK: %dest.i3 = getelementptr float** %dest.i0, i32 3
|
||||
; CHECK: %i0.i1 = extractelement <4 x i32> %i0, i32 1
|
||||
; CHECK: %i0.i3 = extractelement <4 x i32> %i0, i32 3
|
||||
; CHECK: %ptr0.i0 = extractelement <4 x float*> %ptr0, i32 0
|
||||
; CHECK: %val.i0 = getelementptr float* %ptr0.i0, i32 100
|
||||
; CHECK: %val.i1 = getelementptr float* %other, i32 %i0.i1
|
||||
; CHECK: %ptr0.i2 = extractelement <4 x float*> %ptr0, i32 2
|
||||
; CHECK: %val.i2 = getelementptr float* %ptr0.i2, i32 100
|
||||
; CHECK: %ptr0.i3 = extractelement <4 x float*> %ptr0, i32 3
|
||||
; CHECK: %val.i3 = getelementptr float* %ptr0.i3, i32 %i0.i3
|
||||
; CHECK: store float* %val.i0, float** %dest.i0, align 32
|
||||
; CHECK: store float* %val.i1, float** %dest.i1, align 8
|
||||
; CHECK: store float* %val.i2, float** %dest.i2, align 16
|
||||
; CHECK: store float* %val.i3, float** %dest.i3, align 8
|
||||
; CHECK: ret void
|
||||
%i1 = insertelement <4 x i32> %i0, i32 100, i32 0
|
||||
%i2 = insertelement <4 x i32> %i1, i32 100, i32 2
|
||||
%ptr1 = insertelement <4 x float *> %ptr0, float *%other, i32 1
|
||||
%val = getelementptr <4 x float *> %ptr1, <4 x i32> %i2
|
||||
store <4 x float *> %val, <4 x float *> *%dest
|
||||
ret void
|
||||
}
|
||||
|
||||
; Test the handling of unaligned loads.
; The vector load is only 4-byte aligned and the vector store 8-byte aligned,
; so the expected element alignments are 4, 4, 4, 4 for the loads and
; 8, 4, 8, 4 for the stores.
define void @f9(<4 x float> *%dest, <4 x float> *%src) {
; CHECK-LABEL: @f9(
; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
; CHECK: %dest.i1 = getelementptr float* %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float* %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float* %dest.i0, i32 3
; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
; CHECK: %val.i0 = load float* %src.i0, align 4
; CHECK: %src.i1 = getelementptr float* %src.i0, i32 1
; CHECK: %val.i1 = load float* %src.i1, align 4
; CHECK: %src.i2 = getelementptr float* %src.i0, i32 2
; CHECK: %val.i2 = load float* %src.i2, align 4
; CHECK: %src.i3 = getelementptr float* %src.i0, i32 3
; CHECK: %val.i3 = load float* %src.i3, align 4
; CHECK: store float %val.i0, float* %dest.i0, align 8
; CHECK: store float %val.i1, float* %dest.i1, align 4
; CHECK: store float %val.i2, float* %dest.i2, align 8
; CHECK: store float %val.i3, float* %dest.i3, align 4
; CHECK: ret void
%val = load <4 x float> *%src, align 4
store <4 x float> %val, <4 x float> *%dest, align 8
ret void
}
|
||||
|
||||
; ...and again with subelement alignment.
; The load is align 1 and the store align 2, both smaller than a float
; element, so every scalar access is expected to keep the original alignment.
define void @f10(<4 x float> *%dest, <4 x float> *%src) {
; CHECK-LABEL: @f10(
; CHECK: %dest.i0 = bitcast <4 x float>* %dest to float*
; CHECK: %dest.i1 = getelementptr float* %dest.i0, i32 1
; CHECK: %dest.i2 = getelementptr float* %dest.i0, i32 2
; CHECK: %dest.i3 = getelementptr float* %dest.i0, i32 3
; CHECK: %src.i0 = bitcast <4 x float>* %src to float*
; CHECK: %val.i0 = load float* %src.i0, align 1
; CHECK: %src.i1 = getelementptr float* %src.i0, i32 1
; CHECK: %val.i1 = load float* %src.i1, align 1
; CHECK: %src.i2 = getelementptr float* %src.i0, i32 2
; CHECK: %val.i2 = load float* %src.i2, align 1
; CHECK: %src.i3 = getelementptr float* %src.i0, i32 3
; CHECK: %val.i3 = load float* %src.i3, align 1
; CHECK: store float %val.i0, float* %dest.i0, align 2
; CHECK: store float %val.i1, float* %dest.i1, align 2
; CHECK: store float %val.i2, float* %dest.i2, align 2
; CHECK: store float %val.i3, float* %dest.i3, align 2
; CHECK: ret void
%val = load <4 x float> *%src, align 1
store <4 x float> %val, <4 x float> *%dest, align 2
ret void
}
|
||||
|
||||
; Test that sub-byte loads aren't scalarized.
; The elements are i1, so both <32 x i1> loads and the <32 x i1> store are
; expected to stay as whole-vector accesses.
define void @f11(<32 x i1> *%dest, <32 x i1> *%src0) {
; CHECK-LABEL: @f11(
; CHECK: %val0 = load <32 x i1>* %src0
; CHECK: %val1 = load <32 x i1>* %src1
; CHECK: store <32 x i1> %and, <32 x i1>* %dest
; CHECK: ret void
%src1 = getelementptr <32 x i1> *%src0, i32 1
%val0 = load <32 x i1> *%src0
%val1 = load <32 x i1> *%src1
%and = and <32 x i1> %val0, %val1
store <32 x i1> %and, <32 x i1> *%dest
ret void
}
|
||||
|
||||
; Test that variable inserts aren't scalarized.
; The insertelement index is the runtime value %index, so the insert is
; expected to remain a vector operation; its result is split with
; extractelement to feed the per-lane shl instructions.
define void @f12(<4 x i32> *%dest, <4 x i32> *%src, i32 %index) {
; CHECK-LABEL: @f12(
; CHECK: %val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
; CHECK-DAG: %val1.i0 = extractelement <4 x i32> %val1, i32 0
; CHECK-DAG: %val1.i1 = extractelement <4 x i32> %val1, i32 1
; CHECK-DAG: %val1.i2 = extractelement <4 x i32> %val1, i32 2
; CHECK-DAG: %val1.i3 = extractelement <4 x i32> %val1, i32 3
; CHECK-DAG: %val2.i0 = shl i32 1, %val1.i0
; CHECK-DAG: %val2.i1 = shl i32 2, %val1.i1
; CHECK-DAG: %val2.i2 = shl i32 3, %val1.i2
; CHECK-DAG: %val2.i3 = shl i32 4, %val1.i3
; CHECK: ret void
%val0 = load <4 x i32> *%src
%val1 = insertelement <4 x i32> %val0, i32 1, i32 %index
%val2 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %val1
store <4 x i32> %val2, <4 x i32> *%dest
ret void
}
|
||||
|
||||
; Parent node referenced by !1 and !2.
!0 = metadata !{ metadata !"root" }
|
||||
; !tbaa tag on the load in @f3.
!1 = metadata !{ metadata !"set1", metadata !0 }
|
||||
; !tbaa tag on the store in @f3.
!2 = metadata !{ metadata !"set2", metadata !0 }
|
||||
; Self-referential node used by @f5 for !llvm.mem.parallel_loop_access and
; !llvm.loop.
!3 = metadata !{ metadata !3 }
|
||||
; !fpmath operand used by @f6.
!4 = metadata !{ float 4.0 }
|
||||
; Used as !tbaa.struct in @f4 and as the unknown kind !foo in @f7.
!5 = metadata !{ i64 0, i64 8, null }
|
85
test/Transforms/Scalarizer/dbginfo.ll
Normal file
85
test/Transforms/Scalarizer/dbginfo.ll
Normal file
@ -0,0 +1,85 @@
|
||||
; RUN: opt %s -scalarizer -scalarize-load-store -S | FileCheck %s
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
|
||||
; Debug-info test: the vector loads, add and store each carry !dbg (and the
; memory accesses also !tbaa).  Every scalar replacement is expected to keep
; the same !dbg and !tbaa tags, while the llvm.dbg.value calls on the pointer
; arguments are expected to be left unchanged.
; Function Attrs: nounwind uwtable
|
||||
define void @f1(<4 x i32>* nocapture %a, <4 x i32>* nocapture readonly %b, <4 x i32>* nocapture readonly %c) #0 {
|
||||
; CHECK: @f1(
|
||||
; CHECK: %a.i0 = bitcast <4 x i32>* %a to i32*
|
||||
; CHECK: %a.i1 = getelementptr i32* %a.i0, i32 1
|
||||
; CHECK: %a.i2 = getelementptr i32* %a.i0, i32 2
|
||||
; CHECK: %a.i3 = getelementptr i32* %a.i0, i32 3
|
||||
; CHECK: %c.i0 = bitcast <4 x i32>* %c to i32*
|
||||
; CHECK: %c.i1 = getelementptr i32* %c.i0, i32 1
|
||||
; CHECK: %c.i2 = getelementptr i32* %c.i0, i32 2
|
||||
; CHECK: %c.i3 = getelementptr i32* %c.i0, i32 3
|
||||
; CHECK: %b.i0 = bitcast <4 x i32>* %b to i32*
|
||||
; CHECK: %b.i1 = getelementptr i32* %b.i0, i32 1
|
||||
; CHECK: %b.i2 = getelementptr i32* %b.i0, i32 2
|
||||
; CHECK: %b.i3 = getelementptr i32* %b.i0, i32 3
|
||||
; CHECK: tail call void @llvm.dbg.value(metadata !{<4 x i32>* %a}, i64 0, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}}
|
||||
; CHECK: tail call void @llvm.dbg.value(metadata !{<4 x i32>* %b}, i64 0, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}}
|
||||
; CHECK: tail call void @llvm.dbg.value(metadata !{<4 x i32>* %c}, i64 0, metadata !{{[0-9]+}}), !dbg !{{[0-9]+}}
|
||||
; CHECK: %bval.i0 = load i32* %b.i0, align 16, !dbg ![[TAG1:[0-9]+]], !tbaa ![[TAG2:[0-9]+]]
|
||||
; CHECK: %bval.i1 = load i32* %b.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
|
||||
; CHECK: %bval.i2 = load i32* %b.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]]
|
||||
; CHECK: %bval.i3 = load i32* %b.i3, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
|
||||
; CHECK: %cval.i0 = load i32* %c.i0, align 16, !dbg ![[TAG1]], !tbaa ![[TAG2]]
|
||||
; CHECK: %cval.i1 = load i32* %c.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
|
||||
; CHECK: %cval.i2 = load i32* %c.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]]
|
||||
; CHECK: %cval.i3 = load i32* %c.i3, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
|
||||
; CHECK: %add.i0 = add i32 %bval.i0, %cval.i0, !dbg ![[TAG1]]
|
||||
; CHECK: %add.i1 = add i32 %bval.i1, %cval.i1, !dbg ![[TAG1]]
|
||||
; CHECK: %add.i2 = add i32 %bval.i2, %cval.i2, !dbg ![[TAG1]]
|
||||
; CHECK: %add.i3 = add i32 %bval.i3, %cval.i3, !dbg ![[TAG1]]
|
||||
; CHECK: store i32 %add.i0, i32* %a.i0, align 16, !dbg ![[TAG1]], !tbaa ![[TAG2]]
|
||||
; CHECK: store i32 %add.i1, i32* %a.i1, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
|
||||
; CHECK: store i32 %add.i2, i32* %a.i2, align 8, !dbg ![[TAG1]], !tbaa ![[TAG2]]
|
||||
; CHECK: store i32 %add.i3, i32* %a.i3, align 4, !dbg ![[TAG1]], !tbaa ![[TAG2]]
|
||||
; CHECK: ret void
|
||||
entry:
|
||||
tail call void @llvm.dbg.value(metadata !{<4 x i32>* %a}, i64 0, metadata !15), !dbg !20
|
||||
tail call void @llvm.dbg.value(metadata !{<4 x i32>* %b}, i64 0, metadata !16), !dbg !20
|
||||
tail call void @llvm.dbg.value(metadata !{<4 x i32>* %c}, i64 0, metadata !17), !dbg !20
|
||||
%bval = load <4 x i32>* %b, align 16, !dbg !21, !tbaa !22
|
||||
%cval = load <4 x i32>* %c, align 16, !dbg !21, !tbaa !22
|
||||
%add = add <4 x i32> %bval, %cval, !dbg !21
|
||||
store <4 x i32> %add, <4 x i32>* %a, align 16, !dbg !21, !tbaa !22
|
||||
ret void, !dbg !25
|
||||
}
|
||||
|
||||
; Function Attrs: nounwind readnone
|
||||
declare void @llvm.dbg.value(metadata, i64, metadata) #1
|
||||
|
||||
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
|
||||
attributes #1 = { nounwind readnone }
|
||||
|
||||
!llvm.dbg.cu = !{!0}
|
||||
!llvm.module.flags = !{!18}
|
||||
!llvm.ident = !{!19}
|
||||
|
||||
!0 = metadata !{i32 786449, metadata !1, i32 12, metadata !"clang version 3.4 (trunk 194134) (llvm/trunk 194126)", i1 true, metadata !"", i32 0, metadata !2, metadata !2, metadata !3, metadata !2, metadata !2, metadata !""} ; [ DW_TAG_compile_unit ] [/home/richards/llvm/build//tmp/add.c] [DW_LANG_C99]
|
||||
!1 = metadata !{metadata !"/tmp/add.c", metadata !"/home/richards/llvm/build"}
|
||||
!2 = metadata !{i32 0}
|
||||
!3 = metadata !{metadata !4}
|
||||
!4 = metadata !{i32 786478, metadata !1, metadata !5, metadata !"f1", metadata !"f1", metadata !"", i32 3, metadata !6, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 true, void (<4 x i32>*, <4 x i32>*, <4 x i32>*)* @f1, null, null, metadata !14, i32 4} ; [ DW_TAG_subprogram ] [line 3] [def] [scope 4] [f]
|
||||
!5 = metadata !{i32 786473, metadata !1} ; [ DW_TAG_file_type ] [/home/richards/llvm/build//tmp/add.c]
|
||||
!6 = metadata !{i32 786453, i32 0, null, metadata !"", i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !7, i32 0, null, null, null} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
|
||||
!7 = metadata !{null, metadata !8, metadata !8, metadata !8}
|
||||
!8 = metadata !{i32 786447, null, null, metadata !"", i32 0, i64 64, i64 64, i64 0, i32 0, metadata !9} ; [ DW_TAG_pointer_type ] [line 0, size 64, align 64, offset 0] [from V4SI]
|
||||
!9 = metadata !{i32 786454, metadata !1, null, metadata !"V4SI", i32 1, i64 0, i64 0, i64 0, i32 0, metadata !10} ; [ DW_TAG_typedef ] [V4SI] [line 1, size 0, align 0, offset 0] [from ]
|
||||
!10 = metadata !{i32 786433, null, null, metadata !"", i32 0, i64 128, i64 128, i32 0, i32 2048, metadata !11, metadata !12, i32 0, null, null, null} ; [ DW_TAG_array_type ] [line 0, size 128, align 128, offset 0] [vector] [from int]
|
||||
!11 = metadata !{i32 786468, null, null, metadata !"int", i32 0, i64 32, i64 32, i64 0, i32 0, i32 5} ; [ DW_TAG_base_type ] [int] [line 0, size 32, align 32, offset 0, enc DW_ATE_signed]
|
||||
!12 = metadata !{metadata !13}
|
||||
!13 = metadata !{i32 786465, i64 0, i64 4} ; [ DW_TAG_subrange_type ] [0, 3]
|
||||
!14 = metadata !{metadata !15, metadata !16, metadata !17}
|
||||
!15 = metadata !{i32 786689, metadata !4, metadata !"a", metadata !5, i32 16777219, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [a] [line 3]
|
||||
!16 = metadata !{i32 786689, metadata !4, metadata !"b", metadata !5, i32 33554435, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [b] [line 3]
|
||||
!17 = metadata !{i32 786689, metadata !4, metadata !"c", metadata !5, i32 50331651, metadata !8, i32 0, i32 0} ; [ DW_TAG_arg_variable ] [c] [line 3]
|
||||
!18 = metadata !{i32 2, metadata !"Dwarf Version", i32 4}
|
||||
!19 = metadata !{metadata !"clang version 3.4 (trunk 194134) (llvm/trunk 194126)"}
|
||||
!20 = metadata !{i32 3, i32 0, metadata !4, null}
|
||||
!21 = metadata !{i32 5, i32 0, metadata !4, null}
|
||||
!22 = metadata !{metadata !23, metadata !23, i64 0}
|
||||
!23 = metadata !{metadata !"omnipotent char", metadata !24, i64 0}
|
||||
!24 = metadata !{metadata !"Simple C/C++ TBAA"}
|
||||
!25 = metadata !{i32 6, i32 0, metadata !4, null}
|
25
test/Transforms/Scalarizer/no-data-layout.ll
Normal file
25
test/Transforms/Scalarizer/no-data-layout.ll
Normal file
@ -0,0 +1,25 @@
|
||||
; RUN: opt %s -scalarizer -scalarize-load-store -S | FileCheck %s
|
||||
|
||||
; Test the handling of loads and stores when no data layout is available.
; This file has no "target datalayout" line.  The expected output keeps the
; <4 x float> load and store as whole-vector accesses with their original
; alignments, scalarizes only the fadd, and rebuilds %add with insertelement
; for the vector store.
|
||||
define void @f1(<4 x float> *%dest, <4 x float> *%src) {
|
||||
; CHECK: @f1(
|
||||
; CHECK: %val = load <4 x float>* %src, align 4
|
||||
; CHECK: %val.i0 = extractelement <4 x float> %val, i32 0
|
||||
; CHECK: %add.i0 = fadd float %val.i0, %val.i0
|
||||
; CHECK: %val.i1 = extractelement <4 x float> %val, i32 1
|
||||
; CHECK: %add.i1 = fadd float %val.i1, %val.i1
|
||||
; CHECK: %val.i2 = extractelement <4 x float> %val, i32 2
|
||||
; CHECK: %add.i2 = fadd float %val.i2, %val.i2
|
||||
; CHECK: %val.i3 = extractelement <4 x float> %val, i32 3
|
||||
; CHECK: %add.i3 = fadd float %val.i3, %val.i3
|
||||
; CHECK: %add.upto0 = insertelement <4 x float> undef, float %add.i0, i32 0
|
||||
; CHECK: %add.upto1 = insertelement <4 x float> %add.upto0, float %add.i1, i32 1
|
||||
; CHECK: %add.upto2 = insertelement <4 x float> %add.upto1, float %add.i2, i32 2
|
||||
; CHECK: %add = insertelement <4 x float> %add.upto2, float %add.i3, i32 3
|
||||
; CHECK: store <4 x float> %add, <4 x float>* %dest, align 8
|
||||
; CHECK: ret void
|
||||
%val = load <4 x float> *%src, align 4
|
||||
%add = fadd <4 x float> %val, %val
|
||||
store <4 x float> %add, <4 x float> *%dest, align 8
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user