mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-11-04 05:17:07 +00:00 
			
		
		
		
	This commit adds the infrastructure for performing bottom-up SLP vectorization (and other optimizations) on parallel computations.
The infrastructure has three potential users:
  1. The loop vectorizer needs to be able to vectorize AOS data structures such as (sum += A[i] + A[i+1]).
  2. The BB-vectorizer needs this infrastructure for bottom-up SLP vectorization, because bottom-up vectorization is faster to compute.
  3. A loop-roller needs to be able to analyze consecutive chains and roll them into a loop, in order to reduce code size. A loop roller does not need to create vector instructions, and this infrastructure separates the chain analysis from the vectorization.
This patch also includes a simple (100 LOC) bottom up SLP vectorizer that uses the infrastructure, and can vectorize this code:
void SAXPY(int *x, int *y, int a, int i) {
  x[i]   = a * x[i]   + y[i];
  x[i+1] = a * x[i+1] + y[i+1];
  x[i+2] = a * x[i+2] + y[i+2];
  x[i+3] = a * x[i+3] + y[i+3];
}
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179117 91177308-0d34-0410-b5e6-96231b3b80d8
		
	
		
			
				
	
	
		
			440 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			440 lines
		
	
	
		
			15 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
//===- VecUtils.h --- Vectorization Utilities -----------------------------===//
 | 
						|
//
 | 
						|
//                     The LLVM Compiler Infrastructure
 | 
						|
//
 | 
						|
// This file is distributed under the University of Illinois Open Source
 | 
						|
// License. See LICENSE.TXT for details.
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
#define DEBUG_TYPE "VecUtils"
 | 
						|
 | 
						|
#include "VecUtils.h"
 | 
						|
#include "llvm/ADT/DenseMap.h"
 | 
						|
#include "llvm/ADT/SmallPtrSet.h"
 | 
						|
#include "llvm/ADT/SmallSet.h"
 | 
						|
#include "llvm/ADT/SmallVector.h"
 | 
						|
#include "llvm/Analysis/AliasAnalysis.h"
 | 
						|
#include "llvm/Analysis/ScalarEvolution.h"
 | 
						|
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 | 
						|
#include "llvm/Analysis/TargetTransformInfo.h"
 | 
						|
#include "llvm/Analysis/Verifier.h"
 | 
						|
#include "llvm/IR/Constants.h"
 | 
						|
#include "llvm/IR/DataLayout.h"
 | 
						|
#include "llvm/IR/Function.h"
 | 
						|
#include "llvm/IR/Instructions.h"
 | 
						|
#include "llvm/IR/Module.h"
 | 
						|
#include "llvm/IR/Type.h"
 | 
						|
#include "llvm/IR/Value.h"
 | 
						|
#include "llvm/Pass.h"
 | 
						|
#include "llvm/Support/CommandLine.h"
 | 
						|
#include "llvm/Support/Debug.h"
 | 
						|
#include "llvm/Support/raw_ostream.h"
 | 
						|
#include "llvm/Target/TargetLibraryInfo.h"
 | 
						|
#include "llvm/Transforms/Scalar.h"
 | 
						|
#include "llvm/Transforms/Utils/Local.h"
 | 
						|
#include <algorithm>
 | 
						|
#include <map>
 | 
						|
 | 
						|
using namespace llvm;
 | 
						|
 | 
						|
namespace llvm {
 | 
						|
 | 
						|
BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
 | 
						|
                             TargetTransformInfo *Tti, AliasAnalysis *Aa) :
 | 
						|
                             BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa) {
 | 
						|
  numberInstructions();
 | 
						|
}
 | 
						|
 | 
						|
void BoUpSLP::numberInstructions() {
 | 
						|
  int Loc = 0;
 | 
						|
  InstrIdx.clear();
 | 
						|
  InstrVec.clear();
 | 
						|
  // Number the instructions in the block.
 | 
						|
  for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) {
 | 
						|
    InstrIdx[it] = Loc++;
 | 
						|
    InstrVec.push_back(it);
 | 
						|
    assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
Value *BoUpSLP::getPointerOperand(Value *I) {
 | 
						|
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
 | 
						|
  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
 | 
						|
  return 0;
 | 
						|
}
 | 
						|
 | 
						|
unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
 | 
						|
  if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
 | 
						|
  if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
 | 
						|
  return -1;
 | 
						|
}
 | 
						|
 | 
						|
bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
 | 
						|
  Value *PtrA = getPointerOperand(A);
 | 
						|
  Value *PtrB = getPointerOperand(B);
 | 
						|
  unsigned ASA = getAddressSpaceOperand(A);
 | 
						|
  unsigned ASB = getAddressSpaceOperand(B);
 | 
						|
 | 
						|
  // Check that the address spaces match and that the pointers are valid.
 | 
						|
  if (!PtrA || !PtrB || (ASA != ASB)) return false;
 | 
						|
 | 
						|
  // Check that A and B are of the same type.
 | 
						|
  if (PtrA->getType() != PtrB->getType()) return false;
 | 
						|
 | 
						|
  // Calculate the distance.
 | 
						|
  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
 | 
						|
  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
 | 
						|
  const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
 | 
						|
  const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
 | 
						|
 | 
						|
  // Non constant distance.
 | 
						|
  if (!ConstOffSCEV) return false;
 | 
						|
 | 
						|
  unsigned Offset = ConstOffSCEV->getValue()->getSExtValue();
 | 
						|
  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
 | 
						|
  // The Instructions are connsecutive if the size of the first load/store is
 | 
						|
  // the same as the offset.
 | 
						|
  unsigned Sz = (DL ? DL->getTypeStoreSize(Ty) : Ty->getScalarSizeInBits()/8);
 | 
						|
  return ((-Offset) == Sz);
 | 
						|
}
 | 
						|
 | 
						|
bool BoUpSLP::vectorizeStores(StoreList &Stores, int costThreshold) {
 | 
						|
  ValueSet Heads, Tails;
 | 
						|
  SmallDenseMap<Value*, Value*> ConsecutiveChain;
 | 
						|
  bool Changed = false;
 | 
						|
 | 
						|
  // Do a quadratic search on all of the given stores and find
 | 
						|
  // all of the pairs of loads that follow each other.
 | 
						|
  for (unsigned i = 0, e = Stores.size(); i < e; ++i)
 | 
						|
    for (unsigned j = 0; j < e; ++j) {
 | 
						|
      if (i == j) continue;
 | 
						|
      if (isConsecutiveAccess(Stores[i], Stores[j])) {
 | 
						|
        Tails.insert(Stores[j]);
 | 
						|
        Heads.insert(Stores[i]);
 | 
						|
        ConsecutiveChain[Stores[i]] = Stores[j];
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
  // For stores that start but don't end a link in the chain:
 | 
						|
  for (ValueSet::iterator it = Heads.begin(), e = Heads.end();it != e; ++it) {
 | 
						|
    if (Tails.count(*it)) continue;
 | 
						|
 | 
						|
    // We found a store instr that starts a chain. Now follow the chain and try
 | 
						|
    // to vectorize it.
 | 
						|
    ValueList Operands;
 | 
						|
    Value *I = *it;
 | 
						|
    int MinCost = 0, MinVF = 0;
 | 
						|
    while (Tails.count(I) || Heads.count(I)) {
 | 
						|
      Operands.push_back(I);
 | 
						|
      unsigned VF = Operands.size();
 | 
						|
      if (isPowerOf2_32(VF) && VF > 1) {
 | 
						|
        int cost = getTreeRollCost(Operands, 0);
 | 
						|
        DEBUG(dbgs() << "Found cost=" << cost << " for VF=" << VF << "\n");
 | 
						|
        if (cost < MinCost) { MinCost = cost; MinVF = VF; }
 | 
						|
      }
 | 
						|
      // Move to the next value in the chain.
 | 
						|
      I = ConsecutiveChain[I];
 | 
						|
    }
 | 
						|
 | 
						|
    if (MinCost <= costThreshold && MinVF > 1) {
 | 
						|
      DEBUG(dbgs() << "Decided to vectorize cost=" << MinCost << "\n");
 | 
						|
      vectorizeTree(Operands, MinVF);
 | 
						|
      Stores.clear();
 | 
						|
      // The current numbering is invalid because we added and removed instrs.
 | 
						|
      numberInstructions();
 | 
						|
      Changed = true;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return Changed;
 | 
						|
}
 | 
						|
 | 
						|
int BoUpSLP::getScalarizationCost(Type *Ty) {
 | 
						|
  int Cost = 0;
 | 
						|
  for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
 | 
						|
    Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
 | 
						|
  return Cost;
 | 
						|
}
 | 
						|
 | 
						|
AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
 | 
						|
  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
 | 
						|
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
 | 
						|
  return AliasAnalysis::Location();
 | 
						|
}
 | 
						|
 | 
						|
Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
 | 
						|
  assert(Src->getParent() == Dst->getParent() && "Not the same BB");
 | 
						|
  BasicBlock::iterator I = Src, E = Dst;
 | 
						|
  /// Scan all of the instruction from SRC to DST and check if
 | 
						|
  /// the source may alias.
 | 
						|
  for (++I; I != E; ++I) {
 | 
						|
    // Ignore store instructions that are marked as 'ignore'.
 | 
						|
    if (MemBarrierIgnoreList.count(I)) continue;
 | 
						|
    if (Src->mayWriteToMemory()) /* Write */ {
 | 
						|
      if (!I->mayReadOrWriteMemory()) continue;
 | 
						|
    } else /* Read */ {
 | 
						|
      if (!I->mayWriteToMemory()) continue;
 | 
						|
    }
 | 
						|
    AliasAnalysis::Location A = getLocation(&*I);
 | 
						|
    AliasAnalysis::Location B = getLocation(Src);
 | 
						|
 | 
						|
    if (!A.Ptr || !B.Ptr || AA->alias(A, B))
 | 
						|
      return I;
 | 
						|
  }
 | 
						|
  return 0;
 | 
						|
}
 | 
						|
 | 
						|
int BoUpSLP::getTreeRollCost(ValueList &VL, unsigned Depth) {
 | 
						|
  if (Depth == 6) return max_cost;
 | 
						|
  Type *ScalarTy = VL[0]->getType();
 | 
						|
 | 
						|
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
 | 
						|
    ScalarTy = SI->getValueOperand()->getType();
 | 
						|
 | 
						|
  /// Don't mess with vectors.
 | 
						|
  if (ScalarTy->isVectorTy()) return max_cost;
 | 
						|
 | 
						|
  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
 | 
						|
 | 
						|
  // Check if all of the operands are constants.
 | 
						|
  bool AllConst = true;
 | 
						|
  bool AllSameScalar = true;
 | 
						|
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
 | 
						|
    AllConst &= isa<Constant>(VL[i]);
 | 
						|
    AllSameScalar &= (VL[0] == VL[i]);
 | 
						|
    // Must have a single use.
 | 
						|
    Instruction *I = dyn_cast<Instruction>(VL[i]);
 | 
						|
    // Need to scalarize instructions with multiple users or from other BBs.
 | 
						|
    if (I && ((I->getNumUses() > 1) || (I->getParent() != BB)))
 | 
						|
      return getScalarizationCost(VecTy);
 | 
						|
  }
 | 
						|
 | 
						|
  // Is this a simple vector constant.
 | 
						|
  if (AllConst) return 0;
 | 
						|
 | 
						|
  // If all of the operands are identical we can broadcast them.
 | 
						|
  if (AllSameScalar)
 | 
						|
    return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
 | 
						|
 | 
						|
  // Scalarize unknown structures.
 | 
						|
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
 | 
						|
  if (!VL0) return getScalarizationCost(VecTy);
 | 
						|
  assert(VL0->getParent() == BB && "Wrong BB");
 | 
						|
 | 
						|
  unsigned Opcode = VL0->getOpcode();
 | 
						|
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
 | 
						|
    Instruction *I = dyn_cast<Instruction>(VL[i]);
 | 
						|
    // If not all of the instructions are identical then we have to scalarize.
 | 
						|
    if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
 | 
						|
  }
 | 
						|
 | 
						|
  // Check if it is safe to sink the loads or the stores.
 | 
						|
  if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
 | 
						|
    int MaxIdx = InstrIdx[VL0];
 | 
						|
    for (unsigned i = 1, e = VL.size(); i < e; ++i )
 | 
						|
      MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
 | 
						|
 | 
						|
    Instruction *Last = InstrVec[MaxIdx];
 | 
						|
    for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
 | 
						|
      if (VL[i] == Last) continue;
 | 
						|
      Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
 | 
						|
      if (Barrier) {
 | 
						|
        DEBUG(dbgs() << "LR: Can't sink " << *VL[i] << "\n down to " <<
 | 
						|
              *Last << "\n because of " << *Barrier << "\n");
 | 
						|
        return max_cost;
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  switch (Opcode) {
 | 
						|
  case Instruction::Add:
 | 
						|
  case Instruction::FAdd:
 | 
						|
  case Instruction::Sub:
 | 
						|
  case Instruction::FSub:
 | 
						|
  case Instruction::Mul:
 | 
						|
  case Instruction::FMul:
 | 
						|
  case Instruction::UDiv:
 | 
						|
  case Instruction::SDiv:
 | 
						|
  case Instruction::FDiv:
 | 
						|
  case Instruction::URem:
 | 
						|
  case Instruction::SRem:
 | 
						|
  case Instruction::FRem:
 | 
						|
  case Instruction::Shl:
 | 
						|
  case Instruction::LShr:
 | 
						|
  case Instruction::AShr:
 | 
						|
  case Instruction::And:
 | 
						|
  case Instruction::Or:
 | 
						|
  case Instruction::Xor: {
 | 
						|
    ValueList Operands;
 | 
						|
    int Cost = 0;
 | 
						|
    // Calculate the cost of all of the operands.
 | 
						|
    for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
 | 
						|
      // Prepare the operand vector.
 | 
						|
      for (unsigned j = 0; j < VL.size(); ++j)
 | 
						|
        Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
 | 
						|
      Cost += getTreeRollCost(Operands, Depth+1);
 | 
						|
      Operands.clear();
 | 
						|
    }
 | 
						|
 | 
						|
    // Calculate the cost of this instruction.
 | 
						|
    int ScalarCost = VecTy->getNumElements() *
 | 
						|
      TTI->getArithmeticInstrCost(Opcode, ScalarTy);
 | 
						|
    int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
 | 
						|
    Cost += (VecCost - ScalarCost);
 | 
						|
    return Cost;
 | 
						|
  }
 | 
						|
  case Instruction::Load: {
 | 
						|
    // If we are scalarize the loads, add the cost of forming the vector.
 | 
						|
    for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
 | 
						|
      if (!isConsecutiveAccess(VL[i], VL[i+1]))
 | 
						|
        return getScalarizationCost(VecTy);
 | 
						|
 | 
						|
    // Cost of wide load - cost of scalar loads.
 | 
						|
    int ScalarLdCost = VecTy->getNumElements() *
 | 
						|
      TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
 | 
						|
    int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
 | 
						|
    return VecLdCost - ScalarLdCost;
 | 
						|
  }
 | 
						|
  case Instruction::Store: {
 | 
						|
    // We know that we can merge the stores. Calculate the cost.
 | 
						|
    int ScalarStCost = VecTy->getNumElements() *
 | 
						|
      TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
 | 
						|
    int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0);
 | 
						|
    int StoreCost = VecStCost - ScalarStCost;
 | 
						|
 | 
						|
    ValueList Operands;
 | 
						|
    for (unsigned j = 0; j < VL.size(); ++j) {
 | 
						|
      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
 | 
						|
      MemBarrierIgnoreList.insert(VL[j]);
 | 
						|
    }
 | 
						|
 | 
						|
    int TotalCost =  StoreCost + getTreeRollCost(Operands, Depth + 1);
 | 
						|
    MemBarrierIgnoreList.clear();
 | 
						|
    return TotalCost;
 | 
						|
  }
 | 
						|
  default:
 | 
						|
    // Unable to vectorize unknown instructions.
 | 
						|
    return getScalarizationCost(VecTy);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
Instruction *BoUpSLP::GetLastInstr(ValueList &VL, unsigned VF) {
 | 
						|
  int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
 | 
						|
  for (unsigned i = 0; i < VF; ++i )
 | 
						|
    MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
 | 
						|
  return InstrVec[MaxIdx + 1];
 | 
						|
}
 | 
						|
 | 
						|
Value *BoUpSLP::Scalarize(ValueList &VL, VectorType *Ty) {
 | 
						|
  IRBuilder<> Builder(GetLastInstr(VL, Ty->getNumElements()));
 | 
						|
  Value *Vec = UndefValue::get(Ty);
 | 
						|
  for (unsigned i=0; i < Ty->getNumElements(); ++i)
 | 
						|
    Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
 | 
						|
  return Vec;
 | 
						|
}
 | 
						|
 | 
						|
Value *BoUpSLP::vectorizeTree(ValueList &VL, int VF) {
 | 
						|
  Type *ScalarTy = VL[0]->getType();
 | 
						|
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
 | 
						|
    ScalarTy = SI->getValueOperand()->getType();
 | 
						|
  VectorType *VecTy = VectorType::get(ScalarTy, VF);
 | 
						|
 | 
						|
  // Check if all of the operands are constants or identical.
 | 
						|
  bool AllConst = true;
 | 
						|
  bool AllSameScalar = true;
 | 
						|
  for (unsigned i = 0, e = VF; i < e; ++i) {
 | 
						|
    AllConst &= !!dyn_cast<Constant>(VL[i]);
 | 
						|
    AllSameScalar &= (VL[0] == VL[i]);
 | 
						|
    // Must have a single use.
 | 
						|
    Instruction *I = dyn_cast<Instruction>(VL[i]);
 | 
						|
    if (I && (I->getNumUses() > 1 || I->getParent() != BB))
 | 
						|
      return Scalarize(VL, VecTy);
 | 
						|
  }
 | 
						|
 | 
						|
  // Is this a simple vector constant.
 | 
						|
  if (AllConst || AllSameScalar) return Scalarize(VL, VecTy);
 | 
						|
 | 
						|
  // Scalarize unknown structures.
 | 
						|
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
 | 
						|
  if (!VL0) return Scalarize(VL, VecTy);
 | 
						|
 | 
						|
  unsigned Opcode = VL0->getOpcode();
 | 
						|
  for (unsigned i = 0, e = VF; i < e; ++i) {
 | 
						|
    Instruction *I = dyn_cast<Instruction>(VL[i]);
 | 
						|
    // If not all of the instructions are identical then we have to scalarize.
 | 
						|
    if (!I || Opcode != I->getOpcode()) return Scalarize(VL, VecTy);
 | 
						|
  }
 | 
						|
 | 
						|
  switch (Opcode) {
 | 
						|
  case Instruction::Add:
 | 
						|
  case Instruction::FAdd:
 | 
						|
  case Instruction::Sub:
 | 
						|
  case Instruction::FSub:
 | 
						|
  case Instruction::Mul:
 | 
						|
  case Instruction::FMul:
 | 
						|
  case Instruction::UDiv:
 | 
						|
  case Instruction::SDiv:
 | 
						|
  case Instruction::FDiv:
 | 
						|
  case Instruction::URem:
 | 
						|
  case Instruction::SRem:
 | 
						|
  case Instruction::FRem:
 | 
						|
  case Instruction::Shl:
 | 
						|
  case Instruction::LShr:
 | 
						|
  case Instruction::AShr:
 | 
						|
  case Instruction::And:
 | 
						|
  case Instruction::Or:
 | 
						|
  case Instruction::Xor: {
 | 
						|
    ValueList LHSVL, RHSVL;
 | 
						|
    for (int i = 0; i < VF; ++i) {
 | 
						|
      RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
 | 
						|
      LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
 | 
						|
    }
 | 
						|
 | 
						|
    Value *RHS = vectorizeTree(RHSVL, VF);
 | 
						|
    Value *LHS = vectorizeTree(LHSVL, VF);
 | 
						|
    IRBuilder<> Builder(GetLastInstr(VL, VF));
 | 
						|
    BinaryOperator *BinOp = dyn_cast<BinaryOperator>(VL0);
 | 
						|
    return Builder.CreateBinOp(BinOp->getOpcode(), RHS,LHS);
 | 
						|
  }
 | 
						|
  case Instruction::Load: {
 | 
						|
    LoadInst *LI = dyn_cast<LoadInst>(VL0);
 | 
						|
    unsigned Alignment = LI->getAlignment();
 | 
						|
 | 
						|
    // Check if all of the loads are consecutive.
 | 
						|
    for (unsigned i = 1, e = VF; i < e; ++i)
 | 
						|
      if (!isConsecutiveAccess(VL[i-1], VL[i]))
 | 
						|
        return Scalarize(VL, VecTy);
 | 
						|
 | 
						|
    IRBuilder<> Builder(GetLastInstr(VL, VF));
 | 
						|
    Value *VecPtr = Builder.CreateBitCast(LI->getPointerOperand(),
 | 
						|
                                          VecTy->getPointerTo());
 | 
						|
    LI = Builder.CreateLoad(VecPtr);
 | 
						|
    LI->setAlignment(Alignment);
 | 
						|
    return LI;
 | 
						|
  }
 | 
						|
  case Instruction::Store: {
 | 
						|
    StoreInst *SI = dyn_cast<StoreInst>(VL0);
 | 
						|
    unsigned Alignment = SI->getAlignment();
 | 
						|
 | 
						|
    ValueList ValueOp;
 | 
						|
    for (int i = 0; i < VF; ++i)
 | 
						|
      ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
 | 
						|
 | 
						|
    Value *VecValue = vectorizeTree(ValueOp, VF);
 | 
						|
 | 
						|
    IRBuilder<> Builder(GetLastInstr(VL, VF));
 | 
						|
    Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
 | 
						|
                                          VecTy->getPointerTo());
 | 
						|
    Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
 | 
						|
 | 
						|
    for (int i = 0; i < VF; ++i)
 | 
						|
      cast<Instruction>(VL[i])->eraseFromParent();
 | 
						|
    return 0;
 | 
						|
  }
 | 
						|
  default:
 | 
						|
    return Scalarize(VL, VecTy);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
} // end of namespace
 |