[LoopVectorize] Induction variables: support arbitrary constant step.

Previously, only -1 and +1 step values are supported for induction variables. This patch extends LV to support arbitrary constant steps. Initial patch by Alexey Volkov. Some bug fixes are added in the following version. Differential Revision: http://reviews.llvm.org/D6051 and http://reviews.llvm.org/D7193 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227557 91177308-0d34-0410-b5e6-96231b3b80d8
2025-06-23 01:24:30 +00:00 · 2015-01-30 05:02:21 +00:00
parent 7d3a44a692
commit e7769db118
4 changed files with 282 additions and 137 deletions
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@ -355,10 +355,9 @@ protected:
  /// element.
  virtual Value *getBroadcastInstrs(Value *V);
-  /// This function adds 0, 1, 2 ... to each vector element, starting at zero.
+  /// This function adds (StartIdx, StartIdx + Step, StartIdx + 2*Step, ...)
-  /// If Negate is set then negative numbers are added e.g. (0, -1, -2, ...).
+  /// to each vector element of Val. The sequence starts at StartIndex.
-  /// The sequence starts at StartIndex.
+  virtual Value *getStepVector(Value *Val, int StartIdx, Value *Step);
  virtual Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate);
  /// When we go over instructions in the basic block we rely on previous
  /// values within the current basic block or on loop invariant values.
@ -479,7 +478,7 @@ private:
                            bool IfPredicateStore = false) override;
  void vectorizeMemoryInstruction(Instruction *Instr) override;
  Value *getBroadcastInstrs(Value *V) override;
-  Value *getConsecutiveVector(Value* Val, int StartIdx, bool Negate) override;
+  Value *getStepVector(Value *Val, int StartIdx, Value *Step) override;
  Value *reverseVector(Value *Vec) override;
 };
@ -603,11 +602,9 @@ public:
  /// This enum represents the kinds of inductions that we support.
  enum InductionKind {
-    IK_NoInduction,         ///< Not an induction variable.
+    IK_NoInduction,  ///< Not an induction variable.
-    IK_IntInduction,        ///< Integer induction variable. Step = 1.
+    IK_IntInduction, ///< Integer induction variable. Step = C.
-    IK_ReverseIntInduction, ///< Reverse int induction variable. Step = -1.
+    IK_PtrInduction  ///< Pointer induction var. Step = C / sizeof(elem).
    IK_PtrInduction,        ///< Pointer induction var. Step = sizeof(elem).
    IK_ReversePtrInduction  ///< Reverse ptr indvar. Step = - sizeof(elem).
  };
  // This enum represents the kind of minmax reduction.
@ -697,12 +694,67 @@ public:
  /// A struct for saving information about induction variables.
  struct InductionInfo {
-    InductionInfo(Value *Start, InductionKind K) : StartValue(Start), IK(K) {}
+    InductionInfo(Value *Start, InductionKind K, ConstantInt *Step)
-    InductionInfo() : StartValue(nullptr), IK(IK_NoInduction) {}
+        : StartValue(Start), IK(K), StepValue(Step) {
      assert(IK != IK_NoInduction && "Not an induction");
      assert(StartValue && "StartValue is null");
      assert(StepValue && !StepValue->isZero() && "StepValue is zero");
      assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
             "StartValue is not a pointer for pointer induction");
      assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
             "StartValue is not an integer for integer induction");
      assert(StepValue->getType()->isIntegerTy() &&
             "StepValue is not an integer");
    }
    InductionInfo()
        : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
    /// Get the consecutive direction. Returns:
    ///   0 - unknown or non-consecutive.
    ///   1 - consecutive and increasing.
    ///  -1 - consecutive and decreasing.
    int getConsecutiveDirection() const {
      if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
        return StepValue->getSExtValue();
      return 0;
    }
    /// Compute the transformed value of Index at offset StartValue using step
    /// StepValue.
    /// For integer induction, returns StartValue + Index * StepValue.
    /// For pointer induction, returns StartValue[Index * StepValue].
    /// FIXME: The newly created binary instructions should contain nsw/nuw
    /// flags, which can be found from the original scalar operations.
    Value *transform(IRBuilder<> &B, Value *Index) const {
      switch (IK) {
      case IK_IntInduction:
        assert(Index->getType() == StartValue->getType() &&
               "Index type does not match StartValue type");
        if (StepValue->isMinusOne())
          return B.CreateSub(StartValue, Index);
        if (!StepValue->isOne())
          Index = B.CreateMul(Index, StepValue);
        return B.CreateAdd(StartValue, Index);
      case IK_PtrInduction:
        if (StepValue->isMinusOne())
          Index = B.CreateNeg(Index);
        else if (!StepValue->isOne())
          Index = B.CreateMul(Index, StepValue);
        return B.CreateGEP(StartValue, Index);
      case IK_NoInduction:
      default:
        return nullptr;
      }
    }
    /// Start value.
    TrackingVH<Value> StartValue;
    /// Induction kind.
    InductionKind IK;
    /// Step value.
    ConstantInt *StepValue;
  };
  /// ReductionList contains the reduction descriptors for all
@ -822,9 +874,9 @@ private:
  /// pattern corresponding to a min(X, Y) or max(X, Y).
  static ReductionInstDesc isMinMaxSelectCmpPattern(Instruction *I,
                                                    ReductionInstDesc &Prev);
-  /// Returns the induction kind of Phi. This function may return NoInduction
+  /// Returns the induction kind of Phi and record the step. This function may
-  /// if the PHI is not an induction variable.
+  /// return NoInduction if the PHI is not an induction variable.
-  InductionKind isInductionVariable(PHINode *Phi);
+  InductionKind isInductionVariable(PHINode *Phi, ConstantInt *&StepValue);
  /// \brief Collect memory access with loop invariant strides.
  ///
@ -1592,11 +1644,13 @@ Value *InnerLoopVectorizer::getBroadcastInstrs(Value *V) {
  return Shuf;
 }
-Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
+Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx,
-                                                 bool Negate) {
+                                          Value *Step) {
  assert(Val->getType()->isVectorTy() && "Must be a vector");
  assert(Val->getType()->getScalarType()->isIntegerTy() &&
         "Elem must be an integer");
  assert(Step->getType() == Val->getType()->getScalarType() &&
         "Step has wrong type");
  // Create the types.
  Type *ITy = Val->getType()->getScalarType();
  VectorType *Ty = cast<VectorType>(Val->getType());
@ -1604,15 +1658,18 @@ Value *InnerLoopVectorizer::getConsecutiveVector(Value* Val, int StartIdx,
  SmallVector<Constant*, 8> Indices;
  // Create a vector of consecutive numbers from zero to VF.
-  for (int i = 0; i < VLen; ++i) {
+  for (int i = 0; i < VLen; ++i)
-    int64_t Idx = Negate ? (-i) : i;
+    Indices.push_back(ConstantInt::get(ITy, StartIdx + i));
    Indices.push_back(ConstantInt::get(ITy, StartIdx + Idx, Negate));
  }
  // Add the consecutive indices to the vector value.
  Constant *Cv = ConstantVector::get(Indices);
  assert(Cv->getType() == Val->getType() && "Invalid consecutive vec");
-  return Builder.CreateAdd(Val, Cv, "induction");
+  Step = Builder.CreateVectorSplat(VLen, Step);
  assert(Step->getType() == Val->getType() && "Invalid step vec");
  // FIXME: The newly created binary instructions should contain nsw/nuw flags,
  // which can be found from the original scalar operations.
  Step = Builder.CreateMul(Cv, Step);
  return Builder.CreateAdd(Val, Step, "induction");
 }
 /// \brief Find the operand of the GEP that should be checked for consecutive
@ -1650,10 +1707,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
  PHINode *Phi = dyn_cast_or_null<PHINode>(Ptr);
  if (Phi && Inductions.count(Phi)) {
    InductionInfo II = Inductions[Phi];
-    if (IK_PtrInduction == II.IK)
+    return II.getConsecutiveDirection();
      return 1;
    else if (IK_ReversePtrInduction == II.IK)
      return -1;
  }
  GetElementPtrInst *Gep = dyn_cast_or_null<GetElementPtrInst>(Ptr);
@ -1678,10 +1732,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
        return 0;
    InductionInfo II = Inductions[Phi];
-    if (IK_PtrInduction == II.IK)
+    return II.getConsecutiveDirection();
      return 1;
    else if (IK_ReversePtrInduction == II.IK)
      return -1;
  }
  unsigned InductionOperand = getGEPInductionOperand(DL, Gep);
@ -2496,33 +2547,13 @@ void InnerLoopVectorizer::createEmptyLoop() {
      Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
                                                   II.StartValue->getType(),
                                                   "cast.crd");
-      EndValue = BypassBuilder.CreateAdd(CRD, II.StartValue , "ind.end");
+      EndValue = II.transform(BypassBuilder, CRD);
-      break;
+      EndValue->setName("ind.end");
    }
    case LoopVectorizationLegality::IK_ReverseIntInduction: {
      // Convert the CountRoundDown variable to the PHI size.
      Value *CRD = BypassBuilder.CreateSExtOrTrunc(CountRoundDown,
                                                   II.StartValue->getType(),
                                                   "cast.crd");
      // Handle reverse integer induction counter.
      EndValue = BypassBuilder.CreateSub(II.StartValue, CRD, "rev.ind.end");
      break;
    }
    case LoopVectorizationLegality::IK_PtrInduction: {
-      // For pointer induction variables, calculate the offset using
+      EndValue = II.transform(BypassBuilder, CountRoundDown);
-      // the end index.
+      EndValue->setName("ptr.ind.end");
      EndValue = BypassBuilder.CreateGEP(II.StartValue, CountRoundDown,
                                         "ptr.ind.end");
      break;
    }
    case LoopVectorizationLegality::IK_ReversePtrInduction: {
      // The value at the end of the loop for the reverse pointer is calculated
      // by creating a GEP with a negative index starting from the start value.
      Value *Zero = ConstantInt::get(CountRoundDown->getType(), 0);
      Value *NegIdx = BypassBuilder.CreateSub(Zero, CountRoundDown,
                                              "rev.ind.end");
      EndValue = BypassBuilder.CreateGEP(II.StartValue, NegIdx,
                                         "rev.ptr.ind.end");
      break;
    }
    }// end of case
@ -3137,6 +3168,8 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
  LoopVectorizationLegality::InductionInfo II =
  Legal->getInductionVars()->lookup(P);
  // FIXME: The newly created binary instructions should contain nsw/nuw flags,
  // which can be found from the original scalar operations.
  switch (II.IK) {
    case LoopVectorizationLegality::IK_NoInduction:
      llvm_unreachable("Unknown induction");
@ -3154,80 +3187,42 @@ void InnerLoopVectorizer::widenPHIInstruction(Instruction *PN,
        Value *NormalizedIdx = Builder.CreateSub(Induction, ExtendedIdx,
                                                 "normalized.idx");
        NormalizedIdx = Builder.CreateSExtOrTrunc(NormalizedIdx, PhiTy);
-        Broadcasted = Builder.CreateAdd(II.StartValue, NormalizedIdx,
+        Broadcasted = II.transform(Builder, NormalizedIdx);
-                                        "offset.idx");
+        Broadcasted->setName("offset.idx");
      }
      Broadcasted = getBroadcastInstrs(Broadcasted);
      // After broadcasting the induction variable we need to make the vector
      // consecutive by adding 0, 1, 2, etc.
      for (unsigned part = 0; part < UF; ++part)
-        Entry[part] = getConsecutiveVector(Broadcasted, VF * part, false);
+        Entry[part] = getStepVector(Broadcasted, VF * part, II.StepValue);
      return;
    }
    case LoopVectorizationLegality::IK_ReverseIntInduction:
    case LoopVectorizationLegality::IK_PtrInduction:
    case LoopVectorizationLegality::IK_ReversePtrInduction:
      // Handle reverse integer and pointer inductions.
      Value *StartIdx = ExtendedIdx;
      // This is the normalized GEP that starts counting at zero.
      Value *NormalizedIdx = Builder.CreateSub(Induction, StartIdx,
                                               "normalized.idx");
      // Handle the reverse integer induction variable case.
      if (LoopVectorizationLegality::IK_ReverseIntInduction == II.IK) {
        IntegerType *DstTy = cast<IntegerType>(II.StartValue->getType());
        Value *CNI = Builder.CreateSExtOrTrunc(NormalizedIdx, DstTy,
                                               "resize.norm.idx");
        Value *ReverseInd  = Builder.CreateSub(II.StartValue, CNI,
                                               "reverse.idx");
        // This is a new value so do not hoist it out.
        Value *Broadcasted = getBroadcastInstrs(ReverseInd);
        // After broadcasting the induction variable we need to make the
        // vector consecutive by adding  ... -3, -2, -1, 0.
        for (unsigned part = 0; part < UF; ++part)
          Entry[part] = getConsecutiveVector(Broadcasted, -(int)VF * part,
                                             true);
        return;
      }
      // Handle the pointer induction variable case.
      assert(P->getType()->isPointerTy() && "Unexpected type.");
-
+      // This is the normalized GEP that starts counting at zero.
-      // Is this a reverse induction ptr or a consecutive induction ptr.
+      Value *NormalizedIdx =
-      bool Reverse = (LoopVectorizationLegality::IK_ReversePtrInduction ==
+          Builder.CreateSub(Induction, ExtendedIdx, "normalized.idx");
                      II.IK);
      // This is the vector of results. Notice that we don't generate
      // vector geps because scalar geps result in better code.
      for (unsigned part = 0; part < UF; ++part) {
        if (VF == 1) {
-          int EltIndex = (part) * (Reverse ? -1 : 1);
+          int EltIndex = part;
          Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
-          Value *GlobalIdx;
+          Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
-          if (Reverse)
+          Value *SclrGep = II.transform(Builder, GlobalIdx);
-            GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
+          SclrGep->setName("next.gep");
          else
            GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
          Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
                                             "next.gep");
          Entry[part] = SclrGep;
          continue;
        }
        Value *VecVal = UndefValue::get(VectorType::get(P->getType(), VF));
        for (unsigned int i = 0; i < VF; ++i) {
-          int EltIndex = (i + part * VF) * (Reverse ? -1 : 1);
+          int EltIndex = i + part * VF;
          Constant *Idx = ConstantInt::get(Induction->getType(), EltIndex);
-          Value *GlobalIdx;
+          Value *GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx);
-          if (!Reverse)
+          Value *SclrGep = II.transform(Builder, GlobalIdx);
-            GlobalIdx = Builder.CreateAdd(NormalizedIdx, Idx, "gep.idx");
+          SclrGep->setName("next.gep");
          else
            GlobalIdx = Builder.CreateSub(Idx, NormalizedIdx, "gep.ridx");
          Value *SclrGep = Builder.CreateGEP(II.StartValue, GlobalIdx,
                                             "next.gep");
          VecVal = Builder.CreateInsertElement(VecVal, SclrGep,
                                               Builder.getInt32(i),
                                               "insert.gep");
@ -3247,7 +3242,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
      // Nothing to do for PHIs and BR, since we already took care of the
      // loop control flow instructions.
      continue;
-    case Instruction::PHI:{
+    case Instruction::PHI: {
      // Vectorize PHINodes.
      widenPHIInstruction(it, Entry, UF, VF, PV);
      continue;
@ -3368,8 +3363,12 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) {
        Value *ScalarCast = Builder.CreateCast(CI->getOpcode(), Induction,
                                               CI->getType());
        Value *Broadcasted = getBroadcastInstrs(ScalarCast);
        LoopVectorizationLegality::InductionInfo II =
            Legal->getInductionVars()->lookup(OldInduction);
        Constant *Step =
            ConstantInt::getSigned(CI->getType(), II.StepValue->getSExtValue());
        for (unsigned Part = 0; Part < UF; ++Part)
-          Entry[Part] = getConsecutiveVector(Broadcasted, VF * Part, false);
+          Entry[Part] = getStepVector(Broadcasted, VF * Part, Step);
        propagateMetadata(Entry, it);
        break;
      }
@ -3716,8 +3715,9 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
        // This is the value coming from the preheader.
        Value *StartValue = Phi->getIncomingValueForBlock(PreHeader);
        ConstantInt *StepValue = nullptr;
        // Check if this is an induction variable.
-        InductionKind IK = isInductionVariable(Phi);
+        InductionKind IK = isInductionVariable(Phi, StepValue);
        if (IK_NoInduction != IK) {
          // Get the widest type.
@ -3727,7 +3727,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
            WidestIndTy = getWiderType(*DL, PhiTy, WidestIndTy);
          // Int inductions are special because we only allow one IV.
-          if (IK == IK_IntInduction) {
+          if (IK == IK_IntInduction && StepValue->isOne()) {
            // Use the phi node with the widest type as induction. Use the last
            // one if there are multiple (no good reason for doing this other
            // than it is expedient).
@ -3736,7 +3736,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
          }
          DEBUG(dbgs() << "LV: Found an induction variable.\n");
-          Inductions[Phi] = InductionInfo(StartValue, IK);
+          Inductions[Phi] = InductionInfo(StartValue, IK, StepValue);
          // Until we explicitly handle the case of an induction variable with
          // an outside loop user we have to give up vectorizing this loop.
@ -5287,7 +5287,8 @@ LoopVectorizationLegality::isReductionInstr(Instruction *I,
 }
 LoopVectorizationLegality::InductionKind
-LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
+LoopVectorizationLegality::isInductionVariable(PHINode *Phi,
                                               ConstantInt *&StepValue) {
  Type *PhiTy = Phi->getType();
  // We only handle integer and pointer inductions variables.
  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
@ -5300,22 +5301,19 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return IK_NoInduction;
  }
  const SCEV *Step = AR->getStepRecurrence(*SE);
  // Integer inductions need to have a stride of one.
  if (PhiTy->isIntegerTy()) {
    if (Step->isOne())
      return IK_IntInduction;
    if (Step->isAllOnesValue())
      return IK_ReverseIntInduction;
    return IK_NoInduction;
  }
  // Calculate the pointer stride and check if it is consecutive.
  const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
  if (!C)
    return IK_NoInduction;
  ConstantInt *CV = C->getValue();
  if (PhiTy->isIntegerTy()) {
    StepValue = CV;
    return IK_IntInduction;
  }
  assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
  Type *PointerElementType = PhiTy->getPointerElementType();
  // The pointer stride cannot be determined if the pointer element type is not
@ -5323,13 +5321,12 @@ LoopVectorizationLegality::isInductionVariable(PHINode *Phi) {
  if (!PointerElementType->isSized())
    return IK_NoInduction;
-  uint64_t Size = DL->getTypeAllocSize(PointerElementType);
+  int64_t Size = static_cast<int64_t>(DL->getTypeAllocSize(PointerElementType));
-  if (C->getValue()->equalsInt(Size))
+  int64_t CVSize = CV->getSExtValue();
-    return IK_PtrInduction;
+  if (CVSize % Size)
-  else if (C->getValue()->equalsInt(0 - Size))
+    return IK_NoInduction;
-    return IK_ReversePtrInduction;
+  StepValue = ConstantInt::getSigned(CV->getType(), CVSize / Size);
-
+  return IK_PtrInduction;
  return IK_NoInduction;
 }
 bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
@ -6311,11 +6308,10 @@ Value *InnerLoopUnroller::getBroadcastInstrs(Value *V) {
  return V;
 }
-Value *InnerLoopUnroller::getConsecutiveVector(Value* Val, int StartIdx,
+Value *InnerLoopUnroller::getStepVector(Value *Val, int StartIdx, Value *Step) {
                                               bool Negate) {
  // When unrolling and the VF is 1, we only need to add a simple scalar.
  Type *ITy = Val->getType();
  assert(!ITy->isVectorTy() && "Val must be a scalar");
-  Constant *C = ConstantInt::get(ITy, StartIdx, Negate);
+  Constant *C = ConstantInt::get(ITy, StartIdx);
-  return Builder.CreateAdd(Val, C, "induction");
+  return Builder.CreateAdd(Val, Builder.CreateMul(C, Step), "induction");
 }
--- a/test/Transforms/LoopVectorize/arbitrary-induction-step.ll
+++ b/test/Transforms/LoopVectorize/arbitrary-induction-step.ll
@ -0,0 +1,150 @@
 ; RUN: opt -S < %s -loop-vectorize 2>&1 | FileCheck %s
 ; RUN: opt -S < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 | FileCheck %s --check-prefix=FORCE-VEC
 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
 target triple = "aarch64--linux-gnueabi"
 ; Test integer induction variable of step 2:
 ;   for (int i = 0; i < 1024; i+=2) {
 ;     int tmp = *A++;
 ;     sum += i * tmp;
 ;   }
 ; CHECK-LABEL: @ind_plus2(
 ; CHECK: load <4 x i32>*
 ; CHECK: load <4 x i32>*
 ; CHECK: mul nsw <4 x i32>
 ; CHECK: mul nsw <4 x i32>
 ; CHECK: add nsw <4 x i32>
 ; CHECK: add nsw <4 x i32>
 ; CHECK: %index.next = add i64 %index, 8
 ; CHECK: icmp eq i64 %index.next, 512
 ; FORCE-VEC-LABEL: @ind_plus2(
 ; FORCE-VEC: %wide.load = load <2 x i32>*
 ; FORCE-VEC: mul nsw <2 x i32>
 ; FORCE-VEC: add nsw <2 x i32>
 ; FORCE-VEC: %index.next = add i64 %index, 2
 ; FORCE-VEC: icmp eq i64 %index.next, 512
 define i32 @ind_plus2(i32* %A) {
 entry:
  br label %for.body
 for.body:                                         ; preds = %entry, %for.body
  %A.addr = phi i32* [ %A, %entry ], [ %inc.ptr, %for.body ]
  %i = phi i32 [ 0, %entry ], [ %add1, %for.body ]
  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %inc.ptr = getelementptr inbounds i32* %A.addr, i64 1
  %0 = load i32* %A.addr, align 4
  %mul = mul nsw i32 %0, %i
  %add = add nsw i32 %mul, %sum
  %add1 = add nsw i32 %i, 2
  %cmp = icmp slt i32 %add1, 1024
  br i1 %cmp, label %for.body, label %for.end
 for.end:                                          ; preds = %for.body
  %add.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %add.lcssa
 }
 ; Test integer induction variable of step -2:
 ;   for (int i = 1024; i > 0; i-=2) {
 ;     int tmp = *A++;
 ;     sum += i * tmp;
 ;   }
 ; CHECK-LABEL: @ind_minus2(
 ; CHECK: load <4 x i32>*
 ; CHECK: load <4 x i32>*
 ; CHECK: mul nsw <4 x i32>
 ; CHECK: mul nsw <4 x i32>
 ; CHECK: add nsw <4 x i32>
 ; CHECK: add nsw <4 x i32>
 ; CHECK: %index.next = add i64 %index, 8
 ; CHECK: icmp eq i64 %index.next, 512
 ; FORCE-VEC-LABEL: @ind_minus2(
 ; FORCE-VEC: %wide.load = load <2 x i32>*
 ; FORCE-VEC: mul nsw <2 x i32>
 ; FORCE-VEC: add nsw <2 x i32>
 ; FORCE-VEC: %index.next = add i64 %index, 2
 ; FORCE-VEC: icmp eq i64 %index.next, 512
 define i32 @ind_minus2(i32* %A) {
 entry:
  br label %for.body
 for.body:                                         ; preds = %entry, %for.body
  %A.addr = phi i32* [ %A, %entry ], [ %inc.ptr, %for.body ]
  %i = phi i32 [ 1024, %entry ], [ %sub, %for.body ]
  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %inc.ptr = getelementptr inbounds i32* %A.addr, i64 1
  %0 = load i32* %A.addr, align 4
  %mul = mul nsw i32 %0, %i
  %add = add nsw i32 %mul, %sum
  %sub = add nsw i32 %i, -2
  %cmp = icmp sgt i32 %i, 2
  br i1 %cmp, label %for.body, label %for.end
 for.end:                                          ; preds = %for.body
  %add.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %add.lcssa
 }
 ; Test pointer induction variable of step 2. As currently we don't support
 ; masked load/store, vectorization is possible but not beneficial. If loop
 ; vectorization is not enforced, LV will only do interleave.
 ;   for (int i = 0; i < 1024; i++) {
 ;     int tmp0 = *A++;
 ;     int tmp1 = *A++;
 ;     sum += tmp0 * tmp1;
 ;   }
 ; CHECK-LABEL: @ptr_ind_plus2(
 ; CHECK: load i32*
 ; CHECK: load i32*
 ; CHECK: load i32*
 ; CHECK: load i32*
 ; CHECK: mul nsw i32
 ; CHECK: mul nsw i32
 ; CHECK: add nsw i32
 ; CHECK: add nsw i32
 ; CHECK: %index.next = add i64 %index, 2
 ; CHECK: %21 = icmp eq i64 %index.next, 1024
 ; FORCE-VEC-LABEL: @ptr_ind_plus2(
 ; FORCE-VEC: load i32*
 ; FORCE-VEC: insertelement <2 x i32>
 ; FORCE-VEC: load i32*
 ; FORCE-VEC: insertelement <2 x i32>
 ; FORCE-VEC: load i32*
 ; FORCE-VEC: insertelement <2 x i32>
 ; FORCE-VEC: load i32*
 ; FORCE-VEC: insertelement <2 x i32>
 ; FORCE-VEC: mul nsw <2 x i32>
 ; FORCE-VEC: add nsw <2 x i32>
 ; FORCE-VEC: %index.next = add i64 %index, 2
 ; FORCE-VEC: icmp eq i64 %index.next, 1024
 define i32 @ptr_ind_plus2(i32* %A) {
 entry:
  br label %for.body
 for.body:                                         ; preds = %for.body, %entry
  %A.addr = phi i32* [ %A, %entry ], [ %inc.ptr1, %for.body ]
  %sum = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %inc.ptr = getelementptr inbounds i32* %A.addr, i64 1
  %0 = load i32* %A.addr, align 4
  %inc.ptr1 = getelementptr inbounds i32* %A.addr, i64 2
  %1 = load i32* %inc.ptr, align 4
  %mul = mul nsw i32 %1, %0
  %add = add nsw i32 %mul, %sum
  %inc = add nsw i32 %i, 1
  %exitcond = icmp eq i32 %inc, 1024
  br i1 %exitcond, label %for.end, label %for.body
 for.end:                                          ; preds = %for.body
  %add.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %add.lcssa
 }
--- a/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@ -388,9 +388,8 @@ define void @example12() nounwind uwtable ssp {
  ret void
 }
 ; Can't vectorize because of reductions.
 ;CHECK-LABEL: @example13(
-;CHECK-NOT: <4 x i32>
+;CHECK: <4 x i32>
 ;CHECK: ret void
 define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %out) nounwind uwtable ssp {
  br label %.preheader
--- a/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/test/Transforms/LoopVectorize/reverse_induction.ll
@ -97,7 +97,7 @@ loopend:
 ; CHECK: vector.body
 ; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
 ; CHECK: %normalized.idx = sub i64 %index, 0
-; CHECK: %reverse.idx = sub i64 1023, %normalized.idx
+; CHECK: %offset.idx = sub i64 1023, %normalized.idx
 ; CHECK: trunc i64 %index to i8
 define void @reverse_forward_induction_i64_i8() {
@ -124,7 +124,7 @@ while.end:
 ; CHECK: vector.body:
 ; CHECK:  %index = phi i64 [ 129, %vector.ph ], [ %index.next, %vector.body ]
 ; CHECK:  %normalized.idx = sub i64 %index, 129
-; CHECK:  %reverse.idx = sub i64 1023, %normalized.idx
+; CHECK:  %offset.idx = sub i64 1023, %normalized.idx
 ; CHECK:  trunc i64 %index to i8
 define void @reverse_forward_induction_i64_i8_signed() {