; llvm-6502/test/Transforms/LoopVectorize/induction.ll

; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; Make sure that we can handle multiple integer induction variables.
; CHECK-LABEL: @multi_int_induction(
; CHECK: vector.body:
; CHECK:  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
; CHECK:  %normalized.idx = sub i64 %index, 0
; CHECK:  %[[VAR:.*]] = trunc i64 %normalized.idx to i32
; CHECK:  %offset.idx = add i32 190, %[[VAR]]
; Test input: a single loop that carries two integer induction variables of
; different widths (an i64 index and an i32 counter).
;   %A - base pointer of the i32 array the loop stores into.
;   %N - i32 bound; the loop leaves once the truncated i64 index equals it.
define void @multi_int_induction(i32* %A, i32 %N) {
for.body.lr.ph:
  br label %for.body

for.body:
  ; i64 induction: starts at 0, advanced by 1 each iteration.
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  ; i32 induction: starts at 190, advanced by 1 each iteration.
  %count.09 = phi i32 [ 190, %for.body.lr.ph ], [ %inc, %for.body ]
  ; A[indvars.iv] = count.09
  %arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv
  store i32 %count.09, i32* %arrayidx2, align 4
  %inc = add nsw i32 %count.09, 1
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; The exit test is done in i32: the incremented i64 index is truncated and
  ; compared against %N; the loop continues while they differ.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  ret void
}

; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND

; Make sure we remove unneeded vectorization of induction variables.
; In order for instcombine to clean up the vectorized induction variables that we
; create in the loop vectorizer we need to perform some form of redundancy
; elimination to get rid of multiple uses.

; IND-LABEL: scalar_use

; IND:     br label %vector.body
; IND:     vector.body:
;   Vectorized induction variable.
; IND-NOT:  insertelement <2 x i64>
; IND-NOT:  shufflevector <2 x i64>
; IND:     br {{.*}}, label %vector.body

; Test input: the induction variable %iv is only consumed through scalar
; address arithmetic (two adds feeding two getelementptrs).
;   %a       - base pointer of the float array that is read and updated.
;   %b       - scalar multiplier applied to the second loaded element.
;   %offset  - added to %iv to index the element that is loaded and stored.
;   %offset2 - added to %iv to index the second element that is loaded.
;   %n       - the loop exits when the incremented %iv equals this value.
define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
entry:
  br label %for.body

for.body:
  ; Induction variable: starts at 0, advanced by 1 each iteration.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  ; First use of %iv: l1 = a[iv + offset]
  %ind.sum = add i64 %iv, %offset
  %arr.idx = getelementptr inbounds float* %a, i64 %ind.sum
  %l1 = load float* %arr.idx, align 4
  ; Second use of %iv: l2 = a[iv + offset2]
  %ind.sum2 = add i64 %iv, %offset2
  %arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2
  %l2 = load float* %arr.idx2, align 4
  ; a[iv + offset] = l1 + b * l2, computed with fast-math flags.
  %m = fmul fast float %b, %l2
  %ad = fadd fast float %l1, %m
  store float %ad, float* %arr.idx, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %loopexit, label %for.body

loopexit:
  ret void
}


; Make sure that the loop exit count computation does not overflow for i8 and
; i16. The exit count of these loops is i8/i16 max + 1. If we don't cast the
; induction variable to a bigger type the exit count computation will overflow
; to 0.
; PR17532

; CHECK-LABEL: i8_loop
; CHECK: icmp eq i32 {{.*}}, 256
; Test input: a loop controlled by an i8 counter that starts at 0 and is
; decremented until it becomes 0 again. Takes no arguments and returns the
; last value of the `and` chain (%2).
; The entry block is unnamed; the phis below refer to it as %0.
define i32 @i8_loop() nounwind readnone ssp uwtable {
  br label %1

; <label>:1                                       ; preds = %1, %0
  ; Value carried around the loop: starts at 1, replaced by (%a.0 & 4).
  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
  ; i8 loop counter: starts at 0, replaced by the decremented value %3.
  %b.0 = phi i8 [ 0, %0 ], [ %3, %1 ]
  %2 = and i32 %a.0, 4
  ; Decrement without nuw/nsw flags, so the first step wraps 0 around in i8.
  %3 = add i8 %b.0, -1
  ; Leave the loop once the counter has come back to 0.
  %4 = icmp eq i8 %3, 0
  br i1 %4, label %5, label %1

; <label>:5                                       ; preds = %1
  ret i32 %2
}

; CHECK-LABEL: i16_loop
; CHECK: icmp eq i32 {{.*}}, 65536

; Test input: same loop shape as @i8_loop, but the counter is i16. It starts
; at 0 and is decremented until it becomes 0 again. Takes no arguments and
; returns the last value of the `and` chain (%2).
; The entry block is unnamed; the phis below refer to it as %0.
define i32 @i16_loop() nounwind readnone ssp uwtable {
  br label %1

; <label>:1                                       ; preds = %1, %0
  ; Value carried around the loop: starts at 1, replaced by (%a.0 & 4).
  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
  ; i16 loop counter: starts at 0, replaced by the decremented value %3.
  %b.0 = phi i16 [ 0, %0 ], [ %3, %1 ]
  %2 = and i32 %a.0, 4
  ; Decrement without nuw/nsw flags, so the first step wraps 0 around in i16.
  %3 = add i16 %b.0, -1
  ; Leave the loop once the counter has come back to 0.
  %4 = icmp eq i16 %3, 0
  br i1 %4, label %5, label %1

; <label>:5                                       ; preds = %1
  ret i32 %2
}

; This loop has a backedge taken count of i32_max. We need to check for this
; condition and branch directly to the scalar loop.

; CHECK-LABEL: max_i32_backedgetaken
; CHECK:  %backedge.overflow = icmp eq i32 -1, -1
; CHECK:  br i1 %backedge.overflow, label %scalar.ph, label %overflow.checked

; CHECK: scalar.ph:
; CHECK:  %bc.resume.val = phi i32 [ %resume.val, %middle.block ], [ 0, %0 ]
; CHECK:  %bc.merge.rdx = phi i32 [ 1, %0 ], [ %5, %middle.block ]

; Test input: same loop shape as the i8/i16 variants, but with an i32 counter
; that starts at 0 and is decremented until it becomes 0 again. Takes no
; arguments and returns the last value of the `and` chain (%2).
; The entry block is unnamed; the phis below refer to it as %0, and the exit
; block is %5.
define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {

  br label %1

; <label>:1                                       ; preds = %1, %0
  ; Value carried around the loop: starts at 1, replaced by (%a.0 & 4).
  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
  ; i32 loop counter: starts at 0, replaced by the decremented value %3.
  %b.0 = phi i32 [ 0, %0 ], [ %3, %1 ]
  %2 = and i32 %a.0, 4
  ; Decrement without nuw/nsw flags, so the first step wraps 0 around in i32.
  %3 = add i32 %b.0, -1
  ; Leave the loop once the counter has come back to 0.
  %4 = icmp eq i32 %3, 0
  br i1 %4, label %5, label %1

; <label>:5                                       ; preds = %1
  ret i32 %2
}

; When generating the overflow check we must make sure that the induction start value
; is defined before the branch to the scalar preheader.

; CHECK-LABEL: testoverflowcheck
; CHECK: entry
; CHECK: %[[LOAD:.*]] = load i8
; CHECK: %[[VAL:.*]] =  zext i8 %[[LOAD]] to i32
; CHECK: br

; CHECK: scalar.ph
; CHECK: phi i32 [ %{{.*}}, %middle.block ], [ %[[VAL]], %entry ]

; Globals read by @testoverflowcheck before its loop:
;   @e - i8, initialised to 1; its loaded value is the loop counter's start.
;   @d - i32, zero-initialised common global; loaded once and used as the mask.
;   @c - i32, zero-initialised common global; its loaded value seeds the
;        and-chain.
@e = global i8 1, align 1
@d = common global i32 0, align 4
@c = common global i32 0, align 4
; Test input: a loop whose i8 counter starts from a value loaded at run time
; (from @e) rather than from a constant. Takes no arguments and returns the
; final value of the and-chain (%and.i).
define i32 @testoverflowcheck() {
entry:
  ; All three loads happen before the loop, so their values do not change
  ; inside %cond.end.i.
  %.pr.i = load i8* @e, align 1
  %0 = load i32* @d, align 4
  %c.promoted.i = load i32* @c, align 4
  br label %cond.end.i

cond.end.i:
  ; i8 counter: starts at the value loaded from @e, advanced by 1 each iteration.
  %inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ]
  ; Running result: starts at the value loaded from @c, and-ed with %0 each
  ; iteration.
  %and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ]
  %and.i = and i32 %0, %and3.i
  ; Increment without nuw/nsw flags; the loop ends when the i8 counter reaches 0.
  %inc.i = add i8 %inc4.i, 1
  %tobool.i = icmp eq i8 %inc.i, 0
  br i1 %tobool.i, label %loopexit, label %cond.end.i

loopexit:
  ret i32 %and.i
}
Rename getMaximumUnrollFactor -> getMaxInterleaveFactor; also rename option names controlling this variable. "Unroll" is not the appropriate name for this variable. Clang already uses the term "interleave" in pragmas and metadata for this. Differential Revision: http://reviews.llvm.org/D5066 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217528 91177308-0d34-0410-b5e6-96231b3b80d8 2014-09-10 17:58:16 +00:00			`; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S \| FileCheck %s`
LoopVectorize: Handle loops with multiple forward inductions We used to give up if we saw two integer inductions. After this patch, we base further induction variables on the chosen one like we do in the reverse induction and pointer induction case. Fixes PR15720. radar://13851975 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181746 91177308-0d34-0410-b5e6-96231b3b80d8 2013-05-14 00:21:18 +00:00
			`target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"`

			`; Make sure that we can handle multiple integer induction variables.`
Fix missing CHECK-LABELs git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191853 91177308-0d34-0410-b5e6-96231b3b80d8 2013-10-02 20:29:00 +00:00			`; CHECK-LABEL: @multi_int_induction(`
LoopVectorize: Handle loops with multiple forward inductions We used to give up if we saw two integer inductions. After this patch, we base further induction variables on the chosen one like we do in the reverse induction and pointer induction case. Fixes PR15720. radar://13851975 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@181746 91177308-0d34-0410-b5e6-96231b3b80d8 2013-05-14 00:21:18 +00:00			`; CHECK: vector.body:`
			`; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]`
			`; CHECK: %normalized.idx = sub i64 %index, 0`
			`; CHECK: %[[VAR:.*]] = trunc i64 %normalized.idx to i32`
			`; CHECK: %offset.idx = add i32 190, %[[VAR]]`
			`define void @multi_int_induction(i32* %A, i32 %N) {`
			`for.body.lr.ph:`
			`br label %for.body`

			`for.body:`
			`%indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]`
			`%count.09 = phi i32 [ 190, %for.body.lr.ph ], [ %inc, %for.body ]`
			`%arrayidx2 = getelementptr inbounds i32* %A, i64 %indvars.iv`
			`store i32 %count.09, i32* %arrayidx2, align 4`
			`%inc = add nsw i32 %count.09, 1`
			`%indvars.iv.next = add i64 %indvars.iv, 1`
			`%lftr.wideiv = trunc i64 %indvars.iv.next to i32`
			`%exitcond = icmp ne i32 %lftr.wideiv, %N`
			`br i1 %exitcond, label %for.body, label %for.end`

			`for.end:`
			`ret void`
			`}`

Rename getMaximumUnrollFactor -> getMaxInterleaveFactor; also rename option names controlling this variable. "Unroll" is not the appropriate name for this variable. Clang already uses the term "interleave" in pragmas and metadata for this. Differential Revision: http://reviews.llvm.org/D5066 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217528 91177308-0d34-0410-b5e6-96231b3b80d8 2014-09-10 17:58:16 +00:00			`; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S \| FileCheck %s --check-prefix=IND`
LoopVectorizer: Perform redundancy elimination on induction variables When the loop vectorizer was part of the SCC inliner pass manager gvn would run after the loop vectorizer followed by instcombine. This way redundancy (multiple uses) were removed and instcombine could perform scalarization on the induction variables. Having moved the loop vectorizer to later we no longer run any form of redundancy elimination before we perform instcombine. This caused vectorized induction variables to survive that did not before. On a recent iMac this helps linpack back from 6000Mflops to 7000Mflops. This should also help lpbench and paq8p. I ran a Release (without Asserts) build over the test-suite and did not see any negative impact on compile time. radar://15339680 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193891 91177308-0d34-0410-b5e6-96231b3b80d8 2013-11-01 22:18:19 +00:00
			`; Make sure we remove unneeded vectorization of induction variables.`
			`; In order for instcombine to cleanup the vectorized induction variables that we`
			`; create in the loop vectorizer we need to perform some form of redundancy`
			`; elimination to get rid of multiple uses.`

			`; IND-LABEL: scalar_use`

			`; IND: br label %vector.body`
			`; IND: vector.body:`
			`; Vectorized induction variable.`
			`; IND-NOT: insertelement <2 x i64>`
			`; IND-NOT: shufflevector <2 x i64>`
			`; IND: br {{.*}}, label %vector.body`

			`define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {`
			`entry:`
			`br label %for.body`

			`for.body:`
			`%iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]`
			`%ind.sum = add i64 %iv, %offset`
			`%arr.idx = getelementptr inbounds float* %a, i64 %ind.sum`
			`%l1 = load float* %arr.idx, align 4`
			`%ind.sum2 = add i64 %iv, %offset2`
			`%arr.idx2 = getelementptr inbounds float* %a, i64 %ind.sum2`
			`%l2 = load float* %arr.idx2, align 4`
			`%m = fmul fast float %b, %l2`
			`%ad = fadd fast float %l1, %m`
			`store float %ad, float* %arr.idx, align 4`
			`%iv.next = add nuw nsw i64 %iv, 1`
			`%exitcond = icmp eq i64 %iv.next, %n`
			`br i1 %exitcond, label %loopexit, label %for.body`

			`loopexit:`
			`ret void`
			`}`
LoopVectorizer: Extend the induction variable to a larger type In some case the loop exit count computation can overflow. Extend the type to prevent most of those cases. The problem is loops like: int main () { int a = 1; char b = 0; lbl: a &= 4; b--; if (b) goto lbl; return a; } The backedge count is 255. The induction variable type is i8. If we add one to 255 to get the exit count we overflow to zero. To work around this issue we extend the type of the induction variable to i32 in the case of i8 and i16. PR17532 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195008 91177308-0d34-0410-b5e6-96231b3b80d8 2013-11-18 13:14:32 +00:00

			`; Make sure that the loop exit count computation does not overflow for i8 and`
			`; i16. The exit count of these loops is i8/i16 max + 1. If we don't cast the`
			`; induction variable to a bigger type the exit count computation will overflow`
			`; to 0.`
			`; PR17532`

			`; CHECK-LABEL: i8_loop`
Fix broken CHECK lines. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199016 91177308-0d34-0410-b5e6-96231b3b80d8 2014-01-11 21:06:00 +00:00			`; CHECK: icmp eq i32 {{.*}}, 256`
LoopVectorizer: Extend the induction variable to a larger type In some case the loop exit count computation can overflow. Extend the type to prevent most of those cases. The problem is loops like: int main () { int a = 1; char b = 0; lbl: a &= 4; b--; if (b) goto lbl; return a; } The backedge count is 255. The induction variable type is i8. If we add one to 255 to get the exit count we overflow to zero. To work around this issue we extend the type of the induction variable to i32 in the case of i8 and i16. PR17532 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195008 91177308-0d34-0410-b5e6-96231b3b80d8 2013-11-18 13:14:32 +00:00			`define i32 @i8_loop() nounwind readnone ssp uwtable {`
			`br label %1`

			`; <label>:1 ; preds = %1, %0`
			`%a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]`
			`%b.0 = phi i8 [ 0, %0 ], [ %3, %1 ]`
			`%2 = and i32 %a.0, 4`
			`%3 = add i8 %b.0, -1`
			`%4 = icmp eq i8 %3, 0`
			`br i1 %4, label %5, label %1`

			`; <label>:5 ; preds = %1`
			`ret i32 %2`
			`}`

			`; CHECK-LABEL: i16_loop`
Fix broken CHECK lines. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@199016 91177308-0d34-0410-b5e6-96231b3b80d8 2014-01-11 21:06:00 +00:00			`; CHECK: icmp eq i32 {{.*}}, 65536`
LoopVectorizer: Extend the induction variable to a larger type In some case the loop exit count computation can overflow. Extend the type to prevent most of those cases. The problem is loops like: int main () { int a = 1; char b = 0; lbl: a &= 4; b--; if (b) goto lbl; return a; } The backedge count is 255. The induction variable type is i8. If we add one to 255 to get the exit count we overflow to zero. To work around this issue we extend the type of the induction variable to i32 in the case of i8 and i16. PR17532 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@195008 91177308-0d34-0410-b5e6-96231b3b80d8 2013-11-18 13:14:32 +00:00
			`define i32 @i16_loop() nounwind readnone ssp uwtable {`
			`br label %1`

			`; <label>:1 ; preds = %1, %0`
			`%a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]`
			`%b.0 = phi i16 [ 0, %0 ], [ %3, %1 ]`
			`%2 = and i32 %a.0, 4`
			`%3 = add i16 %b.0, -1`
			`%4 = icmp eq i16 %3, 0`
			`br i1 %4, label %5, label %1`

			`; <label>:5 ; preds = %1`
			`ret i32 %2`
			`}`
LoopVectorizer: Add a check that the backedge taken count + 1 does not overflow The loop vectorizer instantiates be-taken-count + 1 as the loop iteration count. If this expression overflows the generated code was invalid. In case of overflow the code now jumps to the scalar loop. Fixes PR17288. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@209854 91177308-0d34-0410-b5e6-96231b3b80d8 2014-05-29 22:10:01 +00:00
			`; This loop has a backedge taken count of i32_max. We need to check for this`
			`; condition and branch directly to the scalar loop.`

			`; CHECK-LABEL: max_i32_backedgetaken`
			`; CHECK: %backedge.overflow = icmp eq i32 -1, -1`
			`; CHECK: br i1 %backedge.overflow, label %scalar.ph, label %overflow.checked`

			`; CHECK: scalar.ph:`
			`; CHECK: %bc.resume.val = phi i32 [ %resume.val, %middle.block ], [ 0, %0 ]`
			`; CHECK: %bc.merge.rdx = phi i32 [ 1, %0 ], [ %5, %middle.block ]`

			`define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {`

			`br label %1`

			`; <label>:1 ; preds = %1, %0`
			`%a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]`
			`%b.0 = phi i32 [ 0, %0 ], [ %3, %1 ]`
			`%2 = and i32 %a.0, 4`
			`%3 = add i32 %b.0, -1`
			`%4 = icmp eq i32 %3, 0`
			`br i1 %4, label %5, label %1`

			`; <label>:5 ; preds = %1`
			`ret i32 %2`
			`}`
LoopVectorizer: Fix a dominance issue The induction variables start value needs to be defined before we branch (overflow check) to the scalar preheader where we used it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@211460 91177308-0d34-0410-b5e6-96231b3b80d8 2014-06-22 03:38:59 +00:00
			`; When generating the overflow check we must make sure that the induction start value`
			`; is defined before the branch to the scalar preheader.`

			`; CHECK-LABEL: testoverflowcheck`
			`; CHECK: entry`
			`; CHECK: %[[LOAD:.*]] = load i8`
			`; CHECK: %[[VAL:.*]] = zext i8 %[[LOAD]] to i32`
			`; CHECK: br`

			`; CHECK: scalar.ph`
			`; CHECK: phi i32 [ %{{.*}}, %middle.block ], [ %[[VAL]], %entry ]`

			`@e = global i8 1, align 1`
			`@d = common global i32 0, align 4`
			`@c = common global i32 0, align 4`
			`define i32 @testoverflowcheck() {`
			`entry:`
			`%.pr.i = load i8* @e, align 1`
			`%0 = load i32* @d, align 4`
			`%c.promoted.i = load i32* @c, align 4`
			`br label %cond.end.i`

			`cond.end.i:`
			`%inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ]`
			`%and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ]`
			`%and.i = and i32 %0, %and3.i`
			`%inc.i = add i8 %inc4.i, 1`
			`%tobool.i = icmp eq i8 %inc.i, 0`
			`br i1 %tobool.i, label %loopexit, label %cond.end.i`

			`loopexit:`
			`ret i32 %and.i`
			`}`