Revert LoopStrengthReduce.cpp to pre-r94061 for now.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@94123 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dan Gohman 2010-01-22 00:46:49 +00:00
parent 3ec68f97ea
commit 7979b72feb
17 changed files with 2387 additions and 2680 deletions

View File

@ -27,7 +27,10 @@ namespace llvm {
/// and destroy it when finished to allow the release of the associated
/// memory.
class SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
public:
ScalarEvolution &SE;
private:
std::map<std::pair<const SCEV *, Instruction *>, AssertingVH<Value> >
InsertedExpressions;
std::set<Value*> InsertedValues;

File diff suppressed because it is too large Load Diff

View File

@ -1,32 +1,7 @@
; RUN: llc < %s -march=arm | FileCheck %s
; This loop is rewritten with an indvar which counts down, which
; frees up a register from holding the trip count.
define void @test(i32* %P, i32 %A, i32 %i) nounwind {
entry:
; CHECK: str r1, [{{r.*}}, +{{r.*}}, lsl #2]
icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
br i1 %0, label %return, label %bb
bb: ; preds = %bb, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
%i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
%tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
store i32 %A, i32* %tmp2
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1]
br i1 %1, label %return, label %bb
return: ; preds = %bb, %entry
ret void
}
; This loop has a non-address use of the count-up indvar, so
; it'll remain. Now the original store uses a negative-stride address.
define void @test_with_forced_iv(i32* %P, i32 %A, i32 %i) nounwind {
entry:
; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2]
icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
br i1 %0, label %return, label %bb
@ -36,7 +11,6 @@ bb: ; preds = %bb, %entry
%i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
%tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
store i32 %A, i32* %tmp2
store i32 %indvar, i32* null
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1]
br i1 %1, label %return, label %bb

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed}
; RUN: llc < %s -stats |& not grep {.*Number of re-materialization}
; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed}
; RUN: llc < %s -stats |& grep {.*Number of re-materialization}
; This test really wants to check that the resultant "cond_true" block only
; has a single store in it, and that cond_true55 only has code to materialize
; the constant and do a store. We do *not* want something like this:

View File

@ -1,4 +1,5 @@
; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | not grep "Number of re-materialization"
; RUN: llc < %s -mtriple=arm-apple-darwin
; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 3
%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
%struct.LOCBOX = type { i32, i32, i32, i32 }

View File

@ -1,29 +1,25 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic | FileCheck %s
; rdar://7387640
; This now reduces to a single induction variable.
; TODO: It still gets a GPR shuffle at the end of the loop
; This is because something in instruction selection has decided
; that comparing the pre-incremented value with zero is better
; than comparing the post-incremented value with -4.
; FIXME: We still need to rewrite array reference iv of stride -4 with loop
; count iv of stride -1.
@G = external global i32 ; <i32*> [#uses=2]
@array = external global i32* ; <i32**> [#uses=1]
define arm_apcscc void @t() nounwind optsize {
; CHECK: t:
; CHECK: mov.w r2, #1000
; CHECK: mov.w r2, #4000
; CHECK: movw r3, #1001
entry:
%.pre = load i32* @G, align 4 ; <i32> [#uses=1]
br label %bb
bb: ; preds = %bb, %entry
; CHECK: LBB1_1:
; CHECK: cmp r2, #0
; CHECK: sub.w r9, r2, #1
; CHECK: mov r2, r9
; CHECK: subs r3, #1
; CHECK: cmp r3, #0
; CHECK: sub.w r2, r2, #4
%0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
%tmp5 = sub i32 1000, %indvar ; <i32> [#uses=1]

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
; CHECK: t1:
; CHECK: it ne
; CHECK: cmpne
@ -20,12 +20,12 @@ cond_next:
}
; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
define i32 @t2(i32 %a, i32 %b) nounwind {
define i32 @t2(i32 %a, i32 %b) {
entry:
; CHECK: t2:
; CHECK: ite gt
; CHECK: subgt
; CHECK: ite le
; CHECK: suble
; CHECK: subgt
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
br i1 %tmp1434, label %bb17, label %bb.outer
@ -60,14 +60,14 @@ bb17: ; preds = %cond_false, %cond_true, %entry
@x = external global i32* ; <i32**> [#uses=1]
define void @foo(i32 %a) nounwind {
define void @foo(i32 %a) {
entry:
%tmp = load i32** @x ; <i32*> [#uses=1]
store i32 %a, i32* %tmp
ret void
}
define void @t3(i32 %a, i32 %b) nounwind {
define void @t3(i32 %a, i32 %b) {
entry:
; CHECK: t3:
; CHECK: it lt

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\
; RUN: grep {asm-printer} | grep 34
; RUN: grep {asm-printer} | grep 31
target datalayout = "e-p:32:32"
define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
@ -40,7 +40,7 @@ cond_true: ; preds = %cond_true, %entry
%tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7
%tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1]
%tmp.upgrd.8 = icmp ne i32 %tmp147, %M ; <i1> [#uses=1]
%tmp.upgrd.8 = icmp slt i32 %tmp147, %M ; <i1> [#uses=1]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
br i1 %tmp.upgrd.8, label %cond_true, label %return

View File

@ -35,7 +35,7 @@ cond_next36.i: ; preds = %cond_next.i
bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i
; CHECK: %bb.i28.i
; CHECK: addl $2
; CHECK: addl $-2
; CHECK: addl $2
%j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2]
%din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1]
%tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2]

View File

@ -1,11 +1,11 @@
; RUN: llc < %s -march=x86-64 -o %t
; RUN: not grep inc %t
; RUN: grep inc %t | count 1
; RUN: grep dec %t | count 2
; RUN: grep addq %t | count 10
; RUN: grep addq %t | count 13
; RUN: not grep addb %t
; RUN: grep leaq %t | count 9
; RUN: grep leal %t | count 2
; RUN: grep movq %t | count 10
; RUN: grep leal %t | count 3
; RUN: grep movq %t | count 5
; IV users in each of the loops from other loops shouldn't cause LSR
; to insert new induction variables. Previously it would create a

View File

@ -1,19 +1,5 @@
; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC
; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
; By starting the IV at -64 instead of 0, a cmp is eliminated,
; as the flags from the add can be used directly.
; STATIC: movl $-64, %ecx
; STATIC: movl %eax, _state+76(%ecx)
; STATIC: addl $16, %ecx
; STATIC: jne
; In PIC mode the symbol can't be folded, so the change-compare-stride
; trick applies.
; PIC: cmpl $64
; RUN: llc < %s -march=x86 | grep cmp | grep 64
; RUN: llc < %s -march=x86 | not grep inc
@state = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
@S = external global [0 x i32] ; <[0 x i32]*> [#uses=4]

View File

@ -1,10 +1,4 @@
; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
; CHECK: leal 16(%eax), %edx
; CHECK: align
; CHECK: addl $4, %edx
; CHECK: decl %ecx
; CHECK: jne LBB1_2
; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 }
%struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] }

View File

@ -1,159 +0,0 @@
; RUN: llc < %s -march=x86-64 | FileCheck %s
target datalayout = "e-p:64:64:64"
target triple = "x86_64-unknown-unknown"
; Full strength reduction reduces register pressure from 5 to 4 here.
; CHECK: full_me:
; CHECK: movsd (%rsi), %xmm0
; CHECK: mulsd (%rdx), %xmm0
; CHECK: movsd %xmm0, (%rdi)
; CHECK: addq $8, %rsi
; CHECK: addq $8, %rdx
; CHECK: addq $8, %rdi
; CHECK: decq %rcx
; CHECK: jne
define void @full_me(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
entry:
%t0 = icmp sgt i64 %n, 0
br i1 %t0, label %loop, label %return
loop:
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
%Ai = getelementptr inbounds double* %A, i64 %i
%Bi = getelementptr inbounds double* %B, i64 %i
%Ci = getelementptr inbounds double* %C, i64 %i
%t1 = load double* %Bi
%t2 = load double* %Ci
%m = fmul double %t1, %t2
store double %m, double* %Ai
%i.next = add nsw i64 %i, 1
%exitcond = icmp eq i64 %i.next, %n
br i1 %exitcond, label %return, label %loop
return:
ret void
}
; In this test, the counting IV exit value is used, so full strength reduction
; would not reduce register pressure. IndVarSimplify ought to simplify such
; cases away, but it's useful here to verify that LSR's register pressure
; heuristics are working as expected.
; CHECK: count_me_0:
; CHECK: movsd (%rsi,%rax,8), %xmm0
; CHECK: mulsd (%rdx,%rax,8), %xmm0
; CHECK: movsd %xmm0, (%rdi,%rax,8)
; CHECK: incq %rax
; CHECK: cmpq %rax, %rcx
; CHECK: jne
define i64 @count_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
entry:
%t0 = icmp sgt i64 %n, 0
br i1 %t0, label %loop, label %return
loop:
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
%Ai = getelementptr inbounds double* %A, i64 %i
%Bi = getelementptr inbounds double* %B, i64 %i
%Ci = getelementptr inbounds double* %C, i64 %i
%t1 = load double* %Bi
%t2 = load double* %Ci
%m = fmul double %t1, %t2
store double %m, double* %Ai
%i.next = add nsw i64 %i, 1
%exitcond = icmp eq i64 %i.next, %n
br i1 %exitcond, label %return, label %loop
return:
%q = phi i64 [ 0, %entry ], [ %i.next, %loop ]
ret i64 %q
}
; In this test, the trip count value is used, so full strength reduction
; would not reduce register pressure.
; (though it would reduce register pressure inside the loop...)
; CHECK: count_me_1:
; CHECK: movsd (%rsi,%rax,8), %xmm0
; CHECK: mulsd (%rdx,%rax,8), %xmm0
; CHECK: movsd %xmm0, (%rdi,%rax,8)
; CHECK: incq %rax
; CHECK: cmpq %rax, %rcx
; CHECK: jne
define i64 @count_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
entry:
%t0 = icmp sgt i64 %n, 0
br i1 %t0, label %loop, label %return
loop:
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
%Ai = getelementptr inbounds double* %A, i64 %i
%Bi = getelementptr inbounds double* %B, i64 %i
%Ci = getelementptr inbounds double* %C, i64 %i
%t1 = load double* %Bi
%t2 = load double* %Ci
%m = fmul double %t1, %t2
store double %m, double* %Ai
%i.next = add nsw i64 %i, 1
%exitcond = icmp eq i64 %i.next, %n
br i1 %exitcond, label %return, label %loop
return:
%q = phi i64 [ 0, %entry ], [ %n, %loop ]
ret i64 %q
}
; This should be fully strength-reduced to reduce register pressure, however
; the current heuristics get distracted by all the reuse with the stride-1
; induction variable first.
; But even so, be clever and start the stride-1 variable at a non-zero value
; to eliminate an in-loop immediate value.
; CHECK: count_me_2:
; CHECK: movl $5, %eax
; CHECK: align
; CHECK: BB4_1:
; CHECK: movsd (%rdi,%rax,8), %xmm0
; CHECK: addsd (%rsi,%rax,8), %xmm0
; CHECK: movsd %xmm0, (%rdx,%rax,8)
; CHECK: movsd 40(%rdi,%rax,8), %xmm0
; CHECK: addsd 40(%rsi,%rax,8), %xmm0
; CHECK: movsd %xmm0, 40(%rdx,%rax,8)
; CHECK: incq %rax
; CHECK: cmpq $5005, %rax
; CHECK: jne
define void @count_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C) nounwind {
entry:
br label %loop
loop:
%i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
%i5 = add i64 %i, 5
%Ai = getelementptr double* %A, i64 %i5
%t2 = load double* %Ai
%Bi = getelementptr double* %B, i64 %i5
%t4 = load double* %Bi
%t5 = fadd double %t2, %t4
%Ci = getelementptr double* %C, i64 %i5
store double %t5, double* %Ci
%i10 = add i64 %i, 10
%Ai10 = getelementptr double* %A, i64 %i10
%t9 = load double* %Ai10
%Bi10 = getelementptr double* %B, i64 %i10
%t11 = load double* %Bi10
%t12 = fadd double %t9, %t11
%Ci10 = getelementptr double* %C, i64 %i10
store double %t12, double* %Ci10
%i.next = add i64 %i, 1
%exitcond = icmp eq i64 %i.next, 5000
br i1 %exitcond, label %return, label %loop
return:
ret void
}

View File

@ -4,9 +4,9 @@
; RUN: not grep sar %t
; RUN: not grep shl %t
; RUN: grep add %t | count 2
; RUN: grep inc %t | count 3
; RUN: grep inc %t | count 4
; RUN: grep dec %t | count 2
; RUN: grep lea %t | count 3
; RUN: grep lea %t | count 2
; Optimize away zext-inreg and sext-inreg on the loop induction
; variable using trip-count information.
@ -127,9 +127,6 @@ return:
ret void
}
; TODO: If we could handle all the loads and stores as post-inc users, we could
; use {-1,+,1} in the induction variable register, and we'd get another inc,
; one fewer add, and a comparison with zero.
define void @another_count_up(double* %d, i64 %n) nounwind {
entry:
br label %loop

View File

@ -1,4 +1,5 @@
; RUN: llc -march=x86-64 < %s -o - | grep {cmpl \\$\[1\], %}
; RUN: opt < %s -loop-reduce -S | grep ugt
; PR2535
@.str = internal constant [4 x i8] c"%d\0A\00"
@ -15,7 +16,7 @@ forbody:
%add166 = or i32 %mul15, 1 ; <i32> [#uses=1] *
call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %add166 ) nounwind
%inc = add i32 %i.0, 1 ; <i32> [#uses=3]
%cmp = icmp ne i32 %inc, 1027 ; <i1> [#uses=1]
%cmp = icmp ult i32 %inc, 1027 ; <i1> [#uses=1]
br i1 %cmp, label %forbody, label %afterfor
afterfor: ; preds = %forcond

View File

@ -1,9 +1,10 @@
; RUN: llc < %s -o - | grep {testl %ecx, %ecx}
; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmpl \$4}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-apple-darwin9"
; The comparison happens before the relevant use, but it can still be rewritten
; to compare with zero.
; This is like change-compare-stride-trickiness-1.ll except the comparison
; happens before the relevant use, so the comparison stride can't be
; easily changed.
define void @foo() nounwind {
entry:

View File

@ -19,7 +19,7 @@ bb3: ; preds = %bb1
%tmp4 = add i32 %c_addr.1, -1 ; <i32> [#uses=1]
%c_addr.1.be = select i1 %tmp2, i32 %tmp3, i32 %tmp4 ; <i32> [#uses=1]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
; CHECK: add i32 %lsr.iv, -1
; CHECK: sub i32 %lsr.iv, 1
br label %bb6
bb6: ; preds = %bb3, %entry