mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-26 07:24:25 +00:00
Revert LoopStrengthReduce.cpp to pre-r94061 for now.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@94123 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -27,7 +27,10 @@ namespace llvm {
|
|||||||
/// and destroy it when finished to allow the release of the associated
|
/// and destroy it when finished to allow the release of the associated
|
||||||
/// memory.
|
/// memory.
|
||||||
class SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
|
class SCEVExpander : public SCEVVisitor<SCEVExpander, Value*> {
|
||||||
|
public:
|
||||||
ScalarEvolution &SE;
|
ScalarEvolution &SE;
|
||||||
|
|
||||||
|
private:
|
||||||
std::map<std::pair<const SCEV *, Instruction *>, AssertingVH<Value> >
|
std::map<std::pair<const SCEV *, Instruction *>, AssertingVH<Value> >
|
||||||
InsertedExpressions;
|
InsertedExpressions;
|
||||||
std::set<Value*> InsertedValues;
|
std::set<Value*> InsertedValues;
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,32 +1,7 @@
|
|||||||
; RUN: llc < %s -march=arm | FileCheck %s
|
; RUN: llc < %s -march=arm | FileCheck %s
|
||||||
|
|
||||||
; This loop is rewritten with an indvar which counts down, which
|
|
||||||
; frees up a register from holding the trip count.
|
|
||||||
|
|
||||||
define void @test(i32* %P, i32 %A, i32 %i) nounwind {
|
define void @test(i32* %P, i32 %A, i32 %i) nounwind {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: str r1, [{{r.*}}, +{{r.*}}, lsl #2]
|
|
||||||
icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
|
|
||||||
br i1 %0, label %return, label %bb
|
|
||||||
|
|
||||||
bb: ; preds = %bb, %entry
|
|
||||||
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
|
|
||||||
%i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
|
|
||||||
%tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
|
|
||||||
store i32 %A, i32* %tmp2
|
|
||||||
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
|
|
||||||
icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1]
|
|
||||||
br i1 %1, label %return, label %bb
|
|
||||||
|
|
||||||
return: ; preds = %bb, %entry
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
; This loop has a non-address use of the count-up indvar, so
|
|
||||||
; it'll remain. Now the original store uses a negative-stride address.
|
|
||||||
|
|
||||||
define void @test_with_forced_iv(i32* %P, i32 %A, i32 %i) nounwind {
|
|
||||||
entry:
|
|
||||||
; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2]
|
; CHECK: str r1, [{{r.*}}, -{{r.*}}, lsl #2]
|
||||||
icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
|
icmp eq i32 %i, 0 ; <i1>:0 [#uses=1]
|
||||||
br i1 %0, label %return, label %bb
|
br i1 %0, label %return, label %bb
|
||||||
@ -36,7 +11,6 @@ bb: ; preds = %bb, %entry
|
|||||||
%i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
|
%i_addr.09.0 = sub i32 %i, %indvar ; <i32> [#uses=1]
|
||||||
%tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
|
%tmp2 = getelementptr i32* %P, i32 %i_addr.09.0 ; <i32*> [#uses=1]
|
||||||
store i32 %A, i32* %tmp2
|
store i32 %A, i32* %tmp2
|
||||||
store i32 %indvar, i32* null
|
|
||||||
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
|
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
|
||||||
icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1]
|
icmp eq i32 %indvar.next, %i ; <i1>:1 [#uses=1]
|
||||||
br i1 %1, label %return, label %bb
|
br i1 %1, label %return, label %bb
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
; RUN: llc < %s -stats |& grep {39.*Number of machine instrs printed}
|
; RUN: llc < %s -stats |& grep {40.*Number of machine instrs printed}
|
||||||
; RUN: llc < %s -stats |& not grep {.*Number of re-materialization}
|
; RUN: llc < %s -stats |& grep {.*Number of re-materialization}
|
||||||
; This test really wants to check that the resultant "cond_true" block only
|
; This test really wants to check that the resultant "cond_true" block only
|
||||||
; has a single store in it, and that cond_true55 only has code to materialize
|
; has a single store in it, and that cond_true55 only has code to materialize
|
||||||
; the constant and do a store. We do *not* want something like this:
|
; the constant and do a store. We do *not* want something like this:
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | not grep "Number of re-materialization"
|
; RUN: llc < %s -mtriple=arm-apple-darwin
|
||||||
|
; RUN: llc < %s -mtriple=arm-apple-darwin -stats -info-output-file - | grep "Number of re-materialization" | grep 3
|
||||||
|
|
||||||
%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
|
%struct.CONTENTBOX = type { i32, i32, i32, i32, i32 }
|
||||||
%struct.LOCBOX = type { i32, i32, i32, i32 }
|
%struct.LOCBOX = type { i32, i32, i32, i32 }
|
||||||
|
@ -1,29 +1,25 @@
|
|||||||
; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic | FileCheck %s
|
; RUN: llc < %s -mtriple=thumbv7-apple-darwin10 -relocation-model=pic | FileCheck %s
|
||||||
; rdar://7387640
|
; rdar://7387640
|
||||||
|
|
||||||
; This now reduces to a single induction variable.
|
; FIXME: We still need to rewrite array reference iv of stride -4 with loop
|
||||||
|
; count iv of stride -1.
|
||||||
; TODO: It still gets a GPR shuffle at the end of the loop
|
|
||||||
; This is because something in instruction selection has decided
|
|
||||||
; that comparing the pre-incremented value with zero is better
|
|
||||||
; than comparing the post-incremented value with -4.
|
|
||||||
|
|
||||||
@G = external global i32 ; <i32*> [#uses=2]
|
@G = external global i32 ; <i32*> [#uses=2]
|
||||||
@array = external global i32* ; <i32**> [#uses=1]
|
@array = external global i32* ; <i32**> [#uses=1]
|
||||||
|
|
||||||
define arm_apcscc void @t() nounwind optsize {
|
define arm_apcscc void @t() nounwind optsize {
|
||||||
; CHECK: t:
|
; CHECK: t:
|
||||||
; CHECK: mov.w r2, #1000
|
; CHECK: mov.w r2, #4000
|
||||||
|
; CHECK: movw r3, #1001
|
||||||
entry:
|
entry:
|
||||||
%.pre = load i32* @G, align 4 ; <i32> [#uses=1]
|
%.pre = load i32* @G, align 4 ; <i32> [#uses=1]
|
||||||
br label %bb
|
br label %bb
|
||||||
|
|
||||||
bb: ; preds = %bb, %entry
|
bb: ; preds = %bb, %entry
|
||||||
; CHECK: LBB1_1:
|
; CHECK: LBB1_1:
|
||||||
; CHECK: cmp r2, #0
|
; CHECK: subs r3, #1
|
||||||
; CHECK: sub.w r9, r2, #1
|
; CHECK: cmp r3, #0
|
||||||
; CHECK: mov r2, r9
|
; CHECK: sub.w r2, r2, #4
|
||||||
|
|
||||||
%0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]
|
%0 = phi i32 [ %.pre, %entry ], [ %3, %bb ] ; <i32> [#uses=1]
|
||||||
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
|
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %bb ] ; <i32> [#uses=2]
|
||||||
%tmp5 = sub i32 1000, %indvar ; <i32> [#uses=1]
|
%tmp5 = sub i32 1000, %indvar ; <i32> [#uses=1]
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
|
; RUN: llc < %s -mtriple=thumbv7-apple-darwin | FileCheck %s
|
||||||
|
|
||||||
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
|
define i32 @t1(i32 %a, i32 %b, i32 %c, i32 %d) {
|
||||||
; CHECK: t1:
|
; CHECK: t1:
|
||||||
; CHECK: it ne
|
; CHECK: it ne
|
||||||
; CHECK: cmpne
|
; CHECK: cmpne
|
||||||
@ -20,12 +20,12 @@ cond_next:
|
|||||||
}
|
}
|
||||||
|
|
||||||
; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
|
; FIXME: Check for # of unconditional branch after adding branch folding post ifcvt.
|
||||||
define i32 @t2(i32 %a, i32 %b) nounwind {
|
define i32 @t2(i32 %a, i32 %b) {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: t2:
|
; CHECK: t2:
|
||||||
; CHECK: ite gt
|
; CHECK: ite le
|
||||||
; CHECK: subgt
|
|
||||||
; CHECK: suble
|
; CHECK: suble
|
||||||
|
; CHECK: subgt
|
||||||
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
|
%tmp1434 = icmp eq i32 %a, %b ; <i1> [#uses=1]
|
||||||
br i1 %tmp1434, label %bb17, label %bb.outer
|
br i1 %tmp1434, label %bb17, label %bb.outer
|
||||||
|
|
||||||
@ -60,14 +60,14 @@ bb17: ; preds = %cond_false, %cond_true, %entry
|
|||||||
|
|
||||||
@x = external global i32* ; <i32**> [#uses=1]
|
@x = external global i32* ; <i32**> [#uses=1]
|
||||||
|
|
||||||
define void @foo(i32 %a) nounwind {
|
define void @foo(i32 %a) {
|
||||||
entry:
|
entry:
|
||||||
%tmp = load i32** @x ; <i32*> [#uses=1]
|
%tmp = load i32** @x ; <i32*> [#uses=1]
|
||||||
store i32 %a, i32* %tmp
|
store i32 %a, i32* %tmp
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @t3(i32 %a, i32 %b) nounwind {
|
define void @t3(i32 %a, i32 %b) {
|
||||||
entry:
|
entry:
|
||||||
; CHECK: t3:
|
; CHECK: t3:
|
||||||
; CHECK: it lt
|
; CHECK: it lt
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\
|
; RUN: llc < %s -march=x86 -mattr=+sse2 -stats -realign-stack=0 |&\
|
||||||
; RUN: grep {asm-printer} | grep 34
|
; RUN: grep {asm-printer} | grep 31
|
||||||
|
|
||||||
target datalayout = "e-p:32:32"
|
target datalayout = "e-p:32:32"
|
||||||
define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
|
define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
|
||||||
@ -40,7 +40,7 @@ cond_true: ; preds = %cond_true, %entry
|
|||||||
%tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
|
%tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
|
||||||
store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7
|
store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7
|
||||||
%tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1]
|
%tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1]
|
||||||
%tmp.upgrd.8 = icmp ne i32 %tmp147, %M ; <i1> [#uses=1]
|
%tmp.upgrd.8 = icmp slt i32 %tmp147, %M ; <i1> [#uses=1]
|
||||||
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
|
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
|
||||||
br i1 %tmp.upgrd.8, label %cond_true, label %return
|
br i1 %tmp.upgrd.8, label %cond_true, label %return
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ cond_next36.i: ; preds = %cond_next.i
|
|||||||
bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i
|
bb.i28.i: ; preds = %bb.i28.i, %cond_next36.i
|
||||||
; CHECK: %bb.i28.i
|
; CHECK: %bb.i28.i
|
||||||
; CHECK: addl $2
|
; CHECK: addl $2
|
||||||
; CHECK: addl $-2
|
; CHECK: addl $2
|
||||||
%j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2]
|
%j.0.reg2mem.0.i16.i = phi i32 [ 0, %cond_next36.i ], [ %indvar.next39.i, %bb.i28.i ] ; <i32> [#uses=2]
|
||||||
%din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1]
|
%din_addr.1.reg2mem.0.i17.i = phi double [ 0.000000e+00, %cond_next36.i ], [ %tmp16.i25.i, %bb.i28.i ] ; <double> [#uses=1]
|
||||||
%tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2]
|
%tmp1.i18.i = fptosi double %din_addr.1.reg2mem.0.i17.i to i32 ; <i32> [#uses=2]
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
; RUN: llc < %s -march=x86-64 -o %t
|
; RUN: llc < %s -march=x86-64 -o %t
|
||||||
; RUN: not grep inc %t
|
; RUN: grep inc %t | count 1
|
||||||
; RUN: grep dec %t | count 2
|
; RUN: grep dec %t | count 2
|
||||||
; RUN: grep addq %t | count 10
|
; RUN: grep addq %t | count 13
|
||||||
; RUN: not grep addb %t
|
; RUN: not grep addb %t
|
||||||
; RUN: grep leaq %t | count 9
|
; RUN: grep leaq %t | count 9
|
||||||
; RUN: grep leal %t | count 2
|
; RUN: grep leal %t | count 3
|
||||||
; RUN: grep movq %t | count 10
|
; RUN: grep movq %t | count 5
|
||||||
|
|
||||||
; IV users in each of the loops from other loops shouldn't cause LSR
|
; IV users in each of the loops from other loops shouldn't cause LSR
|
||||||
; to insert new induction variables. Previously it would create a
|
; to insert new induction variables. Previously it would create a
|
||||||
|
@ -1,19 +1,5 @@
|
|||||||
; RUN: llc < %s -march=x86 -relocation-model=static -mtriple=i686-apple-darwin | FileCheck %s -check-prefix=STATIC
|
; RUN: llc < %s -march=x86 | grep cmp | grep 64
|
||||||
; RUN: llc < %s -march=x86 -relocation-model=pic | FileCheck %s -check-prefix=PIC
|
; RUN: llc < %s -march=x86 | not grep inc
|
||||||
|
|
||||||
; By starting the IV at -64 instead of 0, a cmp is eliminated,
|
|
||||||
; as the flags from the add can be used directly.
|
|
||||||
|
|
||||||
; STATIC: movl $-64, %ecx
|
|
||||||
|
|
||||||
; STATIC: movl %eax, _state+76(%ecx)
|
|
||||||
; STATIC: addl $16, %ecx
|
|
||||||
; STATIC: jne
|
|
||||||
|
|
||||||
; In PIC mode the symbol can't be folded, so the change-compare-stride
|
|
||||||
; trick applies.
|
|
||||||
|
|
||||||
; PIC: cmpl $64
|
|
||||||
|
|
||||||
@state = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
|
@state = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
|
||||||
@S = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
|
@S = external global [0 x i32] ; <[0 x i32]*> [#uses=4]
|
||||||
|
@ -1,10 +1,4 @@
|
|||||||
; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
|
; RUN: llc < %s -mtriple=i386-apple-darwin | grep leal | not grep 16
|
||||||
|
|
||||||
; CHECK: leal 16(%eax), %edx
|
|
||||||
; CHECK: align
|
|
||||||
; CHECK: addl $4, %edx
|
|
||||||
; CHECK: decl %ecx
|
|
||||||
; CHECK: jne LBB1_2
|
|
||||||
|
|
||||||
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 }
|
%struct.CUMULATIVE_ARGS = type { i32, i32, i32, i32, i32, i32, i32 }
|
||||||
%struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] }
|
%struct.bitmap_element = type { %struct.bitmap_element*, %struct.bitmap_element*, i32, [2 x i64] }
|
||||||
|
@ -1,159 +0,0 @@
|
|||||||
; RUN: llc < %s -march=x86-64 | FileCheck %s
|
|
||||||
target datalayout = "e-p:64:64:64"
|
|
||||||
target triple = "x86_64-unknown-unknown"
|
|
||||||
|
|
||||||
; Full strength reduction reduces register pressure from 5 to 4 here.
|
|
||||||
|
|
||||||
; CHECK: full_me:
|
|
||||||
; CHECK: movsd (%rsi), %xmm0
|
|
||||||
; CHECK: mulsd (%rdx), %xmm0
|
|
||||||
; CHECK: movsd %xmm0, (%rdi)
|
|
||||||
; CHECK: addq $8, %rsi
|
|
||||||
; CHECK: addq $8, %rdx
|
|
||||||
; CHECK: addq $8, %rdi
|
|
||||||
; CHECK: decq %rcx
|
|
||||||
; CHECK: jne
|
|
||||||
|
|
||||||
define void @full_me(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
|
|
||||||
entry:
|
|
||||||
%t0 = icmp sgt i64 %n, 0
|
|
||||||
br i1 %t0, label %loop, label %return
|
|
||||||
|
|
||||||
loop:
|
|
||||||
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
|
|
||||||
%Ai = getelementptr inbounds double* %A, i64 %i
|
|
||||||
%Bi = getelementptr inbounds double* %B, i64 %i
|
|
||||||
%Ci = getelementptr inbounds double* %C, i64 %i
|
|
||||||
%t1 = load double* %Bi
|
|
||||||
%t2 = load double* %Ci
|
|
||||||
%m = fmul double %t1, %t2
|
|
||||||
store double %m, double* %Ai
|
|
||||||
%i.next = add nsw i64 %i, 1
|
|
||||||
%exitcond = icmp eq i64 %i.next, %n
|
|
||||||
br i1 %exitcond, label %return, label %loop
|
|
||||||
|
|
||||||
return:
|
|
||||||
ret void
|
|
||||||
}
|
|
||||||
|
|
||||||
; In this test, the counting IV exit value is used, so full strength reduction
|
|
||||||
; would not reduce register pressure. IndVarSimplify ought to simplify such
|
|
||||||
; cases away, but it's useful here to verify that LSR's register pressure
|
|
||||||
; heuristics are working as expected.
|
|
||||||
|
|
||||||
; CHECK: count_me_0:
|
|
||||||
; CHECK: movsd (%rsi,%rax,8), %xmm0
|
|
||||||
; CHECK: mulsd (%rdx,%rax,8), %xmm0
|
|
||||||
; CHECK: movsd %xmm0, (%rdi,%rax,8)
|
|
||||||
; CHECK: incq %rax
|
|
||||||
; CHECK: cmpq %rax, %rcx
|
|
||||||
; CHECK: jne
|
|
||||||
|
|
||||||
define i64 @count_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
|
|
||||||
entry:
|
|
||||||
%t0 = icmp sgt i64 %n, 0
|
|
||||||
br i1 %t0, label %loop, label %return
|
|
||||||
|
|
||||||
loop:
|
|
||||||
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
|
|
||||||
%Ai = getelementptr inbounds double* %A, i64 %i
|
|
||||||
%Bi = getelementptr inbounds double* %B, i64 %i
|
|
||||||
%Ci = getelementptr inbounds double* %C, i64 %i
|
|
||||||
%t1 = load double* %Bi
|
|
||||||
%t2 = load double* %Ci
|
|
||||||
%m = fmul double %t1, %t2
|
|
||||||
store double %m, double* %Ai
|
|
||||||
%i.next = add nsw i64 %i, 1
|
|
||||||
%exitcond = icmp eq i64 %i.next, %n
|
|
||||||
br i1 %exitcond, label %return, label %loop
|
|
||||||
|
|
||||||
return:
|
|
||||||
%q = phi i64 [ 0, %entry ], [ %i.next, %loop ]
|
|
||||||
ret i64 %q
|
|
||||||
}
|
|
||||||
|
|
||||||
; In this test, the trip count value is used, so full strength reduction
|
|
||||||
; would not reduce register pressure.
|
|
||||||
; (though it would reduce register pressure inside the loop...)
|
|
||||||
|
|
||||||
; CHECK: count_me_1:
|
|
||||||
; CHECK: movsd (%rsi,%rax,8), %xmm0
|
|
||||||
; CHECK: mulsd (%rdx,%rax,8), %xmm0
|
|
||||||
; CHECK: movsd %xmm0, (%rdi,%rax,8)
|
|
||||||
; CHECK: incq %rax
|
|
||||||
; CHECK: cmpq %rax, %rcx
|
|
||||||
; CHECK: jne
|
|
||||||
|
|
||||||
define i64 @count_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
|
|
||||||
entry:
|
|
||||||
%t0 = icmp sgt i64 %n, 0
|
|
||||||
br i1 %t0, label %loop, label %return
|
|
||||||
|
|
||||||
loop:
|
|
||||||
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
|
|
||||||
%Ai = getelementptr inbounds double* %A, i64 %i
|
|
||||||
%Bi = getelementptr inbounds double* %B, i64 %i
|
|
||||||
%Ci = getelementptr inbounds double* %C, i64 %i
|
|
||||||
%t1 = load double* %Bi
|
|
||||||
%t2 = load double* %Ci
|
|
||||||
%m = fmul double %t1, %t2
|
|
||||||
store double %m, double* %Ai
|
|
||||||
%i.next = add nsw i64 %i, 1
|
|
||||||
%exitcond = icmp eq i64 %i.next, %n
|
|
||||||
br i1 %exitcond, label %return, label %loop
|
|
||||||
|
|
||||||
return:
|
|
||||||
%q = phi i64 [ 0, %entry ], [ %n, %loop ]
|
|
||||||
ret i64 %q
|
|
||||||
}
|
|
||||||
|
|
||||||
; This should be fully strength-reduced to reduce register pressure, however
|
|
||||||
; the current heuristics get distracted by all the reuse with the stride-1
|
|
||||||
; induction variable first.
|
|
||||||
|
|
||||||
; But even so, be clever and start the stride-1 variable at a non-zero value
|
|
||||||
; to eliminate an in-loop immediate value.
|
|
||||||
|
|
||||||
; CHECK: count_me_2:
|
|
||||||
; CHECK: movl $5, %eax
|
|
||||||
; CHECK: align
|
|
||||||
; CHECK: BB4_1:
|
|
||||||
; CHECK: movsd (%rdi,%rax,8), %xmm0
|
|
||||||
; CHECK: addsd (%rsi,%rax,8), %xmm0
|
|
||||||
; CHECK: movsd %xmm0, (%rdx,%rax,8)
|
|
||||||
; CHECK: movsd 40(%rdi,%rax,8), %xmm0
|
|
||||||
; CHECK: addsd 40(%rsi,%rax,8), %xmm0
|
|
||||||
; CHECK: movsd %xmm0, 40(%rdx,%rax,8)
|
|
||||||
; CHECK: incq %rax
|
|
||||||
; CHECK: cmpq $5005, %rax
|
|
||||||
; CHECK: jne
|
|
||||||
|
|
||||||
define void @count_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C) nounwind {
|
|
||||||
entry:
|
|
||||||
br label %loop
|
|
||||||
|
|
||||||
loop:
|
|
||||||
%i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
|
|
||||||
%i5 = add i64 %i, 5
|
|
||||||
%Ai = getelementptr double* %A, i64 %i5
|
|
||||||
%t2 = load double* %Ai
|
|
||||||
%Bi = getelementptr double* %B, i64 %i5
|
|
||||||
%t4 = load double* %Bi
|
|
||||||
%t5 = fadd double %t2, %t4
|
|
||||||
%Ci = getelementptr double* %C, i64 %i5
|
|
||||||
store double %t5, double* %Ci
|
|
||||||
%i10 = add i64 %i, 10
|
|
||||||
%Ai10 = getelementptr double* %A, i64 %i10
|
|
||||||
%t9 = load double* %Ai10
|
|
||||||
%Bi10 = getelementptr double* %B, i64 %i10
|
|
||||||
%t11 = load double* %Bi10
|
|
||||||
%t12 = fadd double %t9, %t11
|
|
||||||
%Ci10 = getelementptr double* %C, i64 %i10
|
|
||||||
store double %t12, double* %Ci10
|
|
||||||
%i.next = add i64 %i, 1
|
|
||||||
%exitcond = icmp eq i64 %i.next, 5000
|
|
||||||
br i1 %exitcond, label %return, label %loop
|
|
||||||
|
|
||||||
return:
|
|
||||||
ret void
|
|
||||||
}
|
|
@ -4,9 +4,9 @@
|
|||||||
; RUN: not grep sar %t
|
; RUN: not grep sar %t
|
||||||
; RUN: not grep shl %t
|
; RUN: not grep shl %t
|
||||||
; RUN: grep add %t | count 2
|
; RUN: grep add %t | count 2
|
||||||
; RUN: grep inc %t | count 3
|
; RUN: grep inc %t | count 4
|
||||||
; RUN: grep dec %t | count 2
|
; RUN: grep dec %t | count 2
|
||||||
; RUN: grep lea %t | count 3
|
; RUN: grep lea %t | count 2
|
||||||
|
|
||||||
; Optimize away zext-inreg and sext-inreg on the loop induction
|
; Optimize away zext-inreg and sext-inreg on the loop induction
|
||||||
; variable using trip-count information.
|
; variable using trip-count information.
|
||||||
@ -127,9 +127,6 @@ return:
|
|||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
; TODO: If we could handle all the loads and stores as post-inc users, we could
|
|
||||||
; use {-1,+,1} in the induction variable register, and we'd get another inc,
|
|
||||||
; one fewer add, and a comparison with zero.
|
|
||||||
define void @another_count_up(double* %d, i64 %n) nounwind {
|
define void @another_count_up(double* %d, i64 %n) nounwind {
|
||||||
entry:
|
entry:
|
||||||
br label %loop
|
br label %loop
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
; RUN: llc -march=x86-64 < %s -o - | grep {cmpl \\$\[1\], %}
|
; RUN: opt < %s -loop-reduce -S | grep ugt
|
||||||
|
; PR2535
|
||||||
|
|
||||||
@.str = internal constant [4 x i8] c"%d\0A\00"
|
@.str = internal constant [4 x i8] c"%d\0A\00"
|
||||||
|
|
||||||
@ -15,7 +16,7 @@ forbody:
|
|||||||
%add166 = or i32 %mul15, 1 ; <i32> [#uses=1] *
|
%add166 = or i32 %mul15, 1 ; <i32> [#uses=1] *
|
||||||
call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %add166 ) nounwind
|
call i32 (i8*, ...)* @printf( i8* noalias getelementptr ([4 x i8]* @.str, i32 0, i32 0), i32 %add166 ) nounwind
|
||||||
%inc = add i32 %i.0, 1 ; <i32> [#uses=3]
|
%inc = add i32 %i.0, 1 ; <i32> [#uses=3]
|
||||||
%cmp = icmp ne i32 %inc, 1027 ; <i1> [#uses=1]
|
%cmp = icmp ult i32 %inc, 1027 ; <i1> [#uses=1]
|
||||||
br i1 %cmp, label %forbody, label %afterfor
|
br i1 %cmp, label %forbody, label %afterfor
|
||||||
|
|
||||||
afterfor: ; preds = %forcond
|
afterfor: ; preds = %forcond
|
||||||
|
@ -1,9 +1,10 @@
|
|||||||
; RUN: llc < %s -o - | grep {testl %ecx, %ecx}
|
; RUN: llc %s -o - --x86-asm-syntax=att | grep {cmpl \$4}
|
||||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
|
||||||
target triple = "x86_64-apple-darwin9"
|
target triple = "x86_64-apple-darwin9"
|
||||||
|
|
||||||
; The comparison happens before the relevant use, but it can still be rewritten
|
; This is like change-compare-stride-trickiness-1.ll except the comparison
|
||||||
; to compare with zero.
|
; happens before the relevant use, so the comparison stride can't be
|
||||||
|
; easily changed.
|
||||||
|
|
||||||
define void @foo() nounwind {
|
define void @foo() nounwind {
|
||||||
entry:
|
entry:
|
||||||
|
@ -19,7 +19,7 @@ bb3: ; preds = %bb1
|
|||||||
%tmp4 = add i32 %c_addr.1, -1 ; <i32> [#uses=1]
|
%tmp4 = add i32 %c_addr.1, -1 ; <i32> [#uses=1]
|
||||||
%c_addr.1.be = select i1 %tmp2, i32 %tmp3, i32 %tmp4 ; <i32> [#uses=1]
|
%c_addr.1.be = select i1 %tmp2, i32 %tmp3, i32 %tmp4 ; <i32> [#uses=1]
|
||||||
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
|
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
|
||||||
; CHECK: add i32 %lsr.iv, -1
|
; CHECK: sub i32 %lsr.iv, 1
|
||||||
br label %bb6
|
br label %bb6
|
||||||
|
|
||||||
bb6: ; preds = %bb3, %entry
|
bb6: ; preds = %bb3, %entry
|
||||||
|
Reference in New Issue
Block a user