; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
;
; Exercises GVN with load PRE enabled (-enable-load-pre): each function below
; contains a load that is available on some, but not all, incoming paths.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"

; Basic diamond: the value of %p is known on the block3 edge (it was just
; stored) but not on the block2 edge. The expected output has a load placed in
; block2 and the original load in block4 replaced by a phi.
define i32 @test1(i32* %p, i1 %C) {
; CHECK: @test1
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32* %p

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %PRE = load i32* %p
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32
; CHECK-NEXT: ret i32
}

; This is a simple phi translation case.
; The loaded pointer %P2 is itself a phi of %p and %q, so the address has to be
; rewritten per predecessor: %p on the block3 edge (where the store supplies
; the value) and %q on the block2 edge (where a load is expected to appear).
define i32 @test2(i32* %p, i32* %q, i1 %C) {
; CHECK: @test2
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32* %q

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %PRE = load i32* %P2
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

; This is a PRE case that requires phi translation through a GEP.
; The loaded address is %P3 = gep(phi(%p, %q), 1). On the block2 edge that
; translates to gep(%q, 1), which already exists as %B in block1, so the
; inserted load is expected to use %B directly.
define i32 @test3(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK: @test3
block1:
  %B = getelementptr i32* %q, i32 1
  ; NOTE(review): storing %B through %Hack presumably just keeps the GEP from
  ; being dead; confirm against the original commit.
  store i32* %B, i32** %Hack
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK-NEXT: load i32* %B

block3:
  %A = getelementptr i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32* %P2, i32 1
  %PRE = load i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;; Here the loaded address is available, but the computation is in 'block3'
;; which does not dominate 'block2'.
;; The expected output still has a load in block2 and no load left in block4;
;; the directives below do not pin which address operand that load uses.
define i32 @test4(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK: @test4
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK:   load i32*
; CHECK:   br label %block4

block3:
  %B = getelementptr i32* %q, i32 1
  store i32* %B, i32** %Hack

  %A = getelementptr i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32* %P2, i32 1
  %PRE = load i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;void test5(int N, double *G) {
;  int j;
;  for (j = 0; j < N - 1; j++)
;    G[j] = G[j] + G[j+1];
;}
;
; %scevgep7 addresses G[indvar] and %scevgep addresses G[indvar+1], so the
; value read through %scevgep in one iteration is the one %scevgep7 needs in
; the next. The expected output has one load hoisted into the preheader
; (bb.nph) and exactly one load left in the loop body.

define void @test5(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK: @test5
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = zext i32 %0 to i64
  br label %bb

; CHECK: bb.nph:
; CHECK: load double*
; CHECK: br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
  %tmp6 = add i64 %indvar, 1
  %scevgep = getelementptr double* %G, i64 %tmp6
  %scevgep7 = getelementptr double* %G, i64 %indvar
  %2 = load double* %scevgep7, align 8
  %3 = load double* %scevgep, align 8
  %4 = fadd double %2, %3
  store double %4, double* %scevgep7, align 8
  %exitcond = icmp eq i64 %tmp6, %tmp
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;void test6(int N, double *G) {
;  int j;
;  for (j = 0; j < N - 1; j++)
;    G[j+1] = G[j] + G[j+1];
;}
;
; Same loop shape as test5, except the sum is stored to G[indvar+1]
; (%scevgep). That stored value is what the next iteration reads through
; %scevgep7, so again one preheader load plus one in-loop load is expected.

define void @test6(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK: @test6
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 0
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = zext i32 %0 to i64
  br label %bb

; CHECK: bb.nph:
; CHECK: load double*
; CHECK: br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp6, %bb ]
  %tmp6 = add i64 %indvar, 1
  %scevgep = getelementptr double* %G, i64 %tmp6
  %scevgep7 = getelementptr double* %G, i64 %indvar
  %2 = load double* %scevgep7, align 8
  %3 = load double* %scevgep, align 8
  %4 = fadd double %2, %3
  store double %4, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp6, %tmp
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;void test7(int N, double* G) {
;  long j;
;  G[1] = 1;
;  for (j = 1; j < N - 1; j++)
;    G[j+1] = G[j] + G[j+1];
;}

; This requires phi translation of the adds.
; The loop addresses are G[indvar+1] and G[indvar+2]; translating
; G[indvar+1] across the preheader edge (indvar = 0) gives G[1], which the
; entry block has just stored to. No preheader load is checked for here.
define void @test7(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK: @test7
entry:
  %0 = getelementptr inbounds double* %G, i64 1
  store double 1.000000e+00, double* %0, align 8
  %1 = add i32 %N, -1
  %2 = icmp sgt i32 %1, 1
  br i1 %2, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %1 to i64
  %tmp7 = add i64 %tmp, -1
  br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
  %tmp8 = add i64 %indvar, 2
  %scevgep = getelementptr double* %G, i64 %tmp8
  %tmp9 = add i64 %indvar, 1
  %scevgep10 = getelementptr double* %G, i64 %tmp9
  %3 = load double* %scevgep10, align 8
  %4 = load double* %scevgep, align 8
  %5 = fadd double %3, %4
  store double %5, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp9, %tmp7
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

;; Here the loaded address isn't available in 'block2' at all, requiring a new
;; GEP to be inserted into it.
;; Unlike test3/test4 there is no existing gep of %q anywhere in the function.
define i32 @test8(i32* %p, i32* %q, i32** %Hack, i1 %C) {
; CHECK: @test8
block1:
  br i1 %C, label %block2, label %block3

block2:
  br label %block4
; CHECK: block2:
; CHECK:   load i32*
; CHECK:   br label %block4

block3:
  %A = getelementptr i32* %p, i32 1
  store i32 0, i32* %A
  br label %block4

block4:
  %P2 = phi i32* [%p, %block3], [%q, %block2]
  %P3 = getelementptr i32* %P2, i32 1
  %PRE = load i32* %P3
  ret i32 %PRE
; CHECK: block4:
; CHECK-NEXT: phi i32 [
; CHECK-NOT: load
; CHECK: ret i32
}

;void test9(int N, double* G) {
;  long j;
;  for (j = 1; j < N - 1; j++)
;    G[j+1] = G[j] + G[j+1];
;}

; This requires phi translation of the adds.
; Same loop as test7 but without the initial store to G[1], so a load is
; expected to be inserted in the preheader (bb.nph).
define void @test9(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK: @test9
entry:
  ; Unnamed placeholder: it takes the implicit %0 slot so that the following
  ; values can keep the numbers %1 and %2.
  add i32 0, 0
  %1 = add i32 %N, -1
  %2 = icmp sgt i32 %1, 1
  br i1 %2, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %1 to i64
  %tmp7 = add i64 %tmp, -1
  br label %bb

; CHECK: bb.nph:
; CHECK:   load double*
; CHECK:   br label %bb

bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp9, %bb ]
  %tmp8 = add i64 %indvar, 2
  %scevgep = getelementptr double* %G, i64 %tmp8
  %tmp9 = add i64 %indvar, 1
  %scevgep10 = getelementptr double* %G, i64 %tmp9
  %3 = load double* %scevgep10, align 8
  %4 = load double* %scevgep, align 8
  %5 = fadd double %3, %4
  store double %5, double* %scevgep, align 8
  %exitcond = icmp eq i64 %tmp9, %tmp7
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

; Add a testcase for:
;   void test(int N, double* G) {
;     long j;
;     for (j = 1; j < N - 1; j++)
;       G[j] = G[j] + G[j+1] + G[j-1];
;   }
; which we now compile to one load in the loop:
;   LBB1_2: ## %bb
;     movsd 16(%rsi,%rax,8), %xmm2
;     incq %rdx
;     addsd %xmm2, %xmm1
;     addsd %xmm1, %xmm0
;     movapd %xmm2, %xmm1
;     movsd %xmm0, 8(%rsi,%rax,8)
;     incq %rax
;     cmpq %rcx, %rax
;     jne LBB1_2
; instead of:
;   LBB1_2: ## %bb
;     movsd 8(%rsi,%rax,8), %xmm0
;     addsd 16(%rsi,%rax,8), %xmm0
;     addsd (%rsi,%rax,8), %xmm0
;     movsd %xmm0, 8(%rsi,%rax,8)
;     incq %rax
;     cmpq %rcx, %rax
;     jne LBB1_2
; git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@90048 91177308-0d34-0410-b5e6-96231b3b80d8

;void test10(int N, double* G) {
;  long j;
;  for (j = 1; j < N - 1; j++)
;    G[j] = G[j] + G[j+1] + G[j-1];
;}

; PR5501
; The body reads G[indvar], G[indvar+1] and G[indvar+2] and stores to
; G[indvar+1]. The expected output has two loads in the preheader (bb.nph)
; and a single load left in the loop.
define void @test10(i32 %N, double* nocapture %G) nounwind ssp {
; CHECK: @test10
entry:
  %0 = add i32 %N, -1
  %1 = icmp sgt i32 %0, 1
  br i1 %1, label %bb.nph, label %return

bb.nph:
  %tmp = sext i32 %0 to i64
  %tmp8 = add i64 %tmp, -1
  br label %bb
; CHECK: bb.nph:
; CHECK:   load double*
; CHECK:   load double*
; CHECK:   br label %bb


bb:
  %indvar = phi i64 [ 0, %bb.nph ], [ %tmp11, %bb ]
  %scevgep = getelementptr double* %G, i64 %indvar
  %tmp9 = add i64 %indvar, 2
  %scevgep10 = getelementptr double* %G, i64 %tmp9
  %tmp11 = add i64 %indvar, 1
  %scevgep12 = getelementptr double* %G, i64 %tmp11
  %2 = load double* %scevgep12, align 8
  %3 = load double* %scevgep10, align 8
  %4 = fadd double %2, %3
  %5 = load double* %scevgep, align 8
  %6 = fadd double %4, %5
  store double %6, double* %scevgep12, align 8
  %exitcond = icmp eq i64 %tmp11, %tmp8
  br i1 %exitcond, label %return, label %bb

; Should only be one load in the loop.
; CHECK: bb:
; CHECK: load double*
; CHECK-NOT: load double*
; CHECK: br i1 %exitcond

return:
  ret void
}

; Test critical edge splitting.
; block2 has two successors and block4 has two predecessors, so the edge
; block2 -> block4 is critical. The expected output places the new load
; immediately before a 'br label %block4', i.e. in a block that falls through
; only to block4, and turns the load in block4 into a phi.
define i32 @test11(i32* %p, i1 %C, i32 %N) {
; CHECK: @test11
block1:
  br i1 %C, label %block2, label %block3

block2:
  %cond = icmp sgt i32 %N, 1
  br i1 %cond, label %block4, label %block5
; CHECK: load i32* %p
; CHECK-NEXT: br label %block4

block3:
  store i32 0, i32* %p
  br label %block4

block4:
  %PRE = load i32* %p
  br label %block5

block5:
  %ret = phi i32 [ 0, %block2 ], [ %PRE, %block4 ]
  ret i32 %ret
; CHECK: block4:
; CHECK-NEXT: phi i32
}