llvm-6502/test/CodeGen/X86/lsr-reuse.ll
Dan Gohman a10756ee65 Re-implement the main strength-reduction portion of LoopStrengthReduction.
This new version is much more aggressive about doing "full" reduction in
cases where it reduces register pressure, and also more aggressive about
rewriting induction variables to count down (or up) to zero when doing so
reduces register pressure.

It currently uses fairly simplistic algorithms for finding reuse
opportunities, but it introduces a new framework that allows it to combine
multiple strategies at once to form hybrid solutions, instead of doing
all full-reduction or all base+index.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@94061 91177308-0d34-0410-b5e6-96231b3b80d8
2010-01-21 02:09:26 +00:00

160 lines
4.7 KiB
LLVM

; Test driver: the run line below feeds this file to llc targeting x86-64 and
; pipes the generated assembly into FileCheck, which matches it against the
; expectation comments that precede each function in this file.
; RUN: llc < %s -march=x86-64 | FileCheck %s
; Data layout: little-endian ("e"), 64-bit pointers with 64-bit ABI and
; preferred alignment ("p:64:64:64").
target datalayout = "e-p:64:64:64"
; x86-64 with no specific vendor or operating system.
target triple = "x86_64-unknown-unknown"
; Full strength reduction reduces register pressure from 5 to 4 here.
; CHECK: full_me:
; CHECK: movsd (%rsi), %xmm0
; CHECK: mulsd (%rdx), %xmm0
; CHECK: movsd %xmm0, (%rdi)
; CHECK: addq $8, %rsi
; CHECK: addq $8, %rdx
; CHECK: addq $8, %rdi
; CHECK: decq %rcx
; CHECK: jne
; void full_me(double *A, double *B, double *C, i64 n)
; Element-wise product: for each i in [0, n), A[i] = B[i] * C[i].
; The loop is skipped entirely when n <= 0 (signed). The counter %i feeds only
; the three address computations and the exit test; neither it nor n is used
; after the loop.
define void @full_me(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
entry:
; Guard: enter the loop only when n > 0 (signed compare).
%t0 = icmp sgt i64 %n, 0
br i1 %t0, label %loop, label %return
loop:
; %i is 0 on entry from %entry and %i.next on every back edge.
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
; All three element addresses are formed from the same index %i.
%Ai = getelementptr inbounds double* %A, i64 %i
%Bi = getelementptr inbounds double* %B, i64 %i
%Ci = getelementptr inbounds double* %C, i64 %i
; Both operands are loaded before the product is stored to A[i].
%t1 = load double* %Bi
%t2 = load double* %Ci
%m = fmul double %t1, %t2
store double %m, double* %Ai
; Step the counter by one and leave the loop once it equals n.
%i.next = add nsw i64 %i, 1
%exitcond = icmp eq i64 %i.next, %n
br i1 %exitcond, label %return, label %loop
return:
ret void
}
; In this test, the counting IV exit value is used, so full strength reduction
; would not reduce register pressure. IndVarSimplify ought to simplify such
; cases away, but it's useful here to verify that LSR's register pressure
; heuristics are working as expected.
; CHECK: count_me_0:
; CHECK: movsd (%rsi,%rax,8), %xmm0
; CHECK: mulsd (%rdx,%rax,8), %xmm0
; CHECK: movsd %xmm0, (%rdi,%rax,8)
; CHECK: incq %rax
; CHECK: cmpq %rax, %rcx
; CHECK: jne
; i64 count_me_0(double *A, double *B, double *C, i64 n)
; Same loop as a plain element-wise product (A[i] = B[i] * C[i] for i in
; [0, n)), but the function also returns the counter's exit value: 0 when the
; loop is skipped (n <= 0, signed), otherwise the last %i.next. That use keeps
; the counting induction variable live after the loop.
define i64 @count_me_0(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
entry:
; Guard: enter the loop only when n > 0 (signed compare).
%t0 = icmp sgt i64 %n, 0
br i1 %t0, label %loop, label %return
loop:
; %i is 0 on entry from %entry and %i.next on every back edge.
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
; All three element addresses are formed from the same index %i.
%Ai = getelementptr inbounds double* %A, i64 %i
%Bi = getelementptr inbounds double* %B, i64 %i
%Ci = getelementptr inbounds double* %C, i64 %i
; Both operands are loaded before the product is stored to A[i].
%t1 = load double* %Bi
%t2 = load double* %Ci
%m = fmul double %t1, %t2
store double %m, double* %Ai
; Step the counter by one and leave the loop once it equals n.
%i.next = add nsw i64 %i, 1
%exitcond = icmp eq i64 %i.next, %n
br i1 %exitcond, label %return, label %loop
return:
; Result: 0 if the loop never ran, otherwise the final counter value (which
; the exit test above makes equal to n).
%q = phi i64 [ 0, %entry ], [ %i.next, %loop ]
ret i64 %q
}
; In this test, the trip count value is used, so full strength reduction
; would not reduce register pressure.
; (though it would reduce register pressure inside the loop...)
; CHECK: count_me_1:
; CHECK: movsd (%rsi,%rax,8), %xmm0
; CHECK: mulsd (%rdx,%rax,8), %xmm0
; CHECK: movsd %xmm0, (%rdi,%rax,8)
; CHECK: incq %rax
; CHECK: cmpq %rax, %rcx
; CHECK: jne
; i64 count_me_1(double *A, double *B, double *C, i64 n)
; Same loop again (A[i] = B[i] * C[i] for i in [0, n)), but the value returned
; after the loop is n itself rather than the counter: 0 when the loop is
; skipped (n <= 0, signed), otherwise %n. The counter is not used after the
; loop, while the trip count %n is.
define i64 @count_me_1(double* nocapture %A, double* nocapture %B, double* nocapture %C, i64 %n) nounwind {
entry:
; Guard: enter the loop only when n > 0 (signed compare).
%t0 = icmp sgt i64 %n, 0
br i1 %t0, label %loop, label %return
loop:
; %i is 0 on entry from %entry and %i.next on every back edge.
%i = phi i64 [ %i.next, %loop ], [ 0, %entry ]
; All three element addresses are formed from the same index %i.
%Ai = getelementptr inbounds double* %A, i64 %i
%Bi = getelementptr inbounds double* %B, i64 %i
%Ci = getelementptr inbounds double* %C, i64 %i
; Both operands are loaded before the product is stored to A[i].
%t1 = load double* %Bi
%t2 = load double* %Ci
%m = fmul double %t1, %t2
store double %m, double* %Ai
; Step the counter by one and leave the loop once it equals n.
%i.next = add nsw i64 %i, 1
%exitcond = icmp eq i64 %i.next, %n
br i1 %exitcond, label %return, label %loop
return:
; Result: 0 if the loop never ran, otherwise the argument n unchanged.
%q = phi i64 [ 0, %entry ], [ %n, %loop ]
ret i64 %q
}
; This should be fully strength-reduced to reduce register pressure; however,
; the current heuristics get distracted by all the reuse with the stride-1
; induction variable first.
; But even so, be clever and start the stride-1 variable at a non-zero value
; to eliminate an in-loop immediate value.
; CHECK: count_me_2:
; CHECK: movl $5, %eax
; CHECK: align
; CHECK: BB4_1:
; CHECK: movsd (%rdi,%rax,8), %xmm0
; CHECK: addsd (%rsi,%rax,8), %xmm0
; CHECK: movsd %xmm0, (%rdx,%rax,8)
; CHECK: movsd 40(%rdi,%rax,8), %xmm0
; CHECK: addsd 40(%rsi,%rax,8), %xmm0
; CHECK: movsd %xmm0, 40(%rdx,%rax,8)
; CHECK: incq %rax
; CHECK: cmpq $5005, %rax
; CHECK: jne
; void count_me_2(double *A, double *B, double *C)
; Fixed-trip-count loop: i runs from 0 and the loop exits when i + 1 == 5000.
; Each iteration performs two independent element-wise sums at constant
; offsets from the counter:
;   C[i + 5]  = A[i + 5]  + B[i + 5]
;   C[i + 10] = A[i + 10] + B[i + 10]
; There is no entry guard; %entry branches straight into the loop.
define void @count_me_2(double* nocapture %A, double* nocapture %B, double* nocapture %C) nounwind {
entry:
br label %loop
loop:
; %i is 0 on entry from %entry and %i.next on every back edge.
%i = phi i64 [ 0, %entry ], [ %i.next, %loop ]
; First group: index i + 5 is shared by the A, B and C accesses.
%i5 = add i64 %i, 5
%Ai = getelementptr double* %A, i64 %i5
%t2 = load double* %Ai
%Bi = getelementptr double* %B, i64 %i5
%t4 = load double* %Bi
%t5 = fadd double %t2, %t4
%Ci = getelementptr double* %C, i64 %i5
store double %t5, double* %Ci
; Second group: index i + 10, i.e. 5 elements (40 bytes of double) past the
; first group's index.
%i10 = add i64 %i, 10
%Ai10 = getelementptr double* %A, i64 %i10
%t9 = load double* %Ai10
%Bi10 = getelementptr double* %B, i64 %i10
%t11 = load double* %Bi10
%t12 = fadd double %t9, %t11
%Ci10 = getelementptr double* %C, i64 %i10
store double %t12, double* %Ci10
; Step the counter by one and leave the loop once it reaches 5000.
%i.next = add i64 %i, 1
%exitcond = icmp eq i64 %i.next, 5000
br i1 %exitcond, label %return, label %loop
return:
ret void
}