[AArch64] Enable partial & runtime unrolling on cortex-a57

For inner one of nested loops, it is more likely to be a hot loop,
and the runtime check can be promoted out from patch 0001, so the
overhead is less, we can try a doubled threshold to unroll more loops.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@231632 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Kevin Qin 2015-03-09 06:14:28 +00:00
parent 994b4c784f
commit 40e66277f7
4 changed files with 122 additions and 0 deletions

View File

@ -10,6 +10,7 @@
#include "AArch64TargetTransformInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/CostTable.h"
@ -426,6 +427,15 @@ unsigned AArch64TTIImpl::getMaxInterleaveFactor() {
void AArch64TTIImpl::getUnrollingPreferences(Loop *L,
TTI::UnrollingPreferences &UP) {
// Enable partial unrolling and runtime unrolling.
BaseT::getUnrollingPreferences(L, UP);
// For inner loop, it is more likely to be a hot one, and the runtime check
// can be promoted out from LICM pass, so the overhead is less, let's try
// a larger threshold to unroll more loops.
if (L->getLoopDepth() > 1)
UP.PartialThreshold *= 2;
// Disable partial & runtime unrolling on -Os.
UP.PartialOptSizeThreshold = 0;
}

View File

@ -0,0 +1,3 @@
if not 'AArch64' in config.root.targets:
config.unsupported = True

View File

@ -0,0 +1,76 @@
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s
; Partial unroll 8 times for this loop.
define void @unroll1() nounwind {
entry:
br label %loop
loop:
%iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
%inc = add i32 %iv, 1
%exitcnd = icmp uge i32 %inc, 1024
br i1 %exitcnd, label %exit, label %loop
exit:
ret void
}
; CHECK: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: icmp
; Partial unroll 16 times for this loop.
define void @unroll2() nounwind {
entry:
br label %loop1
loop1:
%iv1 = phi i32 [ 0, %entry ], [ %inc1, %loop1.latch ]
br label %loop2.header
loop2.header:
br label %loop2
loop2:
%iv2 = phi i32 [ 0, %loop2.header ], [ %inc2, %loop2 ]
%inc2 = add i32 %iv2, 1
%exitcnd2 = icmp uge i32 %inc2, 1024
br i1 %exitcnd2, label %exit2, label %loop2
exit2:
br label %loop1.latch
loop1.latch:
%inc1 = add i32 %iv1, 1
%exitcnd1 = icmp uge i32 %inc1, 1024
br i1 %exitcnd2, label %exit, label %loop1
exit:
ret void
}
; CHECK: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: add
; CHECK-NEXT: icmp

View File

@ -0,0 +1,33 @@
; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s
; Tests for unrolling loops with run-time trip counts
; CHECK: %xtraiter = and i32 %n
; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split
; CHECK: for.body.prol:
; CHECK: for.body:
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
%cmp1 = icmp eq i32 %n, 0
br i1 %cmp1, label %for.end, label %for.body
for.body: ; preds = %for.body, %entry
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%sum.02 = phi i32 [ %add, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
%0 = load i32, i32* %arrayidx, align 4
%add = add nsw i32 %0, %sum.02
%indvars.iv.next = add i64 %indvars.iv, 1
%lftr.wideiv = trunc i64 %indvars.iv.next to i32
%exitcond = icmp eq i32 %lftr.wideiv, %n
br i1 %exitcond, label %for.end, label %for.body
for.end: ; preds = %for.body, %entry
%sum.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
ret i32 %sum.0.lcssa
}