Enable exitValue rewrite only when the cost of expansion is low.

This patch evaluates the expansion cost of the exit value in the IndVarSimplify pass, and only performs the rewrite when the expansion cost is low or when the loop can be deleted as a result of the rewrite. It provides an option "-replexitval=" to control the default aggressiveness of the exit-value rewriting. It also fixes some missing cases in SCEVExpander::isHighCostExpansionHelper to improve the evaluation of SCEV expansion cost.

Differential Revision: http://reviews.llvm.org/D9800


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238507 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Wei Mi
2015-05-28 21:49:07 +00:00
parent 9886da621d
commit 61897e8564
5 changed files with 235 additions and 30 deletions

View File

@@ -0,0 +1,52 @@
; PR23538
; RUN: opt < %s -indvars -loop-deletion -S | FileCheck %s
; Check that IndVarSimplify does not replace the exit value, because otherwise
; a udiv would be introduced by the expansion and the cost would be high.
;
; CHECK-LABEL: @_Z3fooPKcjj(
; CHECK-NOT: udiv
declare void @_Z3mixRjj(i32* dereferenceable(4), i32)
declare void @llvm.lifetime.start(i64, i8* nocapture)
declare void @llvm.lifetime.end(i64, i8* nocapture)
; Test function: walks the buffer %s in 12-byte steps while the remaining
; length is unsigned-greater-than 11, folding one i32 per step into the stack
; slot %a, then passes %a and the leftover length to @_Z3mixRjj and returns
; the final contents of %a.
;
; The value under test is %sub.lcssa / %keylen.0.lcssa: the remaining length
; after the loop, which is used outside the loop.
; NOTE(review): %c is not referenced anywhere in the body.
define i32 @_Z3fooPKcjj(i8* nocapture readonly %s, i32 %len, i32 %c) {
entry:
; %a is the i32 accumulator; it is seeded with a constant before the loop.
%a = alloca i32, align 4
%tmp = bitcast i32* %a to i8*
call void @llvm.lifetime.start(i64 4, i8* %tmp)
store i32 -1640531527, i32* %a, align 4
; Loop guard: skip the loop entirely unless %len is unsigned-greater-than 11.
%cmp8 = icmp ugt i32 %len, 11
br i1 %cmp8, label %while.body.lr.ph, label %while.end
while.body.lr.ph: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body, %while.body.lr.ph
; Loop-carried state: remaining length (starts at %len) and the current
; read pointer (starts at %s).
%keylen.010 = phi i32 [ %len, %while.body.lr.ph ], [ %sub, %while.body ]
%s.addr.09 = phi i8* [ %s, %while.body.lr.ph ], [ %add.ptr, %while.body ]
; Read one i32 from the current pointer.
%tmp1 = bitcast i8* %s.addr.09 to i32*
%tmp2 = load i32, i32* %tmp1, align 4
%shl.i = shl i32 %tmp2, 1
%and.i = and i32 %shl.i, 16843008
; Update the accumulator: %a = (%a + word) - ((word << 1) & 16843008).
; The loop loads from and stores to %a on every iteration.
%tmp3 = load i32, i32* %a, align 4
%sub.i = add i32 %tmp3, %tmp2
%add = sub i32 %sub.i, %and.i
store i32 %add, i32* %a, align 4
; Advance the pointer by 12 bytes and reduce the remaining length by 12.
%add.ptr = getelementptr inbounds i8, i8* %s.addr.09, i64 12
%sub = add i32 %keylen.010, -12
; Continue while the updated remaining length is unsigned-greater-than 11.
%cmp = icmp ugt i32 %sub, 11
br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
while.cond.while.end_crit_edge: ; preds = %while.body
; Single-entry phi carrying %sub out of the loop; this is the exit value
; the test header says must not be rewritten into a closed-form expression.
%sub.lcssa = phi i32 [ %sub, %while.body ]
br label %while.end
while.end: ; preds = %while.cond.while.end_crit_edge, %entry
; Remaining length: the loop's exit value, or %len if the loop was skipped.
%keylen.0.lcssa = phi i32 [ %sub.lcssa, %while.cond.while.end_crit_edge ], [ %len, %entry ]
call void @_Z3mixRjj(i32* dereferenceable(4) %a, i32 %keylen.0.lcssa)
; Reload %a after the call, since the callee receives a pointer to it.
%tmp4 = load i32, i32* %a, align 4
call void @llvm.lifetime.end(i64 4, i8* %tmp)
ret i32 %tmp4
}

View File

@@ -0,0 +1,24 @@
; RUN: opt < %s -indvars -loop-deletion -S |FileCheck %s
; Check that IndVarSimplify replaces the exit value even if the expansion cost
; is high, because the loop can be deleted after the exit value rewrite.
;
; CHECK-LABEL: @_Z3fooPKcjj(
; CHECK: udiv
; CHECK: [[LABEL:^[a-zA-Z0-9_.]+]]:
; CHECK-NOT: br {{.*}} [[LABEL]]
; Test function: repeatedly subtracts 12 from %len while the current value is
; unsigned-greater-than 11, and returns the first value that fails that test.
; The loop contains only the phi, the compare, the add and the branch; it
; performs no loads, stores or calls. %s and %c are not referenced in the body.
; NOTE(review): "#0" references an attribute group, but no "attributes #0"
; definition is visible in this view of the file - confirm it is intentional.
define i32 @_Z3fooPKcjj(i8* nocapture readnone %s, i32 %len, i32 %c) #0 {
entry:
br label %while.cond
while.cond: ; preds = %while.cond, %entry
; Loop-carried remaining length: starts at %len, then takes %sub each trip.
%klen.0 = phi i32 [ %len, %entry ], [ %sub, %while.cond ]
; The exit test is on the phi itself (before the subtraction), so the value
; leaving the loop is %klen.0 rather than %sub.
%cmp = icmp ugt i32 %klen.0, 11
%sub = add i32 %klen.0, -12
br i1 %cmp, label %while.cond, label %while.end
while.end: ; preds = %while.cond
; Single-entry phi carrying the exit value out of the loop; it is the only
; use of a loop-defined value outside the loop.
%klen.0.lcssa = phi i32 [ %klen.0, %while.cond ]
ret i32 %klen.0.lcssa
}

View File

@@ -1,5 +1,4 @@
; RUN: opt < %s -indvars -S | FileCheck %s
;
; RUN: opt < %s -indvars -replexitval=always -S | FileCheck %s
; Make sure IndVars preserves LCSSA form, especially across loop nests.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"