llvm-6502/test/CodeGen/AArch64/combine-comparisons-by-cse.ll
Jiangning Liu b20b9bf9fd [AArch64] Add pass to enable additional comparison optimizations by CSE.
Patched by Sergey Dmitrouk.

This pass tries to make consecutive compares of values use the same operands to
allow the CSE pass to remove duplicated instructions. To do this, it analyzes
branches and adjusts comparisons with immediate values by converting:

GE -> GT
GT -> GE
LT -> LE
LE -> LT

and adjusting immediate values appropriately. It basically corrects two
immediate values towards each other to make them equal.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217220 91177308-0d34-0410-b5e6-96231b3b80d8
2014-09-05 02:55:24 +00:00

350 lines
11 KiB
LLVM

; RUN: llc < %s -march=aarch64 -mtriple=aarch64-linux-gnu | FileCheck %s
; Shared i32 globals loaded by the test functions in this file. They are
; declared external (no initializer is visible in this module) to prevent
; possible optimizations.
@a = external global i32
@b = external global i32
@c = external global i32
@d = external global i32
; (a > 10 && b == c) || (a >= 10 && b == d)
; Returns 1 when the condition above holds and 0 otherwise.
; The second comparison is written as "a > 9", which for signed integers is the
; same predicate as "a >= 10", so both branches on a compare a against 10.
; The FileCheck directives expect a cmp followed by b.le and a ret, and then a
; b.lt with no additional cmp emitted between that ret and the b.lt.
define i32 @combine_gt_ge_10() #0 {
; CHECK-LABEL: combine_gt_ge_10
; CHECK: cmp
; CHECK: b.le
; CHECK: ret
; CHECK-NOT: cmp
; CHECK: b.lt
entry:
; First test on a: a > 10.
%0 = load i32* @a, align 4
%cmp = icmp sgt i32 %0, 10
br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
land.lhs.true: ; preds = %entry
; a > 10 holds: succeed if b == c, otherwise still try b == d.
%1 = load i32* @b, align 4
%2 = load i32* @c, align 4
%cmp1 = icmp eq i32 %1, %2
br i1 %cmp1, label %return, label %land.lhs.true3
lor.lhs.false: ; preds = %entry
; Second test on the same loaded value of a: a > 9, i.e. a >= 10.
%cmp2 = icmp sgt i32 %0, 9
br i1 %cmp2, label %land.lhs.true3, label %if.end
land.lhs.true3: ; preds = %lor.lhs.false, %land.lhs.true
; a >= 10 holds: succeed if b == d.
%3 = load i32* @b, align 4
%4 = load i32* @d, align 4
%cmp4 = icmp eq i32 %3, %4
br i1 %cmp4, label %return, label %if.end
if.end: ; preds = %land.lhs.true3, %lor.lhs.false
br label %return
return: ; preds = %if.end, %land.lhs.true3, %land.lhs.true
; 0 when arriving from if.end, 1 from either successful equality test.
%retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
ret i32 %retval.0
}
; (a > 5 && b == c) || (a < 5 && b == d)
; Returns 1 when the condition above holds and 0 otherwise.
; Both comparisons of a use the same immediate (5) with different predicates.
; The FileCheck directives expect a cmp followed by b.le and a ret, and then a
; b.ge with no additional cmp emitted between that ret and the b.ge.
define i32 @combine_gt_lt_5() #0 {
; CHECK-LABEL: combine_gt_lt_5
; CHECK: cmp
; CHECK: b.le
; CHECK: ret
; CHECK-NOT: cmp
; CHECK: b.ge
entry:
; First test on a: a > 5.
%0 = load i32* @a, align 4
%cmp = icmp sgt i32 %0, 5
br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
land.lhs.true: ; preds = %entry
; a > 5 holds: the result is decided by b == c alone (a < 5 cannot also hold).
%1 = load i32* @b, align 4
%2 = load i32* @c, align 4
%cmp1 = icmp eq i32 %1, %2
br i1 %cmp1, label %return, label %if.end
lor.lhs.false: ; preds = %entry
; Second test on the same loaded value of a: a < 5.
%cmp2 = icmp slt i32 %0, 5
br i1 %cmp2, label %land.lhs.true3, label %if.end
land.lhs.true3: ; preds = %lor.lhs.false
; a < 5 holds: succeed if b == d.
%3 = load i32* @b, align 4
%4 = load i32* @d, align 4
%cmp4 = icmp eq i32 %3, %4
br i1 %cmp4, label %return, label %if.end
if.end: ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
br label %return
return: ; preds = %if.end, %land.lhs.true3, %land.lhs.true
; 0 when arriving from if.end, 1 from either successful equality test.
%retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
ret i32 %retval.0
}
; (a < 5 && b == c) || (a <= 5 && b == d)
; Returns 1 when the condition above holds and 0 otherwise.
; The second comparison is written as "a < 6", which for signed integers is the
; same predicate as "a <= 5", so both branches on a compare a against 5.
; The FileCheck directives expect a cmp followed by b.ge and a ret, and then a
; b.gt with no additional cmp emitted between that ret and the b.gt.
; NOTE(review): the function name says "ge" while the second predicate is "<=";
; the name looks inconsistent with the sibling tests but is kept because the
; label directive and the symbol must agree.
define i32 @combine_lt_ge_5() #0 {
; CHECK-LABEL: combine_lt_ge_5
; CHECK: cmp
; CHECK: b.ge
; CHECK: ret
; CHECK-NOT: cmp
; CHECK: b.gt
entry:
; First test on a: a < 5.
%0 = load i32* @a, align 4
%cmp = icmp slt i32 %0, 5
br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
land.lhs.true: ; preds = %entry
; a < 5 holds: succeed if b == c, otherwise still try b == d.
%1 = load i32* @b, align 4
%2 = load i32* @c, align 4
%cmp1 = icmp eq i32 %1, %2
br i1 %cmp1, label %return, label %land.lhs.true3
lor.lhs.false: ; preds = %entry
; Second test on the same loaded value of a: a < 6, i.e. a <= 5.
%cmp2 = icmp slt i32 %0, 6
br i1 %cmp2, label %land.lhs.true3, label %if.end
land.lhs.true3: ; preds = %lor.lhs.false, %land.lhs.true
; a <= 5 holds: succeed if b == d.
%3 = load i32* @b, align 4
%4 = load i32* @d, align 4
%cmp4 = icmp eq i32 %3, %4
br i1 %cmp4, label %return, label %if.end
if.end: ; preds = %land.lhs.true3, %lor.lhs.false
br label %return
return: ; preds = %if.end, %land.lhs.true3, %land.lhs.true
; 0 when arriving from if.end, 1 from either successful equality test.
%retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
ret i32 %retval.0
}
; (a < 5 && b == c) || (a > 5 && b == d)
; Returns 1 when the condition above holds and 0 otherwise.
; Both comparisons of a use the same immediate (5) with different predicates.
; The FileCheck directives expect a cmp followed by b.ge and a ret, and then a
; b.le with no additional cmp emitted between that ret and the b.le.
define i32 @combine_lt_gt_5() #0 {
; CHECK-LABEL: combine_lt_gt_5
; CHECK: cmp
; CHECK: b.ge
; CHECK: ret
; CHECK-NOT: cmp
; CHECK: b.le
entry:
; First test on a: a < 5.
%0 = load i32* @a, align 4
%cmp = icmp slt i32 %0, 5
br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
land.lhs.true: ; preds = %entry
; a < 5 holds: the result is decided by b == c alone (a > 5 cannot also hold).
%1 = load i32* @b, align 4
%2 = load i32* @c, align 4
%cmp1 = icmp eq i32 %1, %2
br i1 %cmp1, label %return, label %if.end
lor.lhs.false: ; preds = %entry
; Second test on the same loaded value of a: a > 5.
%cmp2 = icmp sgt i32 %0, 5
br i1 %cmp2, label %land.lhs.true3, label %if.end
land.lhs.true3: ; preds = %lor.lhs.false
; a > 5 holds: succeed if b == d.
%3 = load i32* @b, align 4
%4 = load i32* @d, align 4
%cmp4 = icmp eq i32 %3, %4
br i1 %cmp4, label %return, label %if.end
if.end: ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
br label %return
return: ; preds = %if.end, %land.lhs.true3, %land.lhs.true
; 0 when arriving from if.end, 1 from either successful equality test.
%retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
ret i32 %retval.0
}
; (a > -5 && b == c) || (a < -5 && b == d)
; Returns 1 when the condition above holds and 0 otherwise.
; Same shape as the "5" variant but with a negative immediate (-5); here the
; FileCheck directives look for cmn instead of cmp: a cmn followed by b.le and
; a ret, and then a b.ge with no additional cmn between that ret and the b.ge.
define i32 @combine_gt_lt_n5() #0 {
; CHECK-LABEL: combine_gt_lt_n5
; CHECK: cmn
; CHECK: b.le
; CHECK: ret
; CHECK-NOT: cmn
; CHECK: b.ge
entry:
; First test on a: a > -5.
%0 = load i32* @a, align 4
%cmp = icmp sgt i32 %0, -5
br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
land.lhs.true: ; preds = %entry
; a > -5 holds: the result is decided by b == c alone.
%1 = load i32* @b, align 4
%2 = load i32* @c, align 4
%cmp1 = icmp eq i32 %1, %2
br i1 %cmp1, label %return, label %if.end
lor.lhs.false: ; preds = %entry
; Second test on the same loaded value of a: a < -5.
%cmp2 = icmp slt i32 %0, -5
br i1 %cmp2, label %land.lhs.true3, label %if.end
land.lhs.true3: ; preds = %lor.lhs.false
; a < -5 holds: succeed if b == d.
%3 = load i32* @b, align 4
%4 = load i32* @d, align 4
%cmp4 = icmp eq i32 %3, %4
br i1 %cmp4, label %return, label %if.end
if.end: ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
br label %return
return: ; preds = %if.end, %land.lhs.true3, %land.lhs.true
; 0 when arriving from if.end, 1 from either successful equality test.
%retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
ret i32 %retval.0
}
; (a < -5 && b == c) || (a > -5 && b == d)
; Returns 1 when the condition above holds and 0 otherwise.
; Same shape as the "5" variant but with a negative immediate (-5); here the
; FileCheck directives look for cmn instead of cmp: a cmn followed by b.ge and
; a ret, and then a b.le with no additional cmn between that ret and the b.le.
define i32 @combine_lt_gt_n5() #0 {
; CHECK-LABEL: combine_lt_gt_n5
; CHECK: cmn
; CHECK: b.ge
; CHECK: ret
; CHECK-NOT: cmn
; CHECK: b.le
entry:
; First test on a: a < -5.
%0 = load i32* @a, align 4
%cmp = icmp slt i32 %0, -5
br i1 %cmp, label %land.lhs.true, label %lor.lhs.false
land.lhs.true: ; preds = %entry
; a < -5 holds: the result is decided by b == c alone.
%1 = load i32* @b, align 4
%2 = load i32* @c, align 4
%cmp1 = icmp eq i32 %1, %2
br i1 %cmp1, label %return, label %if.end
lor.lhs.false: ; preds = %entry
; Second test on the same loaded value of a: a > -5.
%cmp2 = icmp sgt i32 %0, -5
br i1 %cmp2, label %land.lhs.true3, label %if.end
land.lhs.true3: ; preds = %lor.lhs.false
; a > -5 holds: succeed if b == d.
%3 = load i32* @b, align 4
%4 = load i32* @d, align 4
%cmp4 = icmp eq i32 %3, %4
br i1 %cmp4, label %return, label %if.end
if.end: ; preds = %land.lhs.true3, %lor.lhs.false, %land.lhs.true
br label %return
return: ; preds = %if.end, %land.lhs.true3, %land.lhs.true
; 0 when arriving from if.end, 1 from either successful equality test.
%retval.0 = phi i32 [ 0, %if.end ], [ 1, %land.lhs.true3 ], [ 1, %land.lhs.true ]
ret i32 %retval.0
}
; Record type made of two i64 fields.
%struct.Struct = type { i64, i64 }
; Module-internal pointer to a %struct.Struct, initialised to null.
@glob = internal unnamed_addr global %struct.Struct* null, align 8
; External function that takes and returns a %struct.Struct pointer; its body
; is not visible in this module.
declare %struct.Struct* @Update(%struct.Struct*) #1
; No FileCheck expectations are attached to this function; it only has to be
; compiled without errors.
; NOTE(review): judging by the name, this presumably covers a compare that is
; not immediately followed by its branch in the generated code -- confirm
; against the pass implementation.
define void @combine_non_adjacent_cmp_br(%struct.Struct* nocapture readonly %hdCall) #0 {
entry:
; Load field 0 of %hdCall once, before entering the loop.
%size = getelementptr inbounds %struct.Struct* %hdCall, i64 0, i32 0
%0 = load i64* %size, align 8
br label %land.rhs
land.rhs:
; %rp.06 starts at the loaded field value and takes %sub on each back edge.
%rp.06 = phi i64 [ %0, %entry ], [ %sub, %while.body ]
; First loop condition: the i64 loaded from the constant address 24 is > 0.
%1 = load i64* inttoptr (i64 24 to i64*), align 8
%cmp2 = icmp sgt i64 %1, 0
br i1 %cmp2, label %while.body, label %while.end
while.body:
; The result of the call (%call) is not used afterwards.
%2 = load %struct.Struct** @glob, align 8
%call = tail call %struct.Struct* @Update(%struct.Struct* %2) #2
%sub = add nsw i64 %rp.06, -2
; Second loop condition compares the initial value against the current
; (not yet decremented) %rp.06.
%cmp = icmp slt i64 %0, %rp.06
br i1 %cmp, label %land.rhs, label %while.end
while.end:
ret void
}
; External function with no definition in this module; it is left undefined to
; prevent possible optimizations in the functions that call it.
declare void @do_something() #1
; Negative test: the FileCheck directives expect both compares to remain in the
; output, a cmn followed by b.gt and later a cmp followed by b.gt.
; The IR compares against -1 on entry, against 0 inside the loop and against 2
; after the loop.
; NOTE(review): per the function name, the compares are presumably left alone
; because adjusting them would require different opcodes (cmn vs cmp) --
; confirm against the pass implementation.
; Returns 123 when the value tested after the loop is < 2 and b == d, else 0.
define i32 @do_nothing_if_resultant_opcodes_would_differ() #0 {
; CHECK-LABEL: do_nothing_if_resultant_opcodes_would_differ
; CHECK: cmn
; CHECK: b.gt
; CHECK: cmp
; CHECK: b.gt
entry:
; Enter the loop only when a < -1.
%0 = load i32* @a, align 4
%cmp4 = icmp slt i32 %0, -1
br i1 %cmp4, label %while.body.preheader, label %while.end
while.body.preheader: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body, %while.body.preheader
; %i.05 counts up from the loaded a; the loop repeats while the value before
; the increment is < 0.
%i.05 = phi i32 [ %inc, %while.body ], [ %0, %while.body.preheader ]
tail call void @do_something() #2
%inc = add nsw i32 %i.05, 1
%cmp = icmp slt i32 %i.05, 0
br i1 %cmp, label %while.body, label %while.cond.while.end_crit_edge
while.cond.while.end_crit_edge: ; preds = %while.body
; a is reloaded after the loop, following the calls to @do_something.
%.pre = load i32* @a, align 4
br label %while.end
while.end: ; preds = %while.cond.while.end_crit_edge, %entry
; %1 is the reloaded a when the loop ran, or the original load otherwise.
%1 = phi i32 [ %.pre, %while.cond.while.end_crit_edge ], [ %0, %entry ]
%cmp1 = icmp slt i32 %1, 2
br i1 %cmp1, label %land.lhs.true, label %if.end
land.lhs.true: ; preds = %while.end
%2 = load i32* @b, align 4
%3 = load i32* @d, align 4
%cmp2 = icmp eq i32 %2, %3
br i1 %cmp2, label %return, label %if.end
if.end: ; preds = %land.lhs.true, %while.end
br label %return
return: ; preds = %if.end, %land.lhs.true
; 0 when arriving from if.end, 123 when b == d was reached and held.
%retval.0 = phi i32 [ 0, %if.end ], [ 123, %land.lhs.true ]
ret i32 %retval.0
}
; Negative test: the FileCheck directives expect both compares to remain in the
; output, a cmp followed by b.gt and later a cmn followed by b.lt.
; The IR compares a against 1 on entry, the loop counter against 0 inside the
; loop, and c against -3 after the loop.
; NOTE(review): per the function name, these immediates presumably cannot be
; adjusted to a common value -- confirm against the pass implementation.
; Returns 123 when c > -3 and b == d, else 0.
define i32 @do_nothing_if_compares_can_not_be_adjusted_to_each_other() #0 {
; CHECK-LABEL: do_nothing_if_compares_can_not_be_adjusted_to_each_other
; CHECK: cmp
; CHECK: b.gt
; CHECK: cmn
; CHECK: b.lt
entry:
; Enter the loop only when a < 1.
%0 = load i32* @a, align 4
%cmp4 = icmp slt i32 %0, 1
br i1 %cmp4, label %while.body.preheader, label %while.end
while.body.preheader: ; preds = %entry
br label %while.body
while.body: ; preds = %while.body, %while.body.preheader
; %i.05 counts up from the loaded a; the loop repeats while the value before
; the increment is < 0.
%i.05 = phi i32 [ %inc, %while.body ], [ %0, %while.body.preheader ]
tail call void @do_something() #2
%inc = add nsw i32 %i.05, 1
%cmp = icmp slt i32 %i.05, 0
br i1 %cmp, label %while.body, label %while.end.loopexit
while.end.loopexit: ; preds = %while.body
br label %while.end
while.end: ; preds = %while.end.loopexit, %entry
; The post-loop test reads a different global (c), not a.
%1 = load i32* @c, align 4
%cmp1 = icmp sgt i32 %1, -3
br i1 %cmp1, label %land.lhs.true, label %if.end
land.lhs.true: ; preds = %while.end
%2 = load i32* @b, align 4
%3 = load i32* @d, align 4
%cmp2 = icmp eq i32 %2, %3
br i1 %cmp2, label %return, label %if.end
if.end: ; preds = %land.lhs.true, %while.end
br label %return
return: ; preds = %if.end, %land.lhs.true
; 0 when arriving from if.end, 123 when b == d was reached and held.
%retval.0 = phi i32 [ 0, %if.end ], [ 123, %land.lhs.true ]
ret i32 %retval.0
}