llvm-6502/test/Transforms/SimplifyCFG/speculate-store.ll
Arnold Schwaighofer 8228ffe72d SimplifyCFG: If convert single conditional stores
This resurrects r179957, but adds code that makes sure we don't touch
atomic/volatile stores:

This transformation will transform a conditional store with a preceding
unconditional store to the same location:

 a[i] = X
 may-alias with a[i] load
 if (cond)
   a[i] = Y

into an unconditional store.

 a[i] = X
 may-alias with a[i] load
 tmp = cond ? Y : X;
 a[i] = tmp

We assume that on average the cost of a mispredicted branch is going to be
higher than the cost of a second store to the same location, and that the
secondary benefits of creating a bigger basic block for other optimizations to
work on outweigh the potential case where the branch would be correctly predicted
and the cost of executing the second store would be noticeably reflected in
performance.

hmmer's execution time improves by 30% on an imac12,2 on ref data sets. With
this change we are on par with gcc's performance (gcc also performs this
transformation). There was a 1.2% performance improvement on an ARM swift chip.
Other tests in the test-suite+external seem to be mostly uninfluenced in my
experiments:
This optimization was triggered on 41 tests such that the executable was
different before/after the patch. Only 1 out of the 40 tests (dealII) was
reproducible below 100% (by about .4%). Given that hmmer benefits so much I
believe this to be a fair trade off.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@180731 91177308-0d34-0410-b5e6-96231b3b80d8
2013-04-29 21:28:24 +00:00

109 lines
3.0 KiB
LLVM

; RUN: opt -simplifycfg -S < %s | FileCheck %s
; Positive test: the entry block stores %add through %arrayidx2 and the
; %if.then block conditionally stores %add5 through the very same pointer.
; The FileCheck directives below expect the guarded store to be rewritten as a
; select on %cmp6 between %add5 and %add, followed by a store of the selected
; value.
; Note: the %m parameter is not referenced in the function body.
define void @ifconvertstore(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
entry:
; %add = B[0] + C
%arrayidx = getelementptr inbounds i32* %B, i64 0
%0 = load i32* %arrayidx, align 4
%add = add nsw i32 %0, %C
; %arrayidx2 = &A[0], the location written by both stores in this function.
%arrayidx2 = getelementptr inbounds i32* %A, i64 0
; First store to the location.
store i32 %add, i32* %arrayidx2, align 4
; %add5 = B[1] + D; take the %if.then path when %add5 > C (signed compare).
%arrayidx4 = getelementptr inbounds i32* %B, i64 1
%1 = load i32* %arrayidx4, align 4
%add5 = add nsw i32 %1, %D
%cmp6 = icmp sgt i32 %add5, %C
br i1 %cmp6, label %if.then, label %ret.end
; Make sure we speculate stores like the following one. It is cheap compared to
; a mispredicted branch.
; CHECK: @ifconvertstore
; CHECK: %add5.add = select i1 %cmp6, i32 %add5, i32 %add
; CHECK: store i32 %add5.add, i32* %arrayidx2, align 4
if.then:
; Conditional second store to the same pointer as the first store.
store i32 %add5, i32* %arrayidx2, align 4
br label %ret.end
ret.end:
ret void
}
; Negative test: the unconditional store in the entry block writes through
; %arrayidx (computed from %B), while the conditional store in %if.then writes
; through %arrayidx2 (computed from %A). The two stores therefore do not use
; the same pointer value, and the FileCheck directives below expect that no
; select is produced.
; Note: the %m parameter is not referenced in the function body.
define void @noifconvertstore1(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
entry:
; %add = B[0] + C
%arrayidx = getelementptr inbounds i32* %B, i64 0
%0 = load i32* %arrayidx, align 4
%add = add nsw i32 %0, %C
; %arrayidx2 = &A[0]; only the conditional store below uses it.
%arrayidx2 = getelementptr inbounds i32* %A, i64 0
; Store to a different location.
store i32 %add, i32* %arrayidx, align 4
; %add5 = B[1] + D; take the %if.then path when %add5 > C (signed compare).
%arrayidx4 = getelementptr inbounds i32* %B, i64 1
%1 = load i32* %arrayidx4, align 4
%add5 = add nsw i32 %1, %D
%cmp6 = icmp sgt i32 %add5, %C
br i1 %cmp6, label %if.then, label %ret.end
; CHECK: @noifconvertstore1
; CHECK-NOT: select
if.then:
; Conditional store through %arrayidx2, not the pointer stored to above.
store i32 %add5, i32* %arrayidx2, align 4
br label %ret.end
ret.end:
ret void
}
; External function with no visible body; called by @noifconvertstore2 below.
declare void @unknown_fun()
; Negative test: both stores write through %arrayidx2, but a call to
; @unknown_fun sits between the first store and the conditional branch. The
; FileCheck directives below expect that no select is produced.
; NOTE(review): presumably the opaque call is treated as possibly accessing the
; stored-to location, which blocks the transformation -- confirm against the
; SimplifyCFG implementation.
; Note: the %m parameter is not referenced in the function body.
define void @noifconvertstore2(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
entry:
; %add = B[0] + C
%arrayidx = getelementptr inbounds i32* %B, i64 0
%0 = load i32* %arrayidx, align 4
%add = add nsw i32 %0, %C
; %arrayidx2 = &A[0], the location written by both stores in this function.
%arrayidx2 = getelementptr inbounds i32* %A, i64 0
; First store to the location.
store i32 %add, i32* %arrayidx2, align 4
; Call placed between the first store and the conditional store.
call void @unknown_fun()
; %add5 = B[1] + D; take the %if.then path when %add5 > C (signed compare).
%arrayidx4 = getelementptr inbounds i32* %B, i64 1
%1 = load i32* %arrayidx4, align 4
%add5 = add nsw i32 %1, %D
%cmp6 = icmp sgt i32 %add5, %C
br i1 %cmp6, label %if.then, label %ret.end
; CHECK: @noifconvertstore2
; CHECK-NOT: select
if.then:
; Conditional second store to the same pointer as the first store.
store i32 %add5, i32* %arrayidx2, align 4
br label %ret.end
ret.end:
ret void
}
; Negative test: same shape as @ifconvertstore (unconditional store followed by
; a conditional store through the same %arrayidx2 pointer), except that the
; conditional store is marked volatile. The FileCheck directives below expect
; that no select is produced.
; Note: the %m parameter is not referenced in the function body.
define void @noifconvertstore_volatile(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
entry:
; %add = B[0] + C
%arrayidx = getelementptr inbounds i32* %B, i64 0
%0 = load i32* %arrayidx, align 4
%add = add nsw i32 %0, %C
; %arrayidx2 = &A[0], the location written by both stores in this function.
%arrayidx2 = getelementptr inbounds i32* %A, i64 0
; First store to the location.
store i32 %add, i32* %arrayidx2, align 4
; %add5 = B[1] + D; take the %if.then path when %add5 > C (signed compare).
%arrayidx4 = getelementptr inbounds i32* %B, i64 1
%1 = load i32* %arrayidx4, align 4
%add5 = add nsw i32 %1, %D
%cmp6 = icmp sgt i32 %add5, %C
br i1 %cmp6, label %if.then, label %ret.end
; Make sure we don't speculate volatile stores.
; CHECK: @noifconvertstore_volatile
; CHECK-NOT: select
if.then:
; Volatile conditional store; the only difference from @ifconvertstore.
store volatile i32 %add5, i32* %arrayidx2, align 4
br label %ret.end
ret.end:
ret void
}