Use AA in LoadCombine

LoadCombine can be smarter about aborting when a writing instruction is
encountered, instead of aborting upon encountering any writing instruction, use
an AliasSetTracker, and only abort when encountering some write that might
alias with the loads that could potentially be combined.

This was originally motivated by comments made (and a test case provided) by
David Majnemer in response to PR21448. It turned out that LoadCombine was not
responsible for that PR, but LoadCombine should also be improved so that
unrelated stores (and @llvm.assume) don't interrupt load combining.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@221203 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel
2014-11-03 23:19:16 +00:00
parent 42090c633f
commit cd9acde7f3
5 changed files with 112 additions and 5 deletions

View File

@ -0,0 +1,44 @@
; RUN: opt -basicaa -load-combine -instcombine -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
declare void @llvm.assume(i1) nounwind
; 'load' before the 'call' gets optimized:
define i64 @test1(i32* nocapture readonly %a, i1 %b) {
; CHECK-LABEL: @test1
; CHECK-DAG: load i64* %1, align 4
; CHECK-DAG: tail call void @llvm.assume(i1 %b)
; CHECK: ret i64
%load1 = load i32* %a, align 4
%conv = zext i32 %load1 to i64
%arrayidx1 = getelementptr inbounds i32* %a, i64 1
%load2 = load i32* %arrayidx1, align 4
tail call void @llvm.assume(i1 %b)
%conv2 = zext i32 %load2 to i64
%shl = shl nuw i64 %conv2, 32
%add = or i64 %shl, %conv
ret i64 %add
}
; 'call' before the 'load' doesn't get optimized:
define i64 @test2(i32* nocapture readonly %a, i1 %b) {
; CHECK-LABEL: @test2
; CHECK-DAG: load i64* %1, align 4
; CHECK-DAG: tail call void @llvm.assume(i1 %b)
; CHECK: ret i64
%load1 = load i32* %a, align 4
%conv = zext i32 %load1 to i64
%arrayidx1 = getelementptr inbounds i32* %a, i64 1
tail call void @llvm.assume(i1 %b)
%load2 = load i32* %arrayidx1, align 4
%conv2 = zext i32 %load2 to i64
%shl = shl nuw i64 %conv2, 32
%add = or i64 %shl, %conv
ret i64 %add
}