The memcpy optimizer was happily doing call slot forwarding even when the new
memory was less aligned than the old.  In the testcase this results in an
overaligned memset: the memset alignment was correct for the original memory but
is too large for the new memory.  Fix this by either increasing the alignment of
the new memory or bailing out if that isn't possible.  This should fix the
gcc-4.7 self-host buildbot failure.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@165220 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Duncan Sands
2012-10-04 10:54:40 +00:00
parent aa3cb334af
commit f58747517c
2 changed files with 51 additions and 7 deletions
+21 -3
View File
@@ -1,12 +1,15 @@
; RUN: opt < %s -S -memcpyopt | FileCheck %s
; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
; The resulting memset is only 4-byte aligned, despite containing
; a 16-byte aligned store in the middle.
; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i32 4, i1 false)
define void @foo(i32* %p) {
; CHECK: @foo
; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i32 4, i1 false)
%a0 = getelementptr i32* %p, i64 0
store i32 0, i32* %a0, align 4
%a1 = getelementptr i32* %p, i64 1
@@ -17,3 +20,18 @@ define void @foo(i32* %p) {
store i32 0, i32* %a3, align 4
ret void
}
; Replacing %a8 with %a4 in the memset requires boosting the alignment of %a4.
; The memset below is emitted with 8-byte alignment, which is valid for %a8
; (align 8) but would be too large for %a4 as declared (align 4). The test
; therefore expects the alloca of %a4 to be raised to align 8 and the memcpy
; to be gone from the output.
define void @bar() {
; CHECK: @bar
; CHECK: %a4 = alloca i32, align 8
; CHECK-NOT: memcpy
; Destination of the copy: declared with only 4-byte alignment.
%a4 = alloca i32, align 4
; Source of the copy: declared with 8-byte alignment.
%a8 = alloca i32, align 8
; Both intrinsics take i8* operands, so cast the i32 slots.
%a8.cast = bitcast i32* %a8 to i8*
%a4.cast = bitcast i32* %a4 to i8*
; Zero 4 bytes of %a8; the alignment argument (8) matches %a8's alloca.
call void @llvm.memset.p0i8.i64(i8* %a8.cast, i8 0, i64 4, i32 8, i1 false)
; Copy those 4 bytes from %a8 into %a4 with 4-byte alignment.
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a4.cast, i8* %a8.cast, i64 4, i32 4, i1 false)
ret void
}