diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index f871b5a7701..ddec78bfff3 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1063,12 +1063,8 @@ X86TargetLowering::getOptimalMemOpType(uint64_t Size, // linux. This is because the stack realignment code can't handle certain // cases like PR2962. This should be removed when PR2962 is fixed. const Function *F = MF.getFunction(); - if (NonScalarIntSafe && - !F->hasFnAttr(Attribute::NoImplicitFloat)) { + if (NonScalarIntSafe && !F->hasFnAttr(Attribute::NoImplicitFloat)) { if (Size >= 16 && - (Subtarget->isUnalignedMemAccessFast() || - ((DstAlign == 0 || DstAlign >= 16) && - (SrcAlign == 0 || SrcAlign >= 16))) && Subtarget->getStackAlignment() >= 16) { if (Subtarget->hasSSE2()) return MVT::v4i32; diff --git a/test/CodeGen/X86/2010-04-08-CoalescerBug.ll b/test/CodeGen/X86/2010-04-08-CoalescerBug.ll index 1c7c28c68e9..09b7711db5d 100644 --- a/test/CodeGen/X86/2010-04-08-CoalescerBug.ll +++ b/test/CodeGen/X86/2010-04-08-CoalescerBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s +; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s ; rdar://7842028 ; Do not delete partially dead copy instructions. @@ -9,7 +9,7 @@ %struct.F = type { %struct.FC*, i32, i32, i8, i32, i32, i32 } %struct.FC = type { [10 x i8], [32 x i32], %struct.FC*, i32 } -define void @t(%struct.F* %this) nounwind { +define void @t(%struct.F* %this) nounwind optsize { entry: ; CHECK: t: ; CHECK: addq $12, %rsi diff --git a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll index 6db3ce1f42c..f31cdad8614 100644 --- a/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll +++ b/test/CodeGen/X86/2010-06-25-CoalescerSubRegDefDead.ll @@ -26,7 +26,7 @@ bb: ; CHECK: rep;stosl %tmp5 = bitcast i32* %tmp4 to i8* - call void @llvm.memset.p0i8.i64(i8* %tmp5, i8 0, i64 84, i32 4, i1 false) + call void @llvm.memset.p0i8.i64(i8* %tmp5, i8 0, i64 124, i32 4, i1 false) %tmp6 = getelementptr inbounds %struct.type* %s, i32 0, i32 62 store i32* null, i32** %tmp6, align 8 br label %bb1 diff --git a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll index 8fe0309421e..66dc0eabac3 100644 --- a/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll +++ b/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll @@ -19,8 +19,8 @@ entry: } ; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip), %rax -; CHECK: movb 30(%rsp), %dl -; CHECK: movb (%rsp), %sil -; CHECK: movb %sil, (%rsp) -; CHECK: movb %dl, 30(%rsp) +; CHECK: movb 30(%rsp), %cl +; CHECK: movb (%rsp), %dl +; CHECK: movb %dl, (%rsp) +; CHECK: movb %cl, 30(%rsp) ; CHECK: callq ___stack_chk_fail diff --git a/test/CodeGen/X86/memcpy-2.ll b/test/CodeGen/X86/memcpy-2.ll index 17cd8e868a2..9078e4ba533 100644 --- a/test/CodeGen/X86/memcpy-2.ll +++ b/test/CodeGen/X86/memcpy-2.ll @@ -1,5 +1,4 @@ ; RUN: llc < %s -mattr=+sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE2 -; RUN: llc < %s -mattr=+sse,-sse2 -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=SSE1 ; RUN: llc < %s -mattr=-sse -mtriple=i686-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=NOSSE ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | FileCheck %s -check-prefix=X86-64 @@ -15,13 +14,6 @@ entry: ; SSE2: movl $0 ; SSE2: movl $0 -; SSE1: t1: -; SSE1: movaps _.str, %xmm0 -; SSE1: movaps %xmm0 -; SSE1: movb $0 -; SSE1: movl $0 -; SSE1: movl $0 - ; NOSSE: t1: ; NOSSE: movb $0 ; NOSSE: movl $0 @@ -51,10 +43,6 @@ entry: ; SSE2: movaps (%eax), %xmm0 ; SSE2: movaps %xmm0, (%eax) -; SSE1: t2: -; SSE1: movaps (%eax), %xmm0 -; SSE1: movaps %xmm0, (%eax) - ; NOSSE: t2: ; NOSSE: movl ; NOSSE: movl @@ -79,22 +67,8 @@ entry: define void @t3(%struct.s0* nocapture %a, %struct.s0* nocapture %b) nounwind ssp { entry: ; SSE2: t3: -; SSE2: movsd (%eax), %xmm0 -; SSE2: movsd 8(%eax), %xmm1 -; SSE2: movsd %xmm1, 8(%eax) -; SSE2: movsd %xmm0, (%eax) - -; SSE1: t3: -; SSE1: movl -; SSE1: movl -; SSE1: movl -; SSE1: movl -; SSE1: movl -; SSE1: movl -; SSE1: movl -; SSE1: movl -; SSE1: movl -; SSE1: movl +; SSE2: movups (%eax), %xmm0 +; SSE2: movups %xmm0, (%eax) ; NOSSE: t3: ; NOSSE: movl @@ -109,10 +83,8 @@ entry: ; NOSSE: movl ; X86-64: t3: -; X86-64: movq (%rsi), %rax -; X86-64: movq 8(%rsi), %rcx -; X86-64: movq %rcx, 8(%rdi) -; X86-64: movq %rax, (%rdi) +; X86-64: movups (%rsi), %xmm0 +; X86-64: movups %xmm0, (%rdi) %tmp2 = bitcast %struct.s0* %a to i8* ; [#uses=1] %tmp3 = bitcast %struct.s0* %b to i8* ; [#uses=1] tail call void @llvm.memcpy.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 8) @@ -122,24 +94,12 @@ entry: define void @t4() nounwind { entry: ; SSE2: t4: -; SSE2: movw $120 +; SSE2: movups _.str2, %xmm0 +; SSE2: movaps %xmm0, (%esp) +; SSE2: movw $120, 28(%esp) ; SSE2: movl $2021161080 ; SSE2: movl $2021161080 ; SSE2: movl $2021161080 -; SSE2: movl $2021161080 -; SSE2: movl $2021161080 -; SSE2: movl $2021161080 -; SSE2: movl $2021161080 - -; SSE1: t4: -; SSE1: movw $120 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 -; SSE1: movl $2021161080 ; NOSSE: t4: ; NOSSE: movw $120 @@ -154,8 +114,8 @@ entry: ; X86-64: t4: ; X86-64: movabsq $8680820740569200760, %rax ; X86-64: movq %rax -; X86-64: movq %rax -; X86-64: movq %rax +; X86-64: movups _.str2(%rip), %xmm0 +; X86-64: movaps %xmm0, -40(%rsp) ; X86-64: movw $120 ; X86-64: movl $2021161080 %tmp1 = alloca [30 x i8] diff --git a/test/CodeGen/X86/memcpy.ll b/test/CodeGen/X86/memcpy.ll index 72342cbacb4..4af93ad3682 100644 --- a/test/CodeGen/X86/memcpy.ll +++ b/test/CodeGen/X86/memcpy.ll @@ -37,26 +37,34 @@ entry: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %A, i8* %B, i64 64, i32 1, i1 false) ret void ; LINUX: test3: -; LINUX: memcpy +; LINUX-NOT: memcpy +; LINUX: movups +; LINUX: movups +; LINUX: movups +; LINUX: movups +; LINUX: movups +; LINUX: movups +; LINUX: movups +; LINUX: movups ; DARWIN: test3: ; DARWIN-NOT: memcpy -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq -; DARWIN: movq +; DARWIN: movups +; DARWIN: movups +; DARWIN: movups +; DARWIN: movups +; DARWIN: movups +; DARWIN: movups +; DARWIN: movups +; DARWIN: movups +; DARWIN: movups +; DARWIN: movups +; DARWIN: movups +; DARWIN: movups +; DARWIN: movups +; DARWIN: movups +; DARWIN: movups +; DARWIN: movups } ; Large constant memcpy's should be inlined when not optimizing for size. diff --git a/test/CodeGen/X86/memset-2.ll b/test/CodeGen/X86/memset-2.ll index ae6b6e9772b..eb5597eb6ff 100644 --- a/test/CodeGen/X86/memset-2.ll +++ b/test/CodeGen/X86/memset-2.ll @@ -5,7 +5,21 @@ declare void @llvm.memset.i32(i8*, i8, i32, i32) nounwind define fastcc void @t1() nounwind { entry: ; CHECK: t1: -; CHECK: calll _memset +; CHECK: pxor %xmm0, %xmm0 +; CHECK: movups %xmm0, 160 +; CHECK: movups %xmm0, 144 +; CHECK: movups %xmm0, 128 +; CHECK: movups %xmm0, 112 +; CHECK: movups %xmm0, 96 +; CHECK: movups %xmm0, 80 +; CHECK: movups %xmm0, 64 +; CHECK: movups %xmm0, 48 +; CHECK: movups %xmm0, 32 +; CHECK: movups %xmm0, 16 +; CHECK: movups %xmm0, 0 +; CHECK: movl $0, 184 +; CHECK: movl $0, 180 +; CHECK: movl $0, 176 call void @llvm.memset.i32( i8* null, i8 0, i32 188, i32 1 ) nounwind unreachable } diff --git a/test/CodeGen/X86/memset64-on-x86-32.ll b/test/CodeGen/X86/memset64-on-x86-32.ll index 3f069b4a1aa..5a0e893e3b6 100644 --- a/test/CodeGen/X86/memset64-on-x86-32.ll +++ b/test/CodeGen/X86/memset64-on-x86-32.ll @@ -1,6 +1,5 @@ ; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movups | count 5 -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2 | grep movl | count 20 -; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | grep movq | count 10 +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=core2 | grep movups | count 5 define void @bork() nounwind { entry: diff --git a/test/CodeGen/X86/small-byval-memcpy.ll b/test/CodeGen/X86/small-byval-memcpy.ll index 1b596b58989..8f69b111bc3 100644 --- a/test/CodeGen/X86/small-byval-memcpy.ll +++ b/test/CodeGen/X86/small-byval-memcpy.ll @@ -1,8 +1,12 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=core2 | grep movsd | count 8 -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | grep movups | count 2 +; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=nehalem | FileCheck %s define void @ccosl({ x86_fp80, x86_fp80 }* noalias sret %agg.result, { x86_fp80, x86_fp80 }* byval align 4 %z) nounwind { entry: +; CHECK: ccosl: +; CHECK: movaps +; CHECK: movaps +; CHECK: movups +; CHECK: movups %iz = alloca { x86_fp80, x86_fp80 } ; <{ x86_fp80, x86_fp80 }*> [#uses=3] %tmp1 = getelementptr { x86_fp80, x86_fp80 }* %z, i32 0, i32 1 ; [#uses=1] %tmp2 = load x86_fp80* %tmp1, align 16 ; [#uses=1] diff --git a/test/CodeGen/X86/tlv-1.ll b/test/CodeGen/X86/tlv-1.ll index 42940f147ed..f9be15d1a95 100644 --- a/test/CodeGen/X86/tlv-1.ll +++ b/test/CodeGen/X86/tlv-1.ll @@ -10,8 +10,12 @@ entry: unreachable ; CHECK: movq _c@TLVP(%rip), %rdi ; CHECK-NEXT: callq *(%rdi) - ; CHECK-NEXT: movl $0, 56(%rax) - ; CHECK-NEXT: movq $0, 48(%rax) + ; CHECK-NEXT: pxor %xmm0, %xmm0 + ; CHECK-NEXT: movups %xmm0, 32(%rax) + ; CHECK-NEXT: movups %xmm0, 16(%rax) + ; CHECK-NEXT: movups %xmm0, (%rax) + ; CHECK-NEXT: movl $0, 56(%rax) + ; CHECK-NEXT: movq $0, 48(%rax) } declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind diff --git a/test/CodeGen/X86/unaligned-load.ll b/test/CodeGen/X86/unaligned-load.ll index 6a493c0594d..040857786da 100644 --- a/test/CodeGen/X86/unaligned-load.ll +++ b/test/CodeGen/X86/unaligned-load.ll @@ -1,6 +1,4 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=I386 %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=CORE2 %s -; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=corei7 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck -check-prefix=COREI7 %s +; RUN: llc < %s -mtriple=x86_64-apple-darwin10.0 -mcpu=core2 -relocation-model=dynamic-no-pic --asm-verbose=0 | FileCheck %s @.str1 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, SOME STRING\00", align 8 @.str3 = internal constant [31 x i8] c"DHRYSTONE PROGRAM, 2'ND STRING\00", align 8 @@ -13,13 +11,8 @@ entry: bb: %String2Loc9 = getelementptr inbounds [31 x i8]* %String2Loc, i64 0, i64 0 call void @llvm.memcpy.i64(i8* %String2Loc9, i8* getelementptr inbounds ([31 x i8]* @.str3, i64 0, i64 0), i64 31, i32 1) -; I386: calll {{_?}}memcpy - -; CORE2: movabsq -; CORE2: movabsq -; CORE2: movabsq - -; COREI7: movups _.str3 +; CHECK: movabsq $2325069237881678925, %rax +; CHECK: movups _.str3(%rip), %xmm0 br label %bb return: @@ -28,9 +21,9 @@ return: declare void @llvm.memcpy.i64(i8* nocapture, i8* nocapture, i64, i32) nounwind -; CORE2: .section -; CORE2: .align 4 -; CORE2-NEXT: _.str1: -; CORE2-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING" -; CORE2: .align 4 -; CORE2-NEXT: _.str3: +; CHECK: .section +; CHECK: .align 4 +; CHECK-NEXT: _.str1: +; CHECK-NEXT: .asciz "DHRYSTONE PROGRAM, SOME STRING" +; CHECK: .align 4 +; CHECK-NEXT: _.str3: