llvm-6502/test/CodeGen/X86/2010-04-08-CoalescerBug.ll
Evan Cheng 461f1fc359 Use movups to lower memcpy and memset even if it's not fast (like corei7).
The theory is it's still faster than a pair of movq / a quad of movl. This
will probably hurt older chips like P4 but should run faster on current
and future Intel processors. rdar://8817010


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122955 91177308-0d34-0410-b5e6-96231b3b80d8
2011-01-06 07:58:36 +00:00

27 lines
1.0 KiB
LLVM

; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
; rdar://7842028
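; Do not delete partially dead copy instructions. In the machine code below,
; the MOV64rr's %RDI def is marked dead, but its implicit %EDI def is still
; used by the following REP_MOVSD, so the copy must be kept.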
; %RDI<def,dead> = MOV64rr %RAX<kill>, %EDI<imp-def>
; REP_MOVSD %ECX<imp-def,dead>, %EDI<imp-def,dead>, %ESI<imp-def,dead>, %ECX<imp-use,kill>, %EDI<imp-use,kill>, %ESI<imp-use,kill>
; Aggregate types used by @t.
; %struct.F: field 0 is a pointer to a %struct.FC; it is the only field @t reads.
%struct.F = type { %struct.FC*, i32, i32, i8, i32, i32, i32 }
; %struct.FC: field 1 is the [32 x i32] array that @t uses as the memcpy source.
%struct.FC = type { [10 x i8], [32 x i32], %struct.FC*, i32 }
; Test function: copies the [32 x i32] array stored at field 1 of the
; %struct.FC reached through %this->field 0 into a local stack buffer using
; the llvm.memcpy intrinsic. Compiled with the optsize attribute. The
; FileCheck directives inside the body require that the x86-64 output for
; this function contains `addq $12, %rsi`.
define void @t(%struct.F* %this) nounwind optsize {
entry:
; CHECK: t:
; CHECK: addq $12, %rsi
; NOTE(review): 12 is presumably the byte offset of field 1 within %struct.FC
; (the [10 x i8] field padded up to i32 alignment) -- confirm against the
; target data layout.
; Local destination buffer: 32 x i32 = 128 bytes, 4-byte aligned.
%BitValueArray = alloca [32 x i32], align 4
; Address of field 0 of *%this (the %struct.FC* member).
%tmp2 = getelementptr inbounds %struct.F* %this, i64 0, i32 0
; Load the %struct.FC pointer held in that field.
%tmp3 = load %struct.FC** %tmp2, align 8
; Address of element 0 of field 1 (the [32 x i32] array) in the loaded struct.
%tmp4 = getelementptr inbounds %struct.FC* %tmp3, i64 0, i32 1, i64 0
; Cast destination and source to i8* as required by the memcpy declaration.
%tmp5 = bitcast [32 x i32]* %BitValueArray to i8*
%tmp6 = bitcast i32* %tmp4 to i8*
; Copy 128 bytes with 4-byte alignment; the trailing i1 false is the
; intrinsic's volatile flag (non-volatile copy).
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp5, i8* %tmp6, i64 128, i32 4, i1 false)
; The block ends in unreachable rather than a return.
unreachable
}
; Declaration of the memcpy intrinsic called by @t. Operands, in order:
; destination pointer, source pointer, byte count (i64), alignment (i32),
; volatile flag (i1).
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind