llvm-6502/test/CodeGen/X86/2010-09-17-SideEffectsInChain.ll
Evan Cheng (461f1fc359): Use movups to lower memcpy and memset even if it's not fast (like corei7).
The theory is that it's still faster than a pair of movq or a quad of movl. This
will probably hurt older chips like the P4 but should run faster on current
and future Intel processors. rdar://8817010


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@122955 91177308-0d34-0410-b5e6-96231b3b80d8
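Below is a minimal sketch of the kind of copy this change targets, separate from the regression test that follows. It assumes llc for x86-64 with SSE available, and the function name @copy16 is hypothetical: a small memcpy with unknown (byte) alignment is where the new heuristic is expected to pick a single unaligned vector load/store pair (movups) over a pair of movq or a quad of movl.

; Illustrative sketch only; not part of 2010-09-17-SideEffectsInChain.ll.
; With this change, llc for x86-64 is expected to expand the 16-byte,
; byte-aligned copy below into an unaligned vector load and store (movups)
; rather than two 8-byte moves; the exact output depends on the subtarget.
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind

define void @copy16(i8* %dst, i8* %src) nounwind {
entry:
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %dst, i8* %src, i64 16, i32 1, i1 false)
  ret void
}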

; RUN: llc < %s -combiner-alias-analysis -march=x86-64 | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.4"
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
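; The 64-byte memcpy below clobbers the whole stack buffer %a, so the two
; byte loads must stay ahead of it and the two byte stores must stay behind
; it, even when the DAG combiner runs with alias analysis enabled.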
define fastcc i32 @cli_magic_scandesc(i8* %in) nounwind ssp {
entry:
%a = alloca [64 x i8]
%b = getelementptr inbounds [64 x i8]* %a, i64 0, i32 0
%c = getelementptr inbounds [64 x i8]* %a, i64 0, i32 30
%d = load i8* %b, align 8
%e = load i8* %c, align 8
%f = bitcast [64 x i8]* %a to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %f, i8* %in, i64 64, i32 8, i1 false) nounwind
store i8 %d, i8* %b, align 8
store i8 %e, i8* %c, align 8
ret i32 0
}
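; The CHECK lines below pin only the relative order of the interesting
; instructions: the stack-protector guard load, the two byte loads, the two
; byte stores, and the stack-protector failure call. The expansion of the
; memcpy itself is not matched.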
; CHECK: movq ___stack_chk_guard@GOTPCREL(%rip), %rax
; CHECK: movb 30(%rsp), %cl
; CHECK: movb (%rsp), %dl
; CHECK: movb %dl, (%rsp)
; CHECK: movb %cl, 30(%rsp)
; CHECK: callq ___stack_chk_fail