From 743a2cff0422031da007bcdc70ced28c09057aea Mon Sep 17 00:00:00 2001 From: Manman Ren Date: Fri, 10 Aug 2012 23:43:32 +0000 Subject: [PATCH] X86: when we are auto-detecting the subtarget features, make sure we turn on FeatureFastUAMem for Nehalem, Westmere and Sandy Bridge. FeatureFastUAMem is already on if we pass in nehalem or westmere as a command argument. rdar: 7252306 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@161717 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86Subtarget.cpp | 7 +++++-- test/CodeGen/X86/alignment-2.ll | 4 +++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index ae2229827b0..d7e3efc4f98 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -246,8 +246,11 @@ void X86Subtarget::AutoDetectSubtargetFeatures() { } // If it's Nehalem, unaligned memory access is fast. - // FIXME: Nehalem is family 6. Also include Westmere and later processors? - if (Family == 15 && Model == 26) { + // Include Westmere and Sandy Bridge as well. + // FIXME: add later processors. + if ((Family == 6 && Model == 26) || + (Family == 6 && Model == 44) || + (Family == 6 && Model == 42)) { IsUAMemFast = true; ToggleFeature(X86::FeatureFastUAMem); } diff --git a/test/CodeGen/X86/alignment-2.ll b/test/CodeGen/X86/alignment-2.ll index cc709b52d93..1f9e85cbb76 100644 --- a/test/CodeGen/X86/alignment-2.ll +++ b/test/CodeGen/X86/alignment-2.ll @@ -18,7 +18,9 @@ define signext i8 @do_lo_list() nounwind optsize ssp { bb: ; CHECK: do_lo_list -; CHECK-NOT: movaps +; Make sure we do not use movaps for the global variable. +; It is okay to use movaps for writing the local variable on stack. +; CHECK-NOT: movaps {{[0-9]*}}(%{{[a-z]*}}), {{%xmm[0-9]}} %myopt = alloca %struct.printQueryOpt, align 4 %tmp = bitcast %struct.printQueryOpt* %myopt to i8* call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.printQueryOpt* getelementptr inbounds (%struct._psqlSettings* @pset, i32 0, i32 4) to i8*), i32 76, i32 4, i1 false)