From dd7a300c10bb0aaaf9e044c2180ca1b4c84e40c7 Mon Sep 17 00:00:00 2001 From: Nadav Rotem Date: Thu, 14 Mar 2013 23:49:44 +0000 Subject: [PATCH] Unaligned loads should use the VMOVUPS opcode. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@177130 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/X86/X86InstrSSE.td | 2 +- test/CodeGen/X86/avx-load-store.ll | 2 +- test/CodeGen/X86/vec_align_i256.ll | 11 +++++++++++ 3 files changed, 13 insertions(+), 2 deletions(-) create mode 100644 test/CodeGen/X86/vec_align_i256.ll diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index 09797527573..f22a6b29cd2 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -1009,7 +1009,7 @@ let Predicates = [HasAVX] in { (VMOVUPSmr addr:$dst, (v4i32 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v8i16 (extract_subvector (v16i16 VR256:$src), (iPTR 0))), addr:$dst), - (VMOVAPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; + (VMOVUPSmr addr:$dst, (v8i16 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; def : Pat<(store (v16i8 (extract_subvector (v32i8 VR256:$src), (iPTR 0))), addr:$dst), (VMOVUPSmr addr:$dst, (v16i8 (EXTRACT_SUBREG VR256:$src,sub_xmm)))>; diff --git a/test/CodeGen/X86/avx-load-store.ll b/test/CodeGen/X86/avx-load-store.ll index 77a7c4f945f..432852d47d3 100644 --- a/test/CodeGen/X86/avx-load-store.ll +++ b/test/CodeGen/X86/avx-load-store.ll @@ -55,7 +55,7 @@ define void @storev16i16(<16 x i16> %a) nounwind { ; CHECK: storev16i16_01 ; CHECK: vextractf128 -; CHECK: vmovaps %xmm +; CHECK: vmovups %xmm define void @storev16i16_01(<16 x i16> %a) nounwind { store <16 x i16> %a, <16 x i16>* undef, align 4 unreachable diff --git a/test/CodeGen/X86/vec_align_i256.ll b/test/CodeGen/X86/vec_align_i256.ll new file mode 100644 index 00000000000..44ca5c5c8e3 --- /dev/null +++ b/test/CodeGen/X86/vec_align_i256.ll @@ -0,0 +1,11 @@ +; RUN: llc < %s -mcpu=corei7-avx | FileCheck %s + +; Make sure that we are not generating a movaps because the vector is aligned to 1. +;CHECK: @foo +;CHECK: xor +;CHECK-NEXT: vmovups +;CHECK-NEXT: ret +define void @foo() { + store <16 x i16> zeroinitializer, <16 x i16>* undef, align 1 + ret void +}