Use vld1 / vst2 for unaligned v2f64 load / store. e.g. Use vld1.16 for 2-byte

aligned address. Based on patch by David Peixotto.

Also use vld1.64 / vst1.64 with 128-bit alignment to take advantage of alignment
hints. rdar://12090772, rdar://12238782


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164089 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng
2012-09-18 01:42:45 +00:00
parent b198f5c897
commit d10eab0a95
11 changed files with 593 additions and 53 deletions

View File

@@ -137,7 +137,7 @@ return2:
define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
; CHECK: t5:
; CHECK: vldmia
; CHECK: vld1.32
; How can FileCheck match Q and D registers? We need a lisp interpreter.
; CHECK: vorr {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
; CHECK-NOT: vmov
@@ -243,8 +243,8 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
; CHECK: vldr
; CHECK-NOT: vmov d{{.*}}, d16
; CHECK: vmov.i32 d17
; CHECK-NEXT: vstmia r0, {d16, d17}
; CHECK-NEXT: vstmia r0, {d16, d17}
; CHECK-NEXT: vst1.64 {d16, d17}, [r0, :128]
; CHECK-NEXT: vst1.64 {d16, d17}, [r0, :128]
%3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
%4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
store <4 x float> %4, <4 x float>* undef, align 16