mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-26 05:32:25 +00:00
Use vld1 / vst1 for unaligned v2f64 load / store. e.g. Use vld1.16 for 2-byte
aligned address. Based on patch by David Peixotto. Also use vld1.64 / vst1.64 with 128-bit alignment to take advantage of alignment hints. rdar://12090772, rdar://12238782 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@164089 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b198f5c897
commit
d10eab0a95
@ -9025,8 +9025,8 @@ bool ARMTargetLowering::isDesirableToTransformToIntegerOp(unsigned Opc,
|
||||
}
|
||||
|
||||
bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
|
||||
if (!Subtarget->allowsUnalignedMem())
|
||||
return false;
|
||||
// The AllowsUnaligned flag models the SCTLR.A setting in ARM cpus
|
||||
bool AllowsUnaligned = Subtarget->allowsUnalignedMem();
|
||||
|
||||
switch (VT.getSimpleVT().SimpleTy) {
|
||||
default:
|
||||
@ -9034,10 +9034,14 @@ bool ARMTargetLowering::allowsUnalignedMemoryAccesses(EVT VT) const {
|
||||
case MVT::i8:
|
||||
case MVT::i16:
|
||||
case MVT::i32:
|
||||
return true;
|
||||
// Unaligned access can use (for example) LDRB, LDRH, LDR
|
||||
return AllowsUnaligned;
|
||||
case MVT::f64:
|
||||
return Subtarget->hasNEON();
|
||||
// FIXME: VLD1 etc with standard alignment is legal.
|
||||
case MVT::v2f64:
|
||||
// For any little-endian targets with neon, we can support unaligned ld/st
|
||||
// of D and Q (e.g. {D0,D1}) registers by using vld1.i8/vst1.i8.
|
||||
// A big-endian target may also explicitly support unaligned accesses
|
||||
return Subtarget->hasNEON() && (AllowsUnaligned || isLittleEndian());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -398,6 +398,20 @@ def VecListFourQWordIndexed : Operand<i32> {
|
||||
let MIOperandInfo = (ops DPR:$Vd, i32imm:$idx);
|
||||
}
|
||||
|
||||
def dword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
return cast<LoadSDNode>(N)->getAlignment() >= 8;
|
||||
}]>;
|
||||
def dword_alignedstore : PatFrag<(ops node:$val, node:$ptr),
|
||||
(store node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getAlignment() >= 8;
|
||||
}]>;
|
||||
def word_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
return cast<LoadSDNode>(N)->getAlignment() == 4;
|
||||
}]>;
|
||||
def word_alignedstore : PatFrag<(ops node:$val, node:$ptr),
|
||||
(store node:$val, node:$ptr), [{
|
||||
return cast<StoreSDNode>(N)->getAlignment() == 4;
|
||||
}]>;
|
||||
def hword_alignedload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
|
||||
return cast<LoadSDNode>(N)->getAlignment() == 2;
|
||||
}]>;
|
||||
@ -2273,6 +2287,25 @@ def : Pat<(f64 (non_word_alignedload addrmode6:$addr)),
|
||||
def : Pat<(non_word_alignedstore (f64 DPR:$value), addrmode6:$addr),
|
||||
(VST1d64 addrmode6:$addr, DPR:$value)>, Requires<[IsBE]>;
|
||||
|
||||
// Use vld1/vst1 for Q and QQ. Also use them for unaligned v2f64
|
||||
// load / store if it's legal.
|
||||
def : Pat<(v2f64 (dword_alignedload addrmode6:$addr)),
|
||||
(VLD1q64 addrmode6:$addr)>;
|
||||
def : Pat<(dword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
|
||||
(VST1q64 addrmode6:$addr, QPR:$value)>;
|
||||
def : Pat<(v2f64 (word_alignedload addrmode6:$addr)),
|
||||
(VLD1q32 addrmode6:$addr)>;
|
||||
def : Pat<(word_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
|
||||
(VST1q32 addrmode6:$addr, QPR:$value)>;
|
||||
def : Pat<(v2f64 (hword_alignedload addrmode6:$addr)),
|
||||
(VLD1q16 addrmode6:$addr)>, Requires<[IsLE]>;
|
||||
def : Pat<(hword_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
|
||||
(VST1q16 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
|
||||
def : Pat<(v2f64 (byte_alignedload addrmode6:$addr)),
|
||||
(VLD1q8 addrmode6:$addr)>, Requires<[IsLE]>;
|
||||
def : Pat<(byte_alignedstore (v2f64 QPR:$value), addrmode6:$addr),
|
||||
(VST1q8 addrmode6:$addr, QPR:$value)>, Requires<[IsLE]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// NEON pattern fragments
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -9,8 +9,8 @@ entry:
|
||||
}
|
||||
|
||||
; Trigger multiple NEON stores.
|
||||
; CHECK: vstmia
|
||||
; CHECK-NEXT: vstmia
|
||||
; CHECK: vst1.64
|
||||
; CHECK-NEXT: vst1.64
|
||||
define void @f_0_40(i8* nocapture %c) nounwind optsize {
|
||||
entry:
|
||||
call void @llvm.memset.p0i8.i64(i8* %c, i8 0, i64 40, i32 16, i1 false)
|
||||
|
@ -8,12 +8,12 @@ define void @test_sqrt(<4 x float>* %X) nounwind {
|
||||
|
||||
; CHECK: movw r1, :lower16:{{.*}}
|
||||
; CHECK: movt r1, :upper16:{{.*}}
|
||||
; CHECK: vldmia r1
|
||||
; CHECK: vld1.64 {{.*}}, [r1, :128]
|
||||
; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; CHECK: vsqrt.f32 {{s[0-9]+}}, {{s[0-9]+}}
|
||||
; CHECK: vstmia {{.*}}
|
||||
; CHECK: vst1.64 {{.*}}
|
||||
|
||||
L.entry:
|
||||
%0 = load <4 x float>* @A, align 16
|
||||
@ -31,7 +31,7 @@ define void @test_cos(<4 x float>* %X) nounwind {
|
||||
|
||||
; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
; CHECK: vld1.64
|
||||
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}cosf
|
||||
@ -45,7 +45,7 @@ define void @test_cos(<4 x float>* %X) nounwind {
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}cosf
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
; CHECK: vst1.64
|
||||
|
||||
L.entry:
|
||||
%0 = load <4 x float>* @A, align 16
|
||||
@ -62,7 +62,7 @@ define void @test_exp(<4 x float>* %X) nounwind {
|
||||
|
||||
; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
; CHECK: vld1.64
|
||||
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}expf
|
||||
@ -76,7 +76,7 @@ define void @test_exp(<4 x float>* %X) nounwind {
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}expf
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
; CHECK: vst1.64
|
||||
|
||||
L.entry:
|
||||
%0 = load <4 x float>* @A, align 16
|
||||
@ -93,7 +93,7 @@ define void @test_exp2(<4 x float>* %X) nounwind {
|
||||
|
||||
; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
; CHECK: vld1.64
|
||||
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}exp2f
|
||||
@ -107,7 +107,7 @@ define void @test_exp2(<4 x float>* %X) nounwind {
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}exp2f
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
; CHECK: vst1.64
|
||||
|
||||
L.entry:
|
||||
%0 = load <4 x float>* @A, align 16
|
||||
@ -124,7 +124,7 @@ define void @test_log10(<4 x float>* %X) nounwind {
|
||||
|
||||
; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
; CHECK: vld1.64
|
||||
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}log10f
|
||||
@ -138,7 +138,7 @@ define void @test_log10(<4 x float>* %X) nounwind {
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}log10f
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
; CHECK: vst1.64
|
||||
|
||||
L.entry:
|
||||
%0 = load <4 x float>* @A, align 16
|
||||
@ -155,7 +155,7 @@ define void @test_log(<4 x float>* %X) nounwind {
|
||||
|
||||
; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
; CHECK: vld1.64
|
||||
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}logf
|
||||
@ -169,7 +169,7 @@ define void @test_log(<4 x float>* %X) nounwind {
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}logf
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
; CHECK: vst1.64
|
||||
|
||||
L.entry:
|
||||
%0 = load <4 x float>* @A, align 16
|
||||
@ -186,7 +186,7 @@ define void @test_log2(<4 x float>* %X) nounwind {
|
||||
|
||||
; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
; CHECK: vld1.64
|
||||
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}log2f
|
||||
@ -200,7 +200,7 @@ define void @test_log2(<4 x float>* %X) nounwind {
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}log2f
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
; CHECK: vst1.64
|
||||
|
||||
L.entry:
|
||||
%0 = load <4 x float>* @A, align 16
|
||||
@ -218,7 +218,7 @@ define void @test_pow(<4 x float>* %X) nounwind {
|
||||
|
||||
; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
; CHECK: vld1.64
|
||||
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}powf
|
||||
@ -232,7 +232,7 @@ define void @test_pow(<4 x float>* %X) nounwind {
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}powf
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
; CHECK: vst1.64
|
||||
|
||||
L.entry:
|
||||
|
||||
@ -252,10 +252,10 @@ define void @test_powi(<4 x float>* %X) nounwind {
|
||||
|
||||
; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia [[reg0]], {{.*}}
|
||||
; CHECK: vld1.64 {{.*}}, :128
|
||||
; CHECK: vmul.f32 {{.*}}
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
; CHECK: vst1.64
|
||||
|
||||
L.entry:
|
||||
|
||||
@ -275,7 +275,7 @@ define void @test_sin(<4 x float>* %X) nounwind {
|
||||
|
||||
; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
; CHECK: vld1.64
|
||||
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}sinf
|
||||
@ -289,7 +289,7 @@ define void @test_sin(<4 x float>* %X) nounwind {
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}sinf
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
; CHECK: vst1.64
|
||||
|
||||
L.entry:
|
||||
%0 = load <4 x float>* @A, align 16
|
||||
@ -306,7 +306,7 @@ define void @test_floor(<4 x float>* %X) nounwind {
|
||||
|
||||
; CHECK: movw [[reg0:r[0-9]+]], :lower16:{{.*}}
|
||||
; CHECK: movt [[reg0]], :upper16:{{.*}}
|
||||
; CHECK: vldmia r{{[0-9][0-9]?}}, {{.*}}
|
||||
; CHECK: vld1.64
|
||||
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}floorf
|
||||
@ -320,7 +320,7 @@ define void @test_floor(<4 x float>* %X) nounwind {
|
||||
; CHECK: {{v?mov(.32)?}} r0,
|
||||
; CHECK: bl {{.*}}floorf
|
||||
|
||||
; CHECK: vstmia {{.*}}
|
||||
; CHECK: vst1.64
|
||||
|
||||
L.entry:
|
||||
%0 = load <4 x float>* @A, align 16
|
||||
|
@ -1,10 +1,10 @@
|
||||
; RUN: llc < %s -march=arm -mattr=+neon | FileCheck %s
|
||||
|
||||
; CHECK: t1
|
||||
; CHECK: vldmia
|
||||
; CHECK: vldmia
|
||||
; CHECK: vld1.64
|
||||
; CHECK: vld1.64
|
||||
; CHECK: vadd.i64 q
|
||||
; CHECK: vstmia
|
||||
; CHECK: vst1.64
|
||||
define void @t1(<4 x i32>* %r, <2 x i64>* %a, <2 x i64>* %b) nounwind {
|
||||
entry:
|
||||
%0 = load <2 x i64>* %a, align 16 ; <<2 x i64>> [#uses=1]
|
||||
@ -16,8 +16,8 @@ entry:
|
||||
}
|
||||
|
||||
; CHECK: t2
|
||||
; CHECK: vldmia
|
||||
; CHECK: vldmia
|
||||
; CHECK: vld1.64
|
||||
; CHECK: vld1.64
|
||||
; CHECK: vsub.i64 q
|
||||
; CHECK: vmov r0, r1, d
|
||||
; CHECK: vmov r2, r3, d
|
||||
|
@ -137,7 +137,7 @@ return2:
|
||||
|
||||
define <8 x i16> @t5(i16* %A, <8 x i16>* %B) nounwind {
|
||||
; CHECK: t5:
|
||||
; CHECK: vldmia
|
||||
; CHECK: vld1.32
|
||||
; How can FileCheck match Q and D registers? We need a lisp interpreter.
|
||||
; CHECK: vorr {{q[0-9]+}}, {{q[0-9]+}}, {{q[0-9]+}}
|
||||
; CHECK-NOT: vmov
|
||||
@ -243,8 +243,8 @@ define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind {
|
||||
; CHECK: vldr
|
||||
; CHECK-NOT: vmov d{{.*}}, d16
|
||||
; CHECK: vmov.i32 d17
|
||||
; CHECK-NEXT: vstmia r0, {d16, d17}
|
||||
; CHECK-NEXT: vstmia r0, {d16, d17}
|
||||
; CHECK-NEXT: vst1.64 {d16, d17}, [r0, :128]
|
||||
; CHECK-NEXT: vst1.64 {d16, d17}, [r0, :128]
|
||||
%3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2]
|
||||
%4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1]
|
||||
store <4 x float> %4, <4 x float>* undef, align 16
|
||||
|
@ -4,12 +4,12 @@
|
||||
define void @PR13378() nounwind {
|
||||
; This was originally a crasher trying to schedule the instructions.
|
||||
; CHECK: PR13378:
|
||||
; CHECK: vldmia
|
||||
; CHECK: vld1.32
|
||||
; CHECK-NEXT: vst1.32
|
||||
; CHECK-NEXT: vst1.32
|
||||
; CHECK-NEXT: vmov.f32
|
||||
; CHECK-NEXT: vstmia
|
||||
; CHECK-NEXT: vstmia
|
||||
; CHECK-NEXT: vmov.f32
|
||||
; CHECK-NEXT: vstmia
|
||||
; CHECK-NEXT: vst1.32
|
||||
|
||||
entry:
|
||||
%0 = load <4 x float>* undef
|
||||
|
@ -1,5 +1,5 @@
|
||||
; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED
|
||||
; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 -arm-strict-align -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED
|
||||
; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 -mattr=-neon -arm-strict-align -pre-RA-sched=source | FileCheck %s -check-prefix=EXPANDED
|
||||
; RUN: llc < %s -mtriple=armv6-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=UNALIGNED
|
||||
|
||||
; rdar://7113725
|
||||
@ -59,3 +59,19 @@ entry:
|
||||
store double %tmp, double* %b, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @byte_word_ops(i32* %a, i32* %b) nounwind {
|
||||
entry:
|
||||
; EXPANDED: byte_word_ops:
|
||||
; EXPANDED: ldrb
|
||||
; EXPANDED: strb
|
||||
|
||||
; UNALIGNED: byte_word_ops:
|
||||
; UNALIGNED-NOT: ldrb
|
||||
; UNALIGNED: ldr
|
||||
; UNALIGNED-NOT: strb
|
||||
; UNALIGNED: str
|
||||
%tmp = load i32* %a, align 1
|
||||
store i32 %tmp, i32* %b, align 1
|
||||
ret void
|
||||
}
|
||||
|
487
test/CodeGen/ARM/unaligned_load_store_vector.ll
Normal file
487
test/CodeGen/ARM/unaligned_load_store_vector.ll
Normal file
@ -0,0 +1,487 @@
|
||||
;RUN: llc < %s -march=arm -mattr=+v7 -mattr=+neon | FileCheck %s
|
||||
|
||||
;ALIGN = 1
|
||||
;SIZE = 64
|
||||
;TYPE = <8 x i8>
|
||||
define void @v64_v8i8_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v64_v8i8_1:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <8 x i8>*
|
||||
%vo = bitcast i8* %po to <8 x i8>*
|
||||
;CHECK: vld1.8
|
||||
%v1 = load <8 x i8>* %vi, align 1
|
||||
;CHECK: vst1.8
|
||||
store <8 x i8> %v1, <8 x i8>* %vo, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 1
|
||||
;SIZE = 64
|
||||
;TYPE = <4 x i16>
|
||||
define void @v64_v4i16_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v64_v4i16_1:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <4 x i16>*
|
||||
%vo = bitcast i8* %po to <4 x i16>*
|
||||
;CHECK: vld1.8
|
||||
%v1 = load <4 x i16>* %vi, align 1
|
||||
;CHECK: vst1.8
|
||||
store <4 x i16> %v1, <4 x i16>* %vo, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 1
|
||||
;SIZE = 64
|
||||
;TYPE = <2 x i32>
|
||||
define void @v64_v2i32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v64_v2i32_1:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <2 x i32>*
|
||||
%vo = bitcast i8* %po to <2 x i32>*
|
||||
;CHECK: vld1.8
|
||||
%v1 = load <2 x i32>* %vi, align 1
|
||||
;CHECK: vst1.8
|
||||
store <2 x i32> %v1, <2 x i32>* %vo, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 1
|
||||
;SIZE = 64
|
||||
;TYPE = <2 x float>
|
||||
define void @v64_v2f32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v64_v2f32_1:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <2 x float>*
|
||||
%vo = bitcast i8* %po to <2 x float>*
|
||||
;CHECK: vld1.8
|
||||
%v1 = load <2 x float>* %vi, align 1
|
||||
;CHECK: vst1.8
|
||||
store <2 x float> %v1, <2 x float>* %vo, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 1
|
||||
;SIZE = 128
|
||||
;TYPE = <16 x i8>
|
||||
define void @v128_v16i8_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v128_v16i8_1:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <16 x i8>*
|
||||
%vo = bitcast i8* %po to <16 x i8>*
|
||||
;CHECK: vld1.8
|
||||
%v1 = load <16 x i8>* %vi, align 1
|
||||
;CHECK: vst1.8
|
||||
store <16 x i8> %v1, <16 x i8>* %vo, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 1
|
||||
;SIZE = 128
|
||||
;TYPE = <8 x i16>
|
||||
define void @v128_v8i16_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v128_v8i16_1:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <8 x i16>*
|
||||
%vo = bitcast i8* %po to <8 x i16>*
|
||||
;CHECK: vld1.8
|
||||
%v1 = load <8 x i16>* %vi, align 1
|
||||
;CHECK: vst1.8
|
||||
store <8 x i16> %v1, <8 x i16>* %vo, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 1
|
||||
;SIZE = 128
|
||||
;TYPE = <4 x i32>
|
||||
define void @v128_v4i32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v128_v4i32_1:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <4 x i32>*
|
||||
%vo = bitcast i8* %po to <4 x i32>*
|
||||
;CHECK: vld1.8
|
||||
%v1 = load <4 x i32>* %vi, align 1
|
||||
;CHECK: vst1.8
|
||||
store <4 x i32> %v1, <4 x i32>* %vo, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 1
|
||||
;SIZE = 128
|
||||
;TYPE = <2 x i64>
|
||||
define void @v128_v2i64_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v128_v2i64_1:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <2 x i64>*
|
||||
%vo = bitcast i8* %po to <2 x i64>*
|
||||
;CHECK: vld1.8
|
||||
%v1 = load <2 x i64>* %vi, align 1
|
||||
;CHECK: vst1.8
|
||||
store <2 x i64> %v1, <2 x i64>* %vo, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 1
|
||||
;SIZE = 128
|
||||
;TYPE = <4 x float>
|
||||
define void @v128_v4f32_1(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v128_v4f32_1:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <4 x float>*
|
||||
%vo = bitcast i8* %po to <4 x float>*
|
||||
;CHECK: vld1.8
|
||||
%v1 = load <4 x float>* %vi, align 1
|
||||
;CHECK: vst1.8
|
||||
store <4 x float> %v1, <4 x float>* %vo, align 1
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 2
|
||||
;SIZE = 64
|
||||
;TYPE = <8 x i8>
|
||||
define void @v64_v8i8_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v64_v8i8_2:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <8 x i8>*
|
||||
%vo = bitcast i8* %po to <8 x i8>*
|
||||
;CHECK: vld1.16
|
||||
%v1 = load <8 x i8>* %vi, align 2
|
||||
;CHECK: vst1.16
|
||||
store <8 x i8> %v1, <8 x i8>* %vo, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 2
|
||||
;SIZE = 64
|
||||
;TYPE = <4 x i16>
|
||||
define void @v64_v4i16_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v64_v4i16_2:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <4 x i16>*
|
||||
%vo = bitcast i8* %po to <4 x i16>*
|
||||
;CHECK: vld1.16
|
||||
%v1 = load <4 x i16>* %vi, align 2
|
||||
;CHECK: vst1.16
|
||||
store <4 x i16> %v1, <4 x i16>* %vo, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 2
|
||||
;SIZE = 64
|
||||
;TYPE = <2 x i32>
|
||||
define void @v64_v2i32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v64_v2i32_2:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <2 x i32>*
|
||||
%vo = bitcast i8* %po to <2 x i32>*
|
||||
;CHECK: vld1.16
|
||||
%v1 = load <2 x i32>* %vi, align 2
|
||||
;CHECK: vst1.16
|
||||
store <2 x i32> %v1, <2 x i32>* %vo, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 2
|
||||
;SIZE = 64
|
||||
;TYPE = <2 x float>
|
||||
define void @v64_v2f32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v64_v2f32_2:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <2 x float>*
|
||||
%vo = bitcast i8* %po to <2 x float>*
|
||||
;CHECK: vld1.16
|
||||
%v1 = load <2 x float>* %vi, align 2
|
||||
;CHECK: vst1.16
|
||||
store <2 x float> %v1, <2 x float>* %vo, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 2
|
||||
;SIZE = 128
|
||||
;TYPE = <16 x i8>
|
||||
define void @v128_v16i8_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v128_v16i8_2:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <16 x i8>*
|
||||
%vo = bitcast i8* %po to <16 x i8>*
|
||||
;CHECK: vld1.16
|
||||
%v1 = load <16 x i8>* %vi, align 2
|
||||
;CHECK: vst1.16
|
||||
store <16 x i8> %v1, <16 x i8>* %vo, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 2
|
||||
;SIZE = 128
|
||||
;TYPE = <8 x i16>
|
||||
define void @v128_v8i16_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v128_v8i16_2:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <8 x i16>*
|
||||
%vo = bitcast i8* %po to <8 x i16>*
|
||||
;CHECK: vld1.16
|
||||
%v1 = load <8 x i16>* %vi, align 2
|
||||
;CHECK: vst1.16
|
||||
store <8 x i16> %v1, <8 x i16>* %vo, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 2
|
||||
;SIZE = 128
|
||||
;TYPE = <4 x i32>
|
||||
define void @v128_v4i32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v128_v4i32_2:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <4 x i32>*
|
||||
%vo = bitcast i8* %po to <4 x i32>*
|
||||
;CHECK: vld1.16
|
||||
%v1 = load <4 x i32>* %vi, align 2
|
||||
;CHECK: vst1.16
|
||||
store <4 x i32> %v1, <4 x i32>* %vo, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 2
|
||||
;SIZE = 128
|
||||
;TYPE = <2 x i64>
|
||||
define void @v128_v2i64_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v128_v2i64_2:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <2 x i64>*
|
||||
%vo = bitcast i8* %po to <2 x i64>*
|
||||
;CHECK: vld1.16
|
||||
%v1 = load <2 x i64>* %vi, align 2
|
||||
;CHECK: vst1.16
|
||||
store <2 x i64> %v1, <2 x i64>* %vo, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 2
|
||||
;SIZE = 128
|
||||
;TYPE = <4 x float>
|
||||
define void @v128_v4f32_2(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v128_v4f32_2:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <4 x float>*
|
||||
%vo = bitcast i8* %po to <4 x float>*
|
||||
;CHECK: vld1.16
|
||||
%v1 = load <4 x float>* %vi, align 2
|
||||
;CHECK: vst1.16
|
||||
store <4 x float> %v1, <4 x float>* %vo, align 2
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 4
|
||||
;SIZE = 64
|
||||
;TYPE = <8 x i8>
|
||||
define void @v64_v8i8_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v64_v8i8_4:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <8 x i8>*
|
||||
%vo = bitcast i8* %po to <8 x i8>*
|
||||
;CHECK: vldr
|
||||
%v1 = load <8 x i8>* %vi, align 4
|
||||
;CHECK: vstr
|
||||
store <8 x i8> %v1, <8 x i8>* %vo, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 4
|
||||
;SIZE = 64
|
||||
;TYPE = <4 x i16>
|
||||
define void @v64_v4i16_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v64_v4i16_4:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <4 x i16>*
|
||||
%vo = bitcast i8* %po to <4 x i16>*
|
||||
;CHECK: vldr
|
||||
%v1 = load <4 x i16>* %vi, align 4
|
||||
;CHECK: vstr
|
||||
store <4 x i16> %v1, <4 x i16>* %vo, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 4
|
||||
;SIZE = 64
|
||||
;TYPE = <2 x i32>
|
||||
define void @v64_v2i32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v64_v2i32_4:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <2 x i32>*
|
||||
%vo = bitcast i8* %po to <2 x i32>*
|
||||
;CHECK: vldr
|
||||
%v1 = load <2 x i32>* %vi, align 4
|
||||
;CHECK: vstr
|
||||
store <2 x i32> %v1, <2 x i32>* %vo, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 4
|
||||
;SIZE = 64
|
||||
;TYPE = <2 x float>
|
||||
define void @v64_v2f32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v64_v2f32_4:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <2 x float>*
|
||||
%vo = bitcast i8* %po to <2 x float>*
|
||||
;CHECK: vldr
|
||||
%v1 = load <2 x float>* %vi, align 4
|
||||
;CHECK: vstr
|
||||
store <2 x float> %v1, <2 x float>* %vo, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 4
|
||||
;SIZE = 128
|
||||
;TYPE = <16 x i8>
|
||||
define void @v128_v16i8_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v128_v16i8_4:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <16 x i8>*
|
||||
%vo = bitcast i8* %po to <16 x i8>*
|
||||
;CHECK: vld1.32
|
||||
%v1 = load <16 x i8>* %vi, align 4
|
||||
;CHECK: vst1.32
|
||||
store <16 x i8> %v1, <16 x i8>* %vo, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 4
|
||||
;SIZE = 128
|
||||
;TYPE = <8 x i16>
|
||||
define void @v128_v8i16_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v128_v8i16_4:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <8 x i16>*
|
||||
%vo = bitcast i8* %po to <8 x i16>*
|
||||
;CHECK: vld1.32
|
||||
%v1 = load <8 x i16>* %vi, align 4
|
||||
;CHECK: vst1.32
|
||||
store <8 x i16> %v1, <8 x i16>* %vo, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 4
|
||||
;SIZE = 128
|
||||
;TYPE = <4 x i32>
|
||||
define void @v128_v4i32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v128_v4i32_4:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <4 x i32>*
|
||||
%vo = bitcast i8* %po to <4 x i32>*
|
||||
;CHECK: vld1.32
|
||||
%v1 = load <4 x i32>* %vi, align 4
|
||||
;CHECK: vst1.32
|
||||
store <4 x i32> %v1, <4 x i32>* %vo, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 4
|
||||
;SIZE = 128
|
||||
;TYPE = <2 x i64>
|
||||
define void @v128_v2i64_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v128_v2i64_4:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <2 x i64>*
|
||||
%vo = bitcast i8* %po to <2 x i64>*
|
||||
;CHECK: vld1.32
|
||||
%v1 = load <2 x i64>* %vi, align 4
|
||||
;CHECK: vst1.32
|
||||
store <2 x i64> %v1, <2 x i64>* %vo, align 4
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;ALIGN = 4
|
||||
;SIZE = 128
|
||||
;TYPE = <4 x float>
|
||||
define void @v128_v4f32_4(i8* noalias nocapture %out, i8* noalias nocapture %in) nounwind {
|
||||
;CHECK: v128_v4f32_4:
|
||||
entry:
|
||||
%po = getelementptr i8* %out, i32 0
|
||||
%pi = getelementptr i8* %in, i32 0
|
||||
%vi = bitcast i8* %pi to <4 x float>*
|
||||
%vo = bitcast i8* %po to <4 x float>*
|
||||
;CHECK: vld1.32
|
||||
%v1 = load <4 x float>* %vi, align 4
|
||||
;CHECK: vst1.32
|
||||
store <4 x float> %v1, <4 x float>* %vo, align 4
|
||||
ret void
|
||||
}
|
||||
|
@ -59,8 +59,8 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind
|
||||
|
||||
define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
|
||||
;CHECK: v_bslQi8:
|
||||
;CHECK: vldmia
|
||||
;CHECK: vldmia
|
||||
;CHECK: vld1.32
|
||||
;CHECK: vld1.32
|
||||
;CHECK: vbsl
|
||||
%tmp1 = load <16 x i8>* %A
|
||||
%tmp2 = load <16 x i8>* %B
|
||||
@ -73,8 +73,8 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
|
||||
|
||||
define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
|
||||
;CHECK: v_bslQi16:
|
||||
;CHECK: vldmia
|
||||
;CHECK: vldmia
|
||||
;CHECK: vld1.32
|
||||
;CHECK: vld1.32
|
||||
;CHECK: vbsl
|
||||
%tmp1 = load <8 x i16>* %A
|
||||
%tmp2 = load <8 x i16>* %B
|
||||
@ -87,8 +87,8 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin
|
||||
|
||||
define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
|
||||
;CHECK: v_bslQi32:
|
||||
;CHECK: vldmia
|
||||
;CHECK: vldmia
|
||||
;CHECK: vld1.32
|
||||
;CHECK: vld1.32
|
||||
;CHECK: vbsl
|
||||
%tmp1 = load <4 x i32>* %A
|
||||
%tmp2 = load <4 x i32>* %B
|
||||
@ -101,9 +101,9 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin
|
||||
|
||||
define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind {
|
||||
;CHECK: v_bslQi64:
|
||||
;CHECK: vldmia
|
||||
;CHECK: vldmia
|
||||
;CHECK: vldmia
|
||||
;CHECK: vld1.32
|
||||
;CHECK: vld1.32
|
||||
;CHECK: vld1.64
|
||||
;CHECK: vbsl
|
||||
%tmp1 = load <2 x i64>* %A
|
||||
%tmp2 = load <2 x i64>* %B
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -O3 -mtriple=thumbv7-apple-darwin10 -mcpu=cortex-a8 | FileCheck %s
|
||||
; RUN: llc < %s -O3 -mtriple=thumbv7-apple-ios -mcpu=cortex-a8 | FileCheck %s
|
||||
; Formerly crashed, 3573915.
|
||||
|
||||
define void @RotateStarsFP_Vec() nounwind {
|
||||
@ -13,5 +13,5 @@ bb8: ; preds = %bb8, %bb.nph372
|
||||
store <4 x float> %3, <4 x float>* undef, align 4
|
||||
br label %bb8
|
||||
; CHECK: RotateStarsFP_Vec:
|
||||
; CHECK: vldmia
|
||||
; CHECK: vld1.64
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user