; Source: mirror of https://github.com/c64scene-ar/llvm-6502.git
; (synced 2024-12-28 19:31:58 +00:00, commit 780a093afb; 213 lines, 6.3 KiB, LLVM).
;
; Upstream history: r223862 also combined base-updating load/stores; r224198
; reverted it because it "created a regression on the test-suite on test
; MultiSource/Benchmarks/Ptrdist/anagram by scrambling the order in which the
; words are shown." Reapplied with a fix to ignore non-normal load/stores.
; Truncstores are handled elsewhere (a pattern can be written for those,
; whereas postinc loads return two values, so they cannot), but extload base
; updates could also be combined by checking that the memory (rather than
; result) type is the same size as the addend.
;
; Original commit message: we used to only combine intrinsics, turning them
; into VLD1_UPD/VST1_UPD when the base pointer is incremented after the
; load/store. We can do the same thing for generic load/stores. Note that we
; can only combine the first load/store+adds pair in a sequence (as might be
; generated for a v16f32 load, for instance), because other combines turn the
; base pointer addition chain (each computing the address of the next load
; from the address of the last load) into independent additions (common base
; pointer + this load's offset).
;
; Differential Revision: http://reviews.llvm.org/D6585
; git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224203 91177308-0d34-0410-b5e6-96231b3b80d8
; RUN: llc < %s | FileCheck %s

; ARM/iOS target: checks that generic vector loads whose base pointer is
; post-incremented are combined into the auto-increment VLD1 forms.
target datalayout = "e-m:o-p:32:32-i1:8:32-i8:8:32-i16:16:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
target triple = "thumbv7s-apple-ios8.0.0"
; d-register-sized load with no base update: expect a plain vld1.8.
define <8 x i8> @load_v8i8(<8 x i8>** %ptr) {
;CHECK-LABEL: load_v8i8:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <8 x i8>** %ptr
  %lA = load <8 x i8>* %A, align 1
  ret <8 x i8> %lA
}
; Load plus a one-element base-pointer increment stored back through %ptr:
; expect the post-incrementing form vld1.8 [...]!.
define <8 x i8> @load_v8i8_update(<8 x i8>** %ptr) {
;CHECK-LABEL: load_v8i8_update:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <8 x i8>** %ptr
  %lA = load <8 x i8>* %A, align 1
  %inc = getelementptr <8 x i8>* %A, i38 1 ; i38: odd but valid arbitrary-width index type, kept as upstream
  store <8 x i8>* %inc, <8 x i8>** %ptr
  ret <8 x i8> %lA
}
; Unaligned <4 x i16> load, no base update: lowered as a byte-wise vld1.8.
define <4 x i16> @load_v4i16(<4 x i16>** %ptr) {
;CHECK-LABEL: load_v4i16:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <4 x i16>** %ptr
  %lA = load <4 x i16>* %A, align 1
  ret <4 x i16> %lA
}
; <4 x i16> load with base update: expect post-incrementing vld1.16 [...]!.
define <4 x i16> @load_v4i16_update(<4 x i16>** %ptr) {
;CHECK-LABEL: load_v4i16_update:
;CHECK: vld1.16 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <4 x i16>** %ptr
  %lA = load <4 x i16>* %A, align 1
  %inc = getelementptr <4 x i16>* %A, i34 1 ; i34: arbitrary-width index type, kept as upstream
  store <4 x i16>* %inc, <4 x i16>** %ptr
  ret <4 x i16> %lA
}
; Unaligned <2 x i32> load, no base update: lowered as a byte-wise vld1.8.
define <2 x i32> @load_v2i32(<2 x i32>** %ptr) {
;CHECK-LABEL: load_v2i32:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <2 x i32>** %ptr
  %lA = load <2 x i32>* %A, align 1
  ret <2 x i32> %lA
}
; <2 x i32> load with base update: expect post-incrementing vld1.32 [...]!.
define <2 x i32> @load_v2i32_update(<2 x i32>** %ptr) {
;CHECK-LABEL: load_v2i32_update:
;CHECK: vld1.32 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i32>** %ptr
  %lA = load <2 x i32>* %A, align 1
  %inc = getelementptr <2 x i32>* %A, i32 1
  store <2 x i32>* %inc, <2 x i32>** %ptr
  ret <2 x i32> %lA
}
; Unaligned <2 x float> load, no base update: lowered as a byte-wise vld1.8.
define <2 x float> @load_v2f32(<2 x float>** %ptr) {
;CHECK-LABEL: load_v2f32:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <2 x float>** %ptr
  %lA = load <2 x float>* %A, align 1
  ret <2 x float> %lA
}
; <2 x float> load with base update: expect post-incrementing vld1.32 [...]!.
define <2 x float> @load_v2f32_update(<2 x float>** %ptr) {
;CHECK-LABEL: load_v2f32_update:
;CHECK: vld1.32 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x float>** %ptr
  %lA = load <2 x float>* %A, align 1
  %inc = getelementptr <2 x float>* %A, i32 1
  store <2 x float>* %inc, <2 x float>** %ptr
  ret <2 x float> %lA
}
; Unaligned <1 x i64> load, no base update: lowered as a byte-wise vld1.8.
define <1 x i64> @load_v1i64(<1 x i64>** %ptr) {
;CHECK-LABEL: load_v1i64:
;CHECK: vld1.8 {{{d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <1 x i64>** %ptr
  %lA = load <1 x i64>* %A, align 1
  ret <1 x i64> %lA
}
; <1 x i64> load with base update: expect post-incrementing vld1.64 [...]!.
define <1 x i64> @load_v1i64_update(<1 x i64>** %ptr) {
;CHECK-LABEL: load_v1i64_update:
;CHECK: vld1.64 {{{d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <1 x i64>** %ptr
  %lA = load <1 x i64>* %A, align 1
  %inc = getelementptr <1 x i64>* %A, i31 1 ; i31: arbitrary-width index type, kept as upstream
  store <1 x i64>* %inc, <1 x i64>** %ptr
  ret <1 x i64> %lA
}
; q-register-sized (two d-regs) load, no base update: plain vld1.8.
define <16 x i8> @load_v16i8(<16 x i8>** %ptr) {
;CHECK-LABEL: load_v16i8:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <16 x i8>** %ptr
  %lA = load <16 x i8>* %A, align 1
  ret <16 x i8> %lA
}
; q-register-sized load with base update: expect vld1.8 {dN, dM}, [...]!.
define <16 x i8> @load_v16i8_update(<16 x i8>** %ptr) {
;CHECK-LABEL: load_v16i8_update:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <16 x i8>** %ptr
  %lA = load <16 x i8>* %A, align 1
  %inc = getelementptr <16 x i8>* %A, i316 1 ; i316: arbitrary-width index type, kept as upstream
  store <16 x i8>* %inc, <16 x i8>** %ptr
  ret <16 x i8> %lA
}
; Unaligned <8 x i16> load, no base update: lowered as a byte-wise vld1.8.
define <8 x i16> @load_v8i16(<8 x i16>** %ptr) {
;CHECK-LABEL: load_v8i16:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <8 x i16>** %ptr
  %lA = load <8 x i16>* %A, align 1
  ret <8 x i16> %lA
}
; <8 x i16> load with base update: expect post-incrementing vld1.16 [...]!.
define <8 x i16> @load_v8i16_update(<8 x i16>** %ptr) {
;CHECK-LABEL: load_v8i16_update:
;CHECK: vld1.16 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <8 x i16>** %ptr
  %lA = load <8 x i16>* %A, align 1
  %inc = getelementptr <8 x i16>* %A, i38 1 ; i38: arbitrary-width index type, kept as upstream
  store <8 x i16>* %inc, <8 x i16>** %ptr
  ret <8 x i16> %lA
}
; Unaligned <4 x i32> load, no base update: lowered as a byte-wise vld1.8.
define <4 x i32> @load_v4i32(<4 x i32>** %ptr) {
;CHECK-LABEL: load_v4i32:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <4 x i32>** %ptr
  %lA = load <4 x i32>* %A, align 1
  ret <4 x i32> %lA
}
; <4 x i32> load with base update: expect post-incrementing vld1.32 [...]!.
define <4 x i32> @load_v4i32_update(<4 x i32>** %ptr) {
;CHECK-LABEL: load_v4i32_update:
;CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <4 x i32>** %ptr
  %lA = load <4 x i32>* %A, align 1
  %inc = getelementptr <4 x i32>* %A, i34 1 ; i34: arbitrary-width index type, kept as upstream
  store <4 x i32>* %inc, <4 x i32>** %ptr
  ret <4 x i32> %lA
}
; Unaligned <4 x float> load, no base update: lowered as a byte-wise vld1.8.
define <4 x float> @load_v4f32(<4 x float>** %ptr) {
;CHECK-LABEL: load_v4f32:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <4 x float>** %ptr
  %lA = load <4 x float>* %A, align 1
  ret <4 x float> %lA
}
; <4 x float> load with base update: expect post-incrementing vld1.32 [...]!.
define <4 x float> @load_v4f32_update(<4 x float>** %ptr) {
;CHECK-LABEL: load_v4f32_update:
;CHECK: vld1.32 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <4 x float>** %ptr
  %lA = load <4 x float>* %A, align 1
  %inc = getelementptr <4 x float>* %A, i34 1 ; i34: arbitrary-width index type, kept as upstream
  store <4 x float>* %inc, <4 x float>** %ptr
  ret <4 x float> %lA
}
; Unaligned <2 x i64> load, no base update: lowered as a byte-wise vld1.8.
define <2 x i64> @load_v2i64(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64:
;CHECK: vld1.8 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]
  %A = load <2 x i64>** %ptr
  %lA = load <2 x i64>* %A, align 1
  ret <2 x i64> %lA
}
; <2 x i64> load with base update: expect post-incrementing vld1.64 [...]!.
define <2 x i64> @load_v2i64_update(<2 x i64>** %ptr) {
;CHECK-LABEL: load_v2i64_update:
;CHECK: vld1.64 {{{d[0-9]+, d[0-9]+}}}, [{{r[0-9]+}}]!
  %A = load <2 x i64>** %ptr
  %lA = load <2 x i64>* %A, align 1
  %inc = getelementptr <2 x i64>* %A, i32 1
  store <2 x i64>* %inc, <2 x i64>** %ptr
  ret <2 x i64> %lA
}
; Make sure we don't break smaller-than-dreg extloads.
; <4 x i8> zext-load: a lane load plus two widening moves, no combine.
define <4 x i32> @zextload_v8i8tov8i32(<4 x i8>** %ptr) {
;CHECK-LABEL: zextload_v8i8tov8i32:
;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [{{r[0-9]+}}:32]
;CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
;CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
  %A = load <4 x i8>** %ptr
  %lA = load <4 x i8>* %A, align 1
  %zlA = zext <4 x i8> %lA to <4 x i32>
  ret <4 x i32> %zlA
}
; "Fake" update: the increment (4 x <4 x i8> = 16 bytes) does not match the
; 4-byte memory access, so no post-increment form may be formed — the add
; and store must stay separate (add.w/str.w checked explicitly).
define <4 x i32> @zextload_v8i8tov8i32_fake_update(<4 x i8>** %ptr) {
;CHECK-LABEL: zextload_v8i8tov8i32_fake_update:
;CHECK: ldr.w r[[PTRREG:[0-9]+]], [r0]
;CHECK: vld1.32 {{{d[0-9]+}}[0]}, [r[[PTRREG]]:32]
;CHECK: add.w r[[INCREG:[0-9]+]], r[[PTRREG]], #16
;CHECK: str.w r[[INCREG]], [r0]
;CHECK: vmovl.u8 {{q[0-9]+}}, {{d[0-9]+}}
;CHECK: vmovl.u16 {{q[0-9]+}}, {{d[0-9]+}}
  %A = load <4 x i8>** %ptr
  %lA = load <4 x i8>* %A, align 4
  %inc = getelementptr <4 x i8>* %A, i38 4 ; i38: arbitrary-width index type, kept as upstream
  store <4 x i8>* %inc, <4 x i8>** %ptr
  %zlA = zext <4 x i8> %lA to <4 x i32>
  ret <4 x i32> %zlA
}