mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-08-05 13:26:55 +00:00
Add codegen support for using post-increment NEON load/store instructions.
The vld1-lane, vld1-dup and vst1-lane instructions do not yet have post-increment support, but all the other NEON load/store instructions should now be handled.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@125014 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -33,6 +33,21 @@ define <4 x i16> @vld3i16(i16* %A) nounwind {
|
||||
ret <4 x i16> %tmp4
|
||||
}
|
||||
|
||||
;Check for a post-increment updating load with register increment.
|
||||
; Test function: performs a vld3 of i16 elements through the pointer stored at
; %ptr, then advances that stored pointer by %inc elements. The expected output
; below requires a single vld3.16 that carries a register post-increment.
define <4 x i16> @vld3i16_update(i16** %ptr, i32 %inc) nounwind {
|
||||
;CHECK: vld3i16_update:
|
||||
;CHECK: vld3.16 {d16, d17, d18}, [r2], r1
|
||||
; Fetch the current element pointer out of %ptr; it is the base address of the load.
%A = load i16** %ptr
|
||||
; The vld3 intrinsic takes its address as i8*, so cast the typed pointer.
%tmp0 = bitcast i16* %A to i8*
|
||||
; NOTE(review): the trailing i32 1 operand is presumably the alignment argument
; of the intrinsic - confirm against the intrinsic's declaration (not visible here).
%tmp1 = call %struct.__neon_int16x4x3_t @llvm.arm.neon.vld3.v4i16(i8* %tmp0, i32 1)
|
||||
; Use members 0 and 2 of the returned struct so the load result stays live.
%tmp2 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 0
|
||||
%tmp3 = extractvalue %struct.__neon_int16x4x3_t %tmp1, 2
|
||||
%tmp4 = add <4 x i16> %tmp2, %tmp3
|
||||
; Advance the base pointer by a run-time amount (%inc i16 elements) ...
%tmp5 = getelementptr i16* %A, i32 %inc
|
||||
; ... and write it back to %ptr. This add+store of the updated address is the
; pattern the test expects to be folded into the load's post-increment form.
store i16* %tmp5, i16** %ptr
|
||||
ret <4 x i16> %tmp4
|
||||
}
|
||||
|
||||
define <2 x i32> @vld3i32(i32* %A) nounwind {
|
||||
;CHECK: vld3i32:
|
||||
;CHECK: vld3.32
|
||||
@@ -103,6 +118,22 @@ define <4 x i32> @vld3Qi32(i32* %A) nounwind {
|
||||
ret <4 x i32> %tmp4
|
||||
}
|
||||
|
||||
;Check for a post-increment updating load.
|
||||
; Test function: performs a quad-register vld3 of i32 elements through the
; pointer stored at %ptr, then advances that stored pointer by a constant.
; The expected output below requires two vld3.32 instructions, each using the
; write-back ("!") addressing form on the same base register.
define <4 x i32> @vld3Qi32_update(i32** %ptr) nounwind {
|
||||
;CHECK: vld3Qi32_update:
|
||||
;CHECK: vld3.32 {d16, d18, d20}, [r1]!
|
||||
;CHECK: vld3.32 {d17, d19, d21}, [r1]!
|
||||
; Fetch the current element pointer out of %ptr; it is the base address of the load.
%A = load i32** %ptr
|
||||
; The vld3 intrinsic takes its address as i8*, so cast the typed pointer.
%tmp0 = bitcast i32* %A to i8*
|
||||
; NOTE(review): the trailing i32 1 operand is presumably the alignment argument
; of the intrinsic - confirm against the intrinsic's declaration (not visible here).
%tmp1 = call %struct.__neon_int32x4x3_t @llvm.arm.neon.vld3.v4i32(i8* %tmp0, i32 1)
|
||||
; Use members 0 and 2 of the returned struct so the load result stays live.
%tmp2 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 0
|
||||
%tmp3 = extractvalue %struct.__neon_int32x4x3_t %tmp1, 2
|
||||
%tmp4 = add <4 x i32> %tmp2, %tmp3
|
||||
; Advance the base pointer by 12 i32 elements (48 bytes). This presumably equals
; the total size loaded (three <4 x i32> vectors), which is what lets the update
; be expressed as fixed write-back - the struct definition is not visible here.
%tmp5 = getelementptr i32* %A, i32 12
|
||||
; Write the advanced pointer back to %ptr.
store i32* %tmp5, i32** %ptr
|
||||
ret <4 x i32> %tmp4
|
||||
}
|
||||
|
||||
define <4 x float> @vld3Qf(float* %A) nounwind {
|
||||
;CHECK: vld3Qf:
|
||||
;CHECK: vld3.32
|
||||
|
Reference in New Issue
Block a user