mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-07-07 08:24:27 +00:00
Use new EVT::vAny type to combine Neon intrinsics for VPADD.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78632 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -42,6 +42,9 @@ let TargetPrefix = "arm" in { // All intrinsics start with "llvm.arm.".
|
|||||||
class Neon_2Arg_Float_Intrinsic
|
class Neon_2Arg_Float_Intrinsic
|
||||||
: Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
|
: Intrinsic<[llvm_anyfloat_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
|
||||||
[IntrNoMem]>;
|
[IntrNoMem]>;
|
||||||
|
class Neon_2Arg_Vector_Intrinsic
|
||||||
|
: Intrinsic<[llvm_anyvector_ty], [LLVMMatchType<0>, LLVMMatchType<0>],
|
||||||
|
[IntrNoMem]>;
|
||||||
class Neon_2Arg_Narrow_Intrinsic
|
class Neon_2Arg_Narrow_Intrinsic
|
||||||
: Intrinsic<[llvm_anyint_ty],
|
: Intrinsic<[llvm_anyint_ty],
|
||||||
[LLVMExtendedElementVectorType<0>,
|
[LLVMExtendedElementVectorType<0>,
|
||||||
@@ -194,8 +197,7 @@ def int_arm_neon_vabals : Neon_3Arg_Long_Intrinsic;
|
|||||||
def int_arm_neon_vabalu : Neon_3Arg_Long_Intrinsic;
|
def int_arm_neon_vabalu : Neon_3Arg_Long_Intrinsic;
|
||||||
|
|
||||||
// Vector Pairwise Add.
|
// Vector Pairwise Add.
|
||||||
def int_arm_neon_vpaddi : Neon_2Arg_Intrinsic;
|
def int_arm_neon_vpadd : Neon_2Arg_Vector_Intrinsic;
|
||||||
def int_arm_neon_vpaddf : Neon_2Arg_Float_Intrinsic;
|
|
||||||
|
|
||||||
// Vector Pairwise Add Long.
|
// Vector Pairwise Add Long.
|
||||||
// Note: This is different than the other "long" NEON intrinsics because
|
// Note: This is different than the other "long" NEON intrinsics because
|
||||||
|
@@ -1261,13 +1261,13 @@ def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, "vmin.f32", v4f32, v4f32,
|
|||||||
|
|
||||||
// VPADD : Vector Pairwise Add
|
// VPADD : Vector Pairwise Add
|
||||||
def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, "vpadd.i8", v8i8, v8i8,
|
def VPADDi8 : N3VDInt<0, 0, 0b00, 0b1011, 1, "vpadd.i8", v8i8, v8i8,
|
||||||
int_arm_neon_vpaddi, 0>;
|
int_arm_neon_vpadd, 0>;
|
||||||
def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, "vpadd.i16", v4i16, v4i16,
|
def VPADDi16 : N3VDInt<0, 0, 0b01, 0b1011, 1, "vpadd.i16", v4i16, v4i16,
|
||||||
int_arm_neon_vpaddi, 0>;
|
int_arm_neon_vpadd, 0>;
|
||||||
def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, "vpadd.i32", v2i32, v2i32,
|
def VPADDi32 : N3VDInt<0, 0, 0b10, 0b1011, 1, "vpadd.i32", v2i32, v2i32,
|
||||||
int_arm_neon_vpaddi, 0>;
|
int_arm_neon_vpadd, 0>;
|
||||||
def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, "vpadd.f32", v2f32, v2f32,
|
def VPADDf : N3VDInt<1, 0, 0b00, 0b1101, 0, "vpadd.f32", v2f32, v2f32,
|
||||||
int_arm_neon_vpaddf, 0>;
|
int_arm_neon_vpadd, 0>;
|
||||||
|
|
||||||
// VPADDL : Vector Pairwise Add Long
|
// VPADDL : Vector Pairwise Add Long
|
||||||
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s",
|
defm VPADDLs : N2VPLInt_QHS<0b11, 0b11, 0b00, 0b00100, 0, "vpaddl.s",
|
||||||
|
@@ -7,33 +7,33 @@
|
|||||||
define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
define <8 x i8> @vpaddi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
|
||||||
%tmp1 = load <8 x i8>* %A
|
%tmp1 = load <8 x i8>* %A
|
||||||
%tmp2 = load <8 x i8>* %B
|
%tmp2 = load <8 x i8>* %B
|
||||||
%tmp3 = call <8 x i8> @llvm.arm.neon.vpaddi.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
|
%tmp3 = call <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8> %tmp1, <8 x i8> %tmp2)
|
||||||
ret <8 x i8> %tmp3
|
ret <8 x i8> %tmp3
|
||||||
}
|
}
|
||||||
|
|
||||||
define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
|
define <4 x i16> @vpaddi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
|
||||||
%tmp1 = load <4 x i16>* %A
|
%tmp1 = load <4 x i16>* %A
|
||||||
%tmp2 = load <4 x i16>* %B
|
%tmp2 = load <4 x i16>* %B
|
||||||
%tmp3 = call <4 x i16> @llvm.arm.neon.vpaddi.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
|
%tmp3 = call <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16> %tmp1, <4 x i16> %tmp2)
|
||||||
ret <4 x i16> %tmp3
|
ret <4 x i16> %tmp3
|
||||||
}
|
}
|
||||||
|
|
||||||
define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
|
define <2 x i32> @vpaddi32(<2 x i32>* %A, <2 x i32>* %B) nounwind {
|
||||||
%tmp1 = load <2 x i32>* %A
|
%tmp1 = load <2 x i32>* %A
|
||||||
%tmp2 = load <2 x i32>* %B
|
%tmp2 = load <2 x i32>* %B
|
||||||
%tmp3 = call <2 x i32> @llvm.arm.neon.vpaddi.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
|
%tmp3 = call <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32> %tmp1, <2 x i32> %tmp2)
|
||||||
ret <2 x i32> %tmp3
|
ret <2 x i32> %tmp3
|
||||||
}
|
}
|
||||||
|
|
||||||
define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
|
define <2 x float> @vpaddf32(<2 x float>* %A, <2 x float>* %B) nounwind {
|
||||||
%tmp1 = load <2 x float>* %A
|
%tmp1 = load <2 x float>* %A
|
||||||
%tmp2 = load <2 x float>* %B
|
%tmp2 = load <2 x float>* %B
|
||||||
%tmp3 = call <2 x float> @llvm.arm.neon.vpaddf.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
|
%tmp3 = call <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float> %tmp1, <2 x float> %tmp2)
|
||||||
ret <2 x float> %tmp3
|
ret <2 x float> %tmp3
|
||||||
}
|
}
|
||||||
|
|
||||||
declare <8 x i8> @llvm.arm.neon.vpaddi.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
|
declare <8 x i8> @llvm.arm.neon.vpadd.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
|
||||||
declare <4 x i16> @llvm.arm.neon.vpaddi.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
|
declare <4 x i16> @llvm.arm.neon.vpadd.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
|
||||||
declare <2 x i32> @llvm.arm.neon.vpaddi.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
|
declare <2 x i32> @llvm.arm.neon.vpadd.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
|
||||||
|
|
||||||
declare <2 x float> @llvm.arm.neon.vpaddf.v2f32(<2 x float>, <2 x float>) nounwind readnone
|
declare <2 x float> @llvm.arm.neon.vpadd.v2f32(<2 x float>, <2 x float>) nounwind readnone
|
||||||
|
Reference in New Issue
Block a user