mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-28 04:33:05 +00:00
Remove NEON vmovn intrinsic, replacing it with vector truncate operations.
Auto-upgrade the old intrinsic and update tests.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@112507 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
663e339e20
commit
973a074345
@ -303,7 +303,6 @@ def int_arm_neon_vcvtfxs2fp : Neon_CvtFxToFP_Intrinsic;
|
||||
def int_arm_neon_vcvtfxu2fp : Neon_CvtFxToFP_Intrinsic;
|
||||
|
||||
// Narrowing and Lengthening Vector Moves.
|
||||
def int_arm_neon_vmovn : Neon_1Arg_Narrow_Intrinsic;
|
||||
def int_arm_neon_vqmovns : Neon_1Arg_Narrow_Intrinsic;
|
||||
def int_arm_neon_vqmovnu : Neon_1Arg_Narrow_Intrinsic;
|
||||
def int_arm_neon_vqmovnsu : Neon_1Arg_Narrow_Intrinsic;
|
||||
|
@ -956,6 +956,15 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
|
||||
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
|
||||
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
|
||||
|
||||
// Narrow 2-register operations.
// N2VN passes the encoding fields (op24_23, op21_20, op19_18, op17_16,
// op11_7, op6, op4), the itinerary class, the mnemonic (OpcodeStr) and the
// data-type suffix (Dt) straight through to N2V. The instruction reads a
// single QPR operand ($src) and writes a single DPR operand ($dst); the
// assembly operand string is "$dst, $src". The selection pattern matches the
// SDNode OpNode applied to $src typed as TyQ and yielding a value of type
// TyD, so TyQ is the source vector type and TyD is the result vector type.
|
||||
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
|
||||
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
|
||||
InstrItinClass itin, string OpcodeStr, string Dt,
|
||||
ValueType TyD, ValueType TyQ, SDNode OpNode>
|
||||
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, op6, op4, (outs DPR:$dst),
|
||||
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
|
||||
[(set DPR:$dst, (TyD (OpNode (TyQ QPR:$src))))]>;
|
||||
|
||||
// Narrow 2-register intrinsics.
|
||||
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
|
||||
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
|
||||
@ -1579,6 +1588,23 @@ multiclass N3V_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
|
||||
}
|
||||
|
||||
|
||||
// Neon Narrowing 2-register vector operations,
|
||||
// source operand element sizes of 16, 32 and 64 bits:
// Instantiates N2VN three times. Every multiclass parameter is forwarded
// unchanged; the three defs differ only in:
//   def     op19_18   Dt suffix   result type <- source type
//   v8i8    0b00      "16"        v8i8        <- v8i16
//   v4i16   0b01      "32"        v4i16       <- v4i32
//   v2i32   0b10      "64"        v2i32       <- v2i64
// The suffix appended to Dt is the source element size in bits, and each def
// is named after its result vector type.
|
||||
multiclass N2VN_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
|
||||
bits<5> op11_7, bit op6, bit op4,
|
||||
InstrItinClass itin, string OpcodeStr, string Dt,
|
||||
SDNode OpNode> {
|
||||
def v8i8 : N2VN<op24_23, op21_20, 0b00, op17_16, op11_7, op6, op4,
|
||||
itin, OpcodeStr, !strconcat(Dt, "16"),
|
||||
v8i8, v8i16, OpNode>;
|
||||
def v4i16 : N2VN<op24_23, op21_20, 0b01, op17_16, op11_7, op6, op4,
|
||||
itin, OpcodeStr, !strconcat(Dt, "32"),
|
||||
v4i16, v4i32, OpNode>;
|
||||
def v2i32 : N2VN<op24_23, op21_20, 0b10, op17_16, op11_7, op6, op4,
|
||||
itin, OpcodeStr, !strconcat(Dt, "64"),
|
||||
v2i32, v2i64, OpNode>;
|
||||
}
|
||||
|
||||
// Neon Narrowing 2-register vector intrinsics,
|
||||
// source operand element sizes of 16, 32 and 64 bits:
|
||||
multiclass N2VNInt_HSD<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
|
||||
@ -3221,8 +3247,8 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
|
||||
[(set QPR:$dst, (v4f32 (NEONvdup (f32 SPR:$src))))]>;
|
||||
|
||||
// VMOVN : Vector Narrowing Move
|
||||
defm VMOVN : N2VNInt_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
|
||||
"vmovn", "i", int_arm_neon_vmovn>;
|
||||
defm VMOVN : N2VN_HSD<0b11,0b11,0b10,0b00100,0,0, IIC_VMOVD,
|
||||
"vmovn", "i", trunc>;
|
||||
// VQMOVN : Vector Saturating Narrowing Move
|
||||
defm VQMOVNs : N2VNInt_HSD<0b11,0b11,0b10,0b00101,0,0, IIC_VQUNAiD,
|
||||
"vqmovn", "s", int_arm_neon_vqmovns>;
|
||||
|
@ -88,7 +88,9 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) {
|
||||
((Name.compare(14, 5, "vaddw", 5) == 0 ||
|
||||
Name.compare(14, 5, "vsubw", 5) == 0) &&
|
||||
(Name.compare(19, 2, "s.", 2) == 0 ||
|
||||
Name.compare(19, 2, "u.", 2) == 0))) {
|
||||
Name.compare(19, 2, "u.", 2) == 0)) ||
|
||||
|
||||
(Name.compare(14, 6, "vmovn.", 6) == 0)) {
|
||||
|
||||
// Calls to these are transformed into IR without intrinsics.
|
||||
NewFn = 0;
|
||||
@ -401,6 +403,9 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) {
|
||||
else
|
||||
NewI = BinaryOperator::CreateSub(V0, V1,"upgraded."+CI->getName(),CI);
|
||||
|
||||
} else if (Name.compare(14, 6, "vmovn.", 6) == 0) {
|
||||
NewI = new TruncInst(CI->getArgOperand(0), CI->getType(),
|
||||
"upgraded." + CI->getName(), CI);
|
||||
} else {
|
||||
llvm_unreachable("Unknown arm.neon function for CallInst upgrade.");
|
||||
}
|
||||
|
@ -76,6 +76,20 @@
|
||||
; CHECK: zext <4 x i16>
|
||||
; CHECK-NEXT: sub <4 x i32>
|
||||
|
||||
; vmovn should be auto-upgraded to trunc
|
||||
|
||||
; CHECK: vmovni16
|
||||
; CHECK-NOT: arm.neon.vmovn.v8i8
|
||||
; CHECK: trunc <8 x i16>
|
||||
|
||||
; CHECK: vmovni32
|
||||
; CHECK-NOT: arm.neon.vmovn.v4i16
|
||||
; CHECK: trunc <4 x i32>
|
||||
|
||||
; CHECK: vmovni64
|
||||
; CHECK-NOT: arm.neon.vmovn.v2i32
|
||||
; CHECK: trunc <2 x i64>
|
||||
|
||||
; vld* and vst* intrinsic calls need an alignment argument (defaulted to 1)
|
||||
|
||||
; CHECK: vld1i8
|
||||
|
Binary file not shown.
@ -240,7 +240,7 @@ define <8 x i8> @vmovni16(<8 x i16>* %A) nounwind {
|
||||
;CHECK: vmovni16:
|
||||
;CHECK: vmovn.i16
|
||||
%tmp1 = load <8 x i16>* %A
|
||||
%tmp2 = call <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16> %tmp1)
|
||||
%tmp2 = trunc <8 x i16> %tmp1 to <8 x i8>
|
||||
ret <8 x i8> %tmp2
|
||||
}
|
||||
|
||||
@ -248,7 +248,7 @@ define <4 x i16> @vmovni32(<4 x i32>* %A) nounwind {
|
||||
;CHECK: vmovni32:
|
||||
;CHECK: vmovn.i32
|
||||
%tmp1 = load <4 x i32>* %A
|
||||
%tmp2 = call <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32> %tmp1)
|
||||
%tmp2 = trunc <4 x i32> %tmp1 to <4 x i16>
|
||||
ret <4 x i16> %tmp2
|
||||
}
|
||||
|
||||
@ -256,14 +256,10 @@ define <2 x i32> @vmovni64(<2 x i64>* %A) nounwind {
|
||||
;CHECK: vmovni64:
|
||||
;CHECK: vmovn.i64
|
||||
%tmp1 = load <2 x i64>* %A
|
||||
%tmp2 = call <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64> %tmp1)
|
||||
%tmp2 = trunc <2 x i64> %tmp1 to <2 x i32>
|
||||
ret <2 x i32> %tmp2
|
||||
}
|
||||
|
||||
declare <8 x i8> @llvm.arm.neon.vmovn.v8i8(<8 x i16>) nounwind readnone
|
||||
declare <4 x i16> @llvm.arm.neon.vmovn.v4i16(<4 x i32>) nounwind readnone
|
||||
declare <2 x i32> @llvm.arm.neon.vmovn.v2i32(<2 x i64>) nounwind readnone
|
||||
|
||||
define <8 x i8> @vqmovns16(<8 x i16>* %A) nounwind {
|
||||
;CHECK: vqmovns16:
|
||||
;CHECK: vqmovn.s16
|
||||
|
Loading…
Reference in New Issue
Block a user