Mirror of https://github.com/c64scene-ar/llvm-6502.git (synced 2025-04-09 16:45:03 +00:00)
ARM: use LLVM IR to represent the vshrn operation
vshrn is just the combination of a right shift and a truncate (and the limits on the immediate value actually mean the signedness of the shift doesn't matter). Using that representation allows us to get rid of an ARM-specific intrinsic, share more code with AArch64 and hopefully get better code out of the mid-end optimisers.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@201085 91177308-0d34-0410-b5e6-96231b3b80d8
parent: 5a2ae98407
commit: 9ed30bb230
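To illustrate the representation change the message describes, here is a minimal LLVM IR sketch (the function names @narrow_shift_old/@narrow_shift_new and the choice of a <4 x i32> source are illustrative, not taken from the patch). Before this commit a narrowing right shift had to be spelled with the ARM-specific intrinsic, using the negative-amount convention visible in the updated tests below; afterwards the same operation is plain lshr (or ashr) followed by trunc, which the backend should select back to a single vshrn.i32:

; Old form: opaque target intrinsic, invisible to generic IR optimisations.
declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>)
define <4 x i16> @narrow_shift_old(<4 x i32> %v) {
  %r = call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %v, <4 x i32> <i32 -9, i32 -9, i32 -9, i32 -9>)
  ret <4 x i16> %r
}

; New form: generic shift + truncate that the mid-end understands; expected to
; still select to vshrn.i32 #9 on ARM.
define <4 x i16> @narrow_shift_new(<4 x i32> %v) {
  %s = lshr <4 x i32> %v, <i32 9, i32 9, i32 9, i32 9>
  %r = trunc <4 x i32> %s to <4 x i16>
  ret <4 x i16> %r
}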
@@ -289,7 +289,6 @@ def int_arm_neon_vshifts : Neon_2Arg_Intrinsic;
 def int_arm_neon_vshiftu : Neon_2Arg_Intrinsic;
 def int_arm_neon_vshiftls : Neon_2Arg_Long_Intrinsic;
 def int_arm_neon_vshiftlu : Neon_2Arg_Long_Intrinsic;
-def int_arm_neon_vshiftn : Neon_2Arg_Narrow_Intrinsic;

 // Vector Rounding Shift.
 def int_arm_neon_vrshifts : Neon_2Arg_Intrinsic;
@@ -1081,7 +1081,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
 case ARMISD::VSHLLs: return "ARMISD::VSHLLs";
 case ARMISD::VSHLLu: return "ARMISD::VSHLLu";
 case ARMISD::VSHLLi: return "ARMISD::VSHLLi";
-case ARMISD::VSHRN: return "ARMISD::VSHRN";
 case ARMISD::VRSHRs: return "ARMISD::VRSHRs";
 case ARMISD::VRSHRu: return "ARMISD::VRSHRu";
 case ARMISD::VRSHRN: return "ARMISD::VRSHRN";
@@ -9717,7 +9716,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
 case Intrinsic::arm_neon_vshiftu:
 case Intrinsic::arm_neon_vshiftls:
 case Intrinsic::arm_neon_vshiftlu:
-case Intrinsic::arm_neon_vshiftn:
 case Intrinsic::arm_neon_vrshifts:
 case Intrinsic::arm_neon_vrshiftu:
 case Intrinsic::arm_neon_vrshiftn:
@@ -9771,7 +9769,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
 break;
 llvm_unreachable("invalid shift count for vqshlu intrinsic");

-case Intrinsic::arm_neon_vshiftn:
 case Intrinsic::arm_neon_vrshiftn:
 case Intrinsic::arm_neon_vqshiftns:
 case Intrinsic::arm_neon_vqshiftnu:
@@ -9802,8 +9799,6 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) {
 VShiftOpc = (IntNo == Intrinsic::arm_neon_vshiftls ?
 ARMISD::VSHLLs : ARMISD::VSHLLu);
 break;
-case Intrinsic::arm_neon_vshiftn:
-VShiftOpc = ARMISD::VSHRN; break;
 case Intrinsic::arm_neon_vrshifts:
 VShiftOpc = ARMISD::VRSHRs; break;
 case Intrinsic::arm_neon_vrshiftu:
@@ -116,7 +116,6 @@ namespace llvm {
 VSHLLs, // ...left long (signed)
 VSHLLu, // ...left long (unsigned)
 VSHLLi, // ...left long (with maximum shift count)
-VSHRN, // ...right narrow

 // Vector rounding shift by immediate:
 VRSHRs, // ...right (signed)
@@ -212,25 +212,25 @@ def msr_mask : Operand<i32> {
 // 32 imm6<5> = '1', 32 - <imm> is encoded in imm6<4:0>
 // 64 64 - <imm> is encoded in imm6<5:0>
 def shr_imm8_asm_operand : ImmAsmOperand { let Name = "ShrImm8"; }
-def shr_imm8 : Operand<i32> {
+def shr_imm8 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 8; }]> {
 let EncoderMethod = "getShiftRight8Imm";
 let DecoderMethod = "DecodeShiftRight8Imm";
 let ParserMatchClass = shr_imm8_asm_operand;
 }
 def shr_imm16_asm_operand : ImmAsmOperand { let Name = "ShrImm16"; }
-def shr_imm16 : Operand<i32> {
+def shr_imm16 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 16; }]> {
 let EncoderMethod = "getShiftRight16Imm";
 let DecoderMethod = "DecodeShiftRight16Imm";
 let ParserMatchClass = shr_imm16_asm_operand;
 }
 def shr_imm32_asm_operand : ImmAsmOperand { let Name = "ShrImm32"; }
-def shr_imm32 : Operand<i32> {
+def shr_imm32 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 32; }]> {
 let EncoderMethod = "getShiftRight32Imm";
 let DecoderMethod = "DecodeShiftRight32Imm";
 let ParserMatchClass = shr_imm32_asm_operand;
 }
 def shr_imm64_asm_operand : ImmAsmOperand { let Name = "ShrImm64"; }
-def shr_imm64 : Operand<i32> {
+def shr_imm64 : Operand<i32>, ImmLeaf<i32, [{ return Imm > 0 && Imm <= 64; }]> {
 let EncoderMethod = "getShiftRight64Imm";
 let DecoderMethod = "DecodeShiftRight64Imm";
 let ParserMatchClass = shr_imm64_asm_operand;
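The ImmLeaf predicates added above are what allow these operands to appear as leaves in the new shift-plus-truncate selection patterns: only literal shift amounts from 1 up to the destination element width are accepted, and anything else simply fails to match and falls back to a separate vector shift plus a narrowing move. A small sketch of the two cases in LLVM IR (function names are made up for this note, matching the behaviour the _bad tests below check):

; Shift amount 16 is the largest value shr_imm16 accepts for an i32 -> i16
; narrow, so this should still select as a single vshrn.i32 #16.
define <4 x i16> @narrow_in_range(<4 x i32> %v) {
  %s = lshr <4 x i32> %v, <i32 16, i32 16, i32 16, i32 16>
  %t = trunc <4 x i32> %s to <4 x i16>
  ret <4 x i16> %t
}

; Shift amount 17 is outside the predicate's range, so the pattern does not
; match and the backend is expected to emit a vector shift followed by vmovn.
define <4 x i16> @narrow_out_of_range(<4 x i32> %v) {
  %s = lshr <4 x i32> %v, <i32 17, i32 17, i32 17, i32 17>
  %t = trunc <4 x i32> %s to <4 x i16>
  ret <4 x i16> %t
}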
@@ -3048,12 +3048,13 @@ class N2VLSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
 // Narrow shift by immediate.
 class N2VNSh<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6, bit op4,
 InstrItinClass itin, string OpcodeStr, string Dt,
-ValueType ResTy, ValueType OpTy, Operand ImmTy, SDNode OpNode>
+ValueType ResTy, ValueType OpTy, Operand ImmTy,
+SDPatternOperator OpNode>
 : N2VImm<op24, op23, op11_8, op7, op6, op4,
 (outs DPR:$Vd), (ins QPR:$Vm, ImmTy:$SIMM), N2RegVShRFrm, itin,
 OpcodeStr, Dt, "$Vd, $Vm, $SIMM", "",
 [(set DPR:$Vd, (ResTy (OpNode (OpTy QPR:$Vm),
-(i32 imm:$SIMM))))]>;
+(i32 ImmTy:$SIMM))))]>;

 // Shift right by immediate and accumulate,
 // both double- and quad-register.
@@ -3960,7 +3961,7 @@ multiclass N2VLSh_QHS<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
 // element sizes of 16, 32, 64 bits:
 multiclass N2VNSh_HSD<bit op24, bit op23, bits<4> op11_8, bit op7, bit op6,
 bit op4, InstrItinClass itin, string OpcodeStr, string Dt,
-SDNode OpNode> {
+SDPatternOperator OpNode> {
 def v8i8 : N2VNSh<op24, op23, op11_8, op7, op6, op4, itin,
 OpcodeStr, !strconcat(Dt, "16"),
 v8i8, v8i16, shr_imm8, OpNode> {
@@ -4967,7 +4968,15 @@ def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",

 // VSHRN : Vector Shift Right and Narrow
 defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
-NEONvshrn>;
+PatFrag<(ops node:$Rn, node:$amt),
+(trunc (NEONvshrs node:$Rn, node:$amt))>>;
+
+def : Pat<(v8i8 (trunc (NEONvshru (v8i16 QPR:$Vn), shr_imm8:$amt))),
+(VSHRNv8i8 QPR:$Vn, shr_imm8:$amt)>;
+def : Pat<(v4i16 (trunc (NEONvshru (v4i32 QPR:$Vn), shr_imm16:$amt))),
+(VSHRNv4i16 QPR:$Vn, shr_imm16:$amt)>;
+def : Pat<(v2i32 (trunc (NEONvshru (v2i64 QPR:$Vn), shr_imm32:$amt))),
+(VSHRNv2i32 QPR:$Vn, shr_imm32:$amt)>;

 // VRSHL : Vector Rounding Shift
 defm VRSHLs : N3VInt_QHSDSh<0, 0, 0b0101, 0, N3RegVShFrm,
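Both shift flavours end up covered by this hunk: the PatFrag handed to N2VNSh_HSD matches a trunc of the signed-shift node (NEONvshrs), while the three explicit Pat definitions handle the unsigned-shift node (NEONvshru). That is sound because the immediate is capped at the destination element width, so the bits that survive the truncate come entirely from the source value and are identical whether the shift was arithmetic or logical. A small LLVM IR sketch of that equivalence (function names are illustrative):

; Signed and unsigned right shifts by an in-range amount keep the same low
; bits after narrowing, so both functions are expected to select vshrn.i16 #5.
define <8 x i8> @narrow_signed(<8 x i16> %v) {
  %s = ashr <8 x i16> %v, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
  %t = trunc <8 x i16> %s to <8 x i8>
  ret <8 x i8> %t
}

define <8 x i8> @narrow_unsigned(<8 x i16> %v) {
  %s = lshr <8 x i16> %v, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5>
  %t = trunc <8 x i16> %s to <8 x i8>
  ret <8 x i8> %t
}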
@@ -34,9 +34,11 @@ entry:
 %12 = sext <4 x i16> %11 to <4 x i32> ; <<4 x i32>> [#uses=1]
 %13 = mul <4 x i32> %1, %9 ; <<4 x i32>> [#uses=1]
 %14 = mul <4 x i32> %3, %12 ; <<4 x i32>> [#uses=1]
-%15 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %13, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1]
-%16 = tail call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %14, <4 x i32> <i32 -12, i32 -12, i32 -12, i32 -12>) ; <<4 x i16>> [#uses=1]
-%17 = shufflevector <4 x i16> %15, <4 x i16> %16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; <<8 x i16>> [#uses=1]
+%15 = lshr <4 x i32> %13, <i32 12, i32 12, i32 12, i32 12>
+%trunc_15 = trunc <4 x i32> %15 to <4 x i16>
+%16 = lshr <4 x i32> %14, <i32 12, i32 12, i32 12, i32 12>
+%trunc_16 = trunc <4 x i32> %16 to <4 x i16>
+%17 = shufflevector <4 x i16> %trunc_15, <4 x i16> %trunc_16, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; <<8 x i16>> [#uses=1]
 %18 = bitcast i16* %o_ptr to i8* ; <i8*> [#uses=1]
 tail call void @llvm.arm.neon.vst1.v8i16(i8* %18, <8 x i16> %17, i32 1)
 ret void
@@ -4,29 +4,58 @@ define <8 x i8> @vshrns8(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vshrns8:
 ;CHECK: vshrn.i16
 %tmp1 = load <8 x i16>* %A
-%tmp2 = call <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16> %tmp1, <8 x i16> < i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8 >)
-ret <8 x i8> %tmp2
+%tmp2 = lshr <8 x i16> %tmp1, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
+%tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
+ret <8 x i8> %tmp3
 }

 define <4 x i16> @vshrns16(<4 x i32>* %A) nounwind {
 ;CHECK-LABEL: vshrns16:
 ;CHECK: vshrn.i32
 %tmp1 = load <4 x i32>* %A
-%tmp2 = call <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32> %tmp1, <4 x i32> < i32 -16, i32 -16, i32 -16, i32 -16 >)
-ret <4 x i16> %tmp2
+%tmp2 = ashr <4 x i32> %tmp1, <i32 16, i32 16, i32 16, i32 16>
+%tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
+ret <4 x i16> %tmp3
 }

 define <2 x i32> @vshrns32(<2 x i64>* %A) nounwind {
 ;CHECK-LABEL: vshrns32:
 ;CHECK: vshrn.i64
 %tmp1 = load <2 x i64>* %A
-%tmp2 = call <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64> %tmp1, <2 x i64> < i64 -32, i64 -32 >)
-ret <2 x i32> %tmp2
+%tmp2 = ashr <2 x i64> %tmp1, <i64 32, i64 32>
+%tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
+ret <2 x i32> %tmp3
 }

-declare <8 x i8> @llvm.arm.neon.vshiftn.v8i8(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i16> @llvm.arm.neon.vshiftn.v4i16(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i32> @llvm.arm.neon.vshiftn.v2i32(<2 x i64>, <2 x i64>) nounwind readnone
+define <8 x i8> @vshrns8_bad(<8 x i16>* %A) nounwind {
+; CHECK-LABEL: vshrns8_bad:
+; CHECK: vshr.s16
+; CHECK: vmovn.i16
+%tmp1 = load <8 x i16>* %A
+%tmp2 = ashr <8 x i16> %tmp1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
+%tmp3 = trunc <8 x i16> %tmp2 to <8 x i8>
+ret <8 x i8> %tmp3
+}
+
+define <4 x i16> @vshrns16_bad(<4 x i32>* %A) nounwind {
+; CHECK-LABEL: vshrns16_bad:
+; CHECK: vshr.u32
+; CHECK: vmovn.i32
+%tmp1 = load <4 x i32>* %A
+%tmp2 = lshr <4 x i32> %tmp1, <i32 17, i32 17, i32 17, i32 17>
+%tmp3 = trunc <4 x i32> %tmp2 to <4 x i16>
+ret <4 x i16> %tmp3
+}
+
+define <2 x i32> @vshrns32_bad(<2 x i64>* %A) nounwind {
+; CHECK-LABEL: vshrns32_bad:
+; CHECK: vshr.u64
+; CHECK: vmovn.i64
+%tmp1 = load <2 x i64>* %A
+%tmp2 = lshr <2 x i64> %tmp1, <i64 33, i64 33>
+%tmp3 = trunc <2 x i64> %tmp2 to <2 x i32>
+ret <2 x i32> %tmp3
+}

 define <8 x i8> @vrshrns8(<8 x i16>* %A) nounwind {
 ;CHECK-LABEL: vrshrns8: