From 97a62bf2a4a2d141aad8af3531c3b69934f134c1 Mon Sep 17 00:00:00 2001 From: Akira Hatanaka Date: Fri, 19 Apr 2013 23:21:32 +0000 Subject: [PATCH] [mips] Instruction selection patterns for DSP-ASE vector shifts. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179906 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsDSPInstrInfo.td | 36 ++++++++++--- lib/Target/Mips/MipsISelLowering.cpp | 3 ++ lib/Target/Mips/MipsISelLowering.h | 5 ++ lib/Target/Mips/MipsSEISelLowering.cpp | 63 ++++++++++++++++++++++ test/CodeGen/Mips/dsp-patterns.ll | 75 ++++++++++++++++++++++++++ 5 files changed, 176 insertions(+), 6 deletions(-) diff --git a/lib/Target/Mips/MipsDSPInstrInfo.td b/lib/Target/Mips/MipsDSPInstrInfo.td index 86983dd4bcf..6790a279839 100644 --- a/lib/Target/Mips/MipsDSPInstrInfo.td +++ b/lib/Target/Mips/MipsDSPInstrInfo.td @@ -26,6 +26,8 @@ def SDT_MipsShilo : SDTypeProfile<1, 2, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 2>, SDTCisVT<1, i32>]>; def SDT_MipsDPA : SDTypeProfile<1, 3, [SDTCisVT<0, untyped>, SDTCisSameAs<0, 3>, SDTCisVT<1, i32>, SDTCisSameAs<1, 2>]>; +def SDT_MipsSHIFT_DSP : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; class MipsDSPBase : SDNode; @@ -74,6 +76,9 @@ def MipsMADD_DSP : MipsDSPBase<"MADD_DSP", SDT_MipsDPA>; def MipsMADDU_DSP : MipsDSPBase<"MADDU_DSP", SDT_MipsDPA>; def MipsMSUB_DSP : MipsDSPBase<"MSUB_DSP", SDT_MipsDPA>; def MipsMSUBU_DSP : MipsDSPBase<"MSUBU_DSP", SDT_MipsDPA>; +def MipsSHLL_DSP : MipsDSPBase<"SHLL_DSP", SDT_MipsSHIFT_DSP>; +def MipsSHRA_DSP : MipsDSPBase<"SHRA_DSP", SDT_MipsSHIFT_DSP>; +def MipsSHRL_DSP : MipsDSPBase<"SHRL_DSP", SDT_MipsSHIFT_DSP>; // Flags. class UseAC { @@ -348,6 +353,7 @@ class SHLL_QB_R2_DESC_BASE Pattern = [(set RC:$rd, (OpNode RC:$rt, ImmPat:$rs_sa))]; InstrItinClass Itinerary = itin; list Defs = [DSPCtrl]; + bit hasSideEffects = 1; } class LX_DESC_BASE; class SHLLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shllv.qb", int_mips_shll_qb, NoItinerary, DSPRegs>; -class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", int_mips_shrl_qb, immZExt3, +class SHRL_QB_DESC : SHLL_QB_R2_DESC_BASE<"shrl.qb", null_frag, immZExt3, NoItinerary, DSPRegs>, ClearDefs; class SHRLV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.qb", int_mips_shrl_qb, NoItinerary, DSPRegs>, ClearDefs; -class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", int_mips_shll_ph, immZExt4, +class SHLL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll.ph", null_frag, immZExt4, NoItinerary, DSPRegs>; class SHLLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv.ph", int_mips_shll_ph, @@ -683,7 +689,7 @@ class SHLL_S_PH_DESC : SHLL_QB_R2_DESC_BASE<"shll_s.ph", int_mips_shll_s_ph, class SHLLV_S_PH_DESC : SHLL_QB_R3_DESC_BASE<"shllv_s.ph", int_mips_shll_s_ph, NoItinerary, DSPRegs>; -class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", int_mips_shra_ph, immZExt4, +class SHRA_PH_DESC : SHLL_QB_R2_DESC_BASE<"shra.ph", null_frag, immZExt4, NoItinerary, DSPRegs>, ClearDefs; class SHRAV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrav.ph", int_mips_shra_ph, @@ -1028,7 +1034,7 @@ class PRECR_SRA_R_PH_W_DESC : PRECR_SRA_PH_W_DESC_BASE<"precr_sra_r.ph.w", CPURegs>, ClearDefs; // Shift -class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", int_mips_shra_qb, immZExt3, +class SHRA_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra.qb", null_frag, immZExt3, NoItinerary, DSPRegs>, ClearDefs; class SHRAV_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav.qb", int_mips_shra_qb, @@ -1041,7 +1047,7 @@ class SHRA_R_QB_DESC : SHLL_QB_R2_DESC_BASE<"shra_r.qb", int_mips_shra_r_qb, class SHRAV_R_QB_DESC : SHLL_QB_R3_DESC_BASE<"shrav_r.qb", int_mips_shra_r_qb, NoItinerary, DSPRegs>, ClearDefs; -class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", int_mips_shrl_ph, immZExt4, +class SHRL_PH_DESC : SHLL_QB_R2_DESC_BASE<"shrl.ph", null_frag, immZExt4, NoItinerary, DSPRegs>, ClearDefs; class SHRLV_PH_DESC : SHLL_QB_R3_DESC_BASE<"shrlv.ph", int_mips_shrl_ph, @@ -1280,6 +1286,24 @@ def : DSPBinPat; def : DSPBinPat; def : DSPBinPat; +// Shift immediate patterns. +class DSPShiftPat : + DSPPat<(Node ValTy:$a, Imm:$shamt), (Inst ValTy:$a, Imm:$shamt), Pred>; + +def : DSPShiftPat; +def : DSPShiftPat; +def : DSPShiftPat; +def : DSPShiftPat; +def : DSPShiftPat; +def : DSPShiftPat; +def : DSPShiftPat; +def : DSPShiftPat; +def : DSPShiftPat; +def : DSPShiftPat; +def : DSPShiftPat; +def : DSPShiftPat; + // Extr patterns. class EXTR_W_TY1_R2_Pat : DSPPat<(i32 (OpNode CPURegs:$rs, ACRegsDSP:$ac)), diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 512eeef3834..9f136f43d24 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -197,6 +197,9 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::MADDU_DSP: return "MipsISD::MADDU_DSP"; case MipsISD::MSUB_DSP: return "MipsISD::MSUB_DSP"; case MipsISD::MSUBU_DSP: return "MipsISD::MSUBU_DSP"; + case MipsISD::SHLL_DSP: return "MipsISD::SHLL_DSP"; + case MipsISD::SHRA_DSP: return "MipsISD::SHRA_DSP"; + case MipsISD::SHRL_DSP: return "MipsISD::SHRL_DSP"; default: return NULL; } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 9e15b9a8767..e1e6055b397 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -143,6 +143,11 @@ namespace llvm { MSUB_DSP, MSUBU_DSP, + // DSP shift nodes. + SHLL_DSP, + SHRA_DSP, + SHRL_DSP, + // Load/Store Left/Right nodes. LWL = ISD::FIRST_TARGET_MEMORY_OPCODE, LWR, diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index a4fcffbf519..efadaaa1f72 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -52,6 +52,10 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setOperationAction(ISD::STORE, VecTys[i], Legal); setOperationAction(ISD::BITCAST, VecTys[i], Legal); } + + setTargetDAGCombine(ISD::SHL); + setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::SRL); } if (Subtarget->hasDSPR2()) @@ -314,6 +318,59 @@ static SDValue performSUBECombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue performDSPShiftCombine(unsigned Opc, SDNode *N, EVT Ty, + SelectionDAG &DAG, + const MipsSubtarget *Subtarget) { + // See if this is a vector splat immediate node. + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + unsigned EltSize = Ty.getVectorElementType().getSizeInBits(); + BuildVectorSDNode *BV = dyn_cast(N->getOperand(1)); + + if (!BV || !BV->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs, EltSize, + !Subtarget->isLittle())) + return SDValue(); + + return DAG.getNode(Opc, N->getDebugLoc(), Ty, N->getOperand(0), + DAG.getConstant(SplatValue.getZExtValue(), MVT::i32)); +} + +static SDValue performSHLCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + EVT Ty = N->getValueType(0); + + if ((Ty != MVT::v2i16) && (Ty != MVT::v4i8)) + return SDValue(); + + return performDSPShiftCombine(MipsISD::SHLL_DSP, N, Ty, DAG, Subtarget); +} + +static SDValue performSRACombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + EVT Ty = N->getValueType(0); + + if ((Ty != MVT::v2i16) && ((Ty != MVT::v4i8) || !Subtarget->hasDSPR2())) + return SDValue(); + + return performDSPShiftCombine(MipsISD::SHRA_DSP, N, Ty, DAG, Subtarget); +} + + +static SDValue performSRLCombine(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI, + const MipsSubtarget *Subtarget) { + EVT Ty = N->getValueType(0); + + if (((Ty != MVT::v2i16) || !Subtarget->hasDSPR2()) && (Ty != MVT::v4i8)) + return SDValue(); + + return performDSPShiftCombine(MipsISD::SHRL_DSP, N, Ty, DAG, Subtarget); +} + SDValue MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { SelectionDAG &DAG = DCI.DAG; @@ -323,6 +380,12 @@ MipsSETargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { return performADDECombine(N, DAG, DCI, Subtarget); case ISD::SUBE: return performSUBECombine(N, DAG, DCI, Subtarget); + case ISD::SHL: + return performSHLCombine(N, DAG, DCI, Subtarget); + case ISD::SRA: + return performSRACombine(N, DAG, DCI, Subtarget); + case ISD::SRL: + return performSRLCombine(N, DAG, DCI, Subtarget); default: return MipsTargetLowering::PerformDAGCombine(N, DCI); } diff --git a/test/CodeGen/Mips/dsp-patterns.ll b/test/CodeGen/Mips/dsp-patterns.ll index 8fb86c23895..c2aeab5c10b 100644 --- a/test/CodeGen/Mips/dsp-patterns.ll +++ b/test/CodeGen/Mips/dsp-patterns.ll @@ -128,3 +128,78 @@ entry: ret i64 %add } +; R1: shift1_v2i16_shl_: +; R1: shll.ph ${{[0-9]+}}, ${{[0-9]+}}, 15 + +define { i32 } @shift1_v2i16_shl_(i32 %a0.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %shl = shl <2 x i16> %0, + %1 = bitcast <2 x i16> %shl to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0 + ret { i32 } %.fca.0.insert +} + +; R1: shift1_v2i16_sra_: +; R1: shra.ph ${{[0-9]+}}, ${{[0-9]+}}, 15 + +define { i32 } @shift1_v2i16_sra_(i32 %a0.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %shr = ashr <2 x i16> %0, + %1 = bitcast <2 x i16> %shr to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0 + ret { i32 } %.fca.0.insert +} + +; R1: shift1_v2ui16_srl_: +; R1-NOT: shrl.ph +; R2: shift1_v2ui16_srl_: +; R2: shrl.ph ${{[0-9]+}}, ${{[0-9]+}}, 15 + +define { i32 } @shift1_v2ui16_srl_(i32 %a0.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <2 x i16> + %shr = lshr <2 x i16> %0, + %1 = bitcast <2 x i16> %shr to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0 + ret { i32 } %.fca.0.insert +} + +; R1: shift1_v4i8_shl_: +; R1: shll.qb ${{[0-9]+}}, ${{[0-9]+}}, 7 + +define { i32 } @shift1_v4i8_shl_(i32 %a0.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %shl = shl <4 x i8> %0, + %1 = bitcast <4 x i8> %shl to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0 + ret { i32 } %.fca.0.insert +} + +; R1: shift1_v4i8_sra_: +; R1-NOT: shra.qb +; R2: shift1_v4i8_sra_: +; R2: shra.qb ${{[0-9]+}}, ${{[0-9]+}}, 7 + +define { i32 } @shift1_v4i8_sra_(i32 %a0.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %shr = ashr <4 x i8> %0, + %1 = bitcast <4 x i8> %shr to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0 + ret { i32 } %.fca.0.insert +} + +; R1: shift1_v4ui8_srl_: +; R1: shrl.qb ${{[0-9]+}}, ${{[0-9]+}}, 7 + +define { i32 } @shift1_v4ui8_srl_(i32 %a0.coerce) { +entry: + %0 = bitcast i32 %a0.coerce to <4 x i8> + %shr = lshr <4 x i8> %0, + %1 = bitcast <4 x i8> %shr to i32 + %.fca.0.insert = insertvalue { i32 } undef, i32 %1, 0 + ret { i32 } %.fca.0.insert +}