From da521cc1cc733ee1c27b00e4c0e365c8b702e2e0 Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Mon, 23 Sep 2013 12:02:46 +0000 Subject: [PATCH] [mips][msa] Implemented build_vector using ldi, fill, and custom SelectionDAG nodes (VSPLAT and VSPLATD) Note: There's a later patch on my branch that re-implements this to select build_vector without the custom SelectionDAG nodes. The future patch avoids the constant-folding problems stemming from the custom node (i.e. it doesn't need to re-implement all the DAG combines related to BUILD_VECTOR). Changes to MIPS specific SelectionDAG nodes: * Added VSPLAT This is a special case of BUILD_VECTOR that covers the case where the BUILD_VECTOR is a splat operation. * Added VSPLATD This is a special case of VSPLAT that handles the cases when v2i64 is legal git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191191 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/Mips/MipsISelLowering.cpp | 2 + lib/Target/Mips/MipsISelLowering.h | 8 +- lib/Target/Mips/MipsMSAInstrInfo.td | 26 ++-- lib/Target/Mips/MipsSEISelLowering.cpp | 109 ++++++++++++++++ lib/Target/Mips/MipsSEISelLowering.h | 5 + test/CodeGen/Mips/msa/basic_operations.ll | 120 ++++++++++++++++++ .../Mips/msa/basic_operations_float.ll | 57 +++++++++ 7 files changed, 316 insertions(+), 11 deletions(-) create mode 100644 test/CodeGen/Mips/msa/basic_operations.ll create mode 100644 test/CodeGen/Mips/msa/basic_operations_float.ll diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp index 220955ea662..21c5edb39cb 100644 --- a/lib/Target/Mips/MipsISelLowering.cpp +++ b/lib/Target/Mips/MipsISelLowering.cpp @@ -212,6 +212,8 @@ const char *MipsTargetLowering::getTargetNodeName(unsigned Opcode) const { case MipsISD::VANY_ZERO: return "MipsISD::VANY_ZERO"; case MipsISD::VALL_NONZERO: return "MipsISD::VALL_NONZERO"; case MipsISD::VANY_NONZERO: return "MipsISD::VANY_NONZERO"; + case MipsISD::VSPLAT: return "MipsISD::VSPLAT"; + case MipsISD::VSPLATD: 
return "MipsISD::VSPLATD"; default: return NULL; } } diff --git a/lib/Target/Mips/MipsISelLowering.h b/lib/Target/Mips/MipsISelLowering.h index 85aa162fb41..57b5603ac2d 100644 --- a/lib/Target/Mips/MipsISelLowering.h +++ b/lib/Target/Mips/MipsISelLowering.h @@ -152,12 +152,18 @@ namespace llvm { SETCC_DSP, SELECT_CC_DSP, - // Vector comparisons + // Vector comparisons. VALL_ZERO, VANY_ZERO, VALL_NONZERO, VANY_NONZERO, + // Special case of BUILD_VECTOR where all elements are the same. + VSPLAT, + // Special case of VSPLAT where the result is v2i64, the operand is + // constant, and the operand fits in a signed 10-bit value. + VSPLATD, + // Load/Store Left/Right nodes. LWL = ISD::FIRST_TARGET_MEMORY_OPCODE, LWR, diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index d4dcbd1f5e1..68b835e0572 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -11,12 +11,20 @@ // //===----------------------------------------------------------------------===// +def SDT_MipsSplat : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisInt<1>]>; def SDT_MipsVecCond : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisVec<1>]>; def MipsVAllNonZero : SDNode<"MipsISD::VALL_NONZERO", SDT_MipsVecCond>; def MipsVAnyNonZero : SDNode<"MipsISD::VANY_NONZERO", SDT_MipsVecCond>; def MipsVAllZero : SDNode<"MipsISD::VALL_ZERO", SDT_MipsVecCond>; def MipsVAnyZero : SDNode<"MipsISD::VANY_ZERO", SDT_MipsVecCond>; +def MipsVSplat : SDNode<"MipsISD::VSPLAT", SDT_MipsSplat>; +def MipsVSplatD : SDNode<"MipsISD::VSPLATD", SDT_MipsSplat>; + +def vsplati8 : PatFrag<(ops node:$in), (v16i8 (MipsVSplat (i32 node:$in)))>; +def vsplati16 : PatFrag<(ops node:$in), (v8i16 (MipsVSplat (i32 node:$in)))>; +def vsplati32 : PatFrag<(ops node:$in), (v4i32 (MipsVSplat (i32 node:$in)))>; +def vsplati64 : PatFrag<(ops node:$in), (v2i64 (MipsVSplatD (i32 node:$in)))>; // Immediates def immSExt5 : ImmLeaf(Imm);}]>; @@ -1383,12 +1391,9 @@ class FFQR_W_DESC : 
MSA_2RF_DESC_BASE<"ffqr.w", int_mips_ffqr_w, class FFQR_D_DESC : MSA_2RF_DESC_BASE<"ffqr.d", int_mips_ffqr_d, MSA128D, MSA128W>; -class FILL_B_DESC : MSA_2R_DESC_BASE<"fill.b", int_mips_fill_b, - MSA128B, GPR32>; -class FILL_H_DESC : MSA_2R_DESC_BASE<"fill.h", int_mips_fill_h, - MSA128H, GPR32>; -class FILL_W_DESC : MSA_2R_DESC_BASE<"fill.w", int_mips_fill_w, - MSA128W, GPR32>; +class FILL_B_DESC : MSA_2R_DESC_BASE<"fill.b", vsplati8, MSA128B, GPR32>; +class FILL_H_DESC : MSA_2R_DESC_BASE<"fill.h", vsplati16, MSA128H, GPR32>; +class FILL_W_DESC : MSA_2R_DESC_BASE<"fill.w", vsplati32, MSA128W, GPR32>; class FLOG2_W_DESC : MSA_2RF_DESC_BASE<"flog2.w", flog2, MSA128W>; class FLOG2_D_DESC : MSA_2RF_DESC_BASE<"flog2.d", flog2, MSA128D>; @@ -1573,10 +1578,10 @@ class LD_H_DESC : LD_DESC_BASE<"ld.h", load, v8i16, MSA128H>; class LD_W_DESC : LD_DESC_BASE<"ld.w", load, v4i32, MSA128W>; class LD_D_DESC : LD_DESC_BASE<"ld.d", load, v2i64, MSA128D>; -class LDI_B_DESC : MSA_I10_DESC_BASE<"ldi.b", int_mips_ldi_b, MSA128B>; -class LDI_H_DESC : MSA_I10_DESC_BASE<"ldi.h", int_mips_ldi_h, MSA128H>; -class LDI_W_DESC : MSA_I10_DESC_BASE<"ldi.w", int_mips_ldi_w, MSA128W>; -class LDI_D_DESC : MSA_I10_DESC_BASE<"ldi.d", int_mips_ldi_d, MSA128D>; +class LDI_B_DESC : MSA_I10_DESC_BASE<"ldi.b", vsplati8, MSA128B>; +class LDI_H_DESC : MSA_I10_DESC_BASE<"ldi.h", vsplati16, MSA128H>; +class LDI_W_DESC : MSA_I10_DESC_BASE<"ldi.w", vsplati32, MSA128W>; +class LDI_D_DESC : MSA_I10_DESC_BASE<"ldi.d", vsplati64, MSA128D>; class LDX_DESC_BASEgetValueType(0); + assert(VT.isVector() && "Expected a vector type"); + + unsigned int nOps = N->getNumOperands(); + assert(nOps > 1 && "isSplat has 0 or 1 sized build vector"); + + SDValue Operand0 = N->getOperand(0); + + for (unsigned int i = 1; i < nOps; ++i) { + if (N->getOperand(i) != Operand0) + return false; + } + + return true; +} + +// Lowers ISD::BUILD_VECTOR into appropriate SelectionDAG nodes for the +// backend. 
+// +// Lowers according to the following rules: +// - Vectors of 128-bits may be legal subject to the other rules. Other sizes +// are not legal. +// - Non-constant splats are legal and are lowered to MipsISD::VSPLAT. +// - Constant splats with an element size of 32-bits or less are legal and are +// lowered to MipsISD::VSPLAT. +// - Constant splats with an element size of 64-bits but whose value would fit +// within a 10 bit immediate are legal and are lowered to MipsISD::VSPLATD. +// - All other ISD::BUILD_VECTORS are not legal +SDValue MipsSETargetLowering::lowerBUILD_VECTOR(SDValue Op, + SelectionDAG &DAG) const { + BuildVectorSDNode *Node = cast<BuildVectorSDNode>(Op); + EVT ResTy = Op->getValueType(0); + SDLoc DL(Op); + APInt SplatValue, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + if (!Subtarget->hasMSA() || !ResTy.is128BitVector()) + return SDValue(); + + if (Node->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, + HasAnyUndefs, 8, + !Subtarget->isLittle())) { + SDValue Result; + EVT TmpVecTy; + EVT ConstTy = MVT::i32; + unsigned SplatOp = MipsISD::VSPLAT; + + switch (SplatBitSize) { + default: + return SDValue(); + case 64: + TmpVecTy = MVT::v2i64; + + // i64 is an illegal type on Mips32, but if the constant fits into a + // signed 10-bit value then we can still handle it using VSPLATD and an + // i32 constant + if (HasMips64) + ConstTy = MVT::i64; + else if (isInt<10>(SplatValue.getSExtValue())) { + SplatValue = SplatValue.trunc(32); + SplatOp = MipsISD::VSPLATD; + } else + return SDValue(); + break; + case 32: + TmpVecTy = MVT::v4i32; + break; + case 16: + TmpVecTy = MVT::v8i16; + SplatValue = SplatValue.sext(32); + break; + case 8: + TmpVecTy = MVT::v16i8; + SplatValue = SplatValue.sext(32); + break; + } + + Result = DAG.getNode(SplatOp, DL, TmpVecTy, + DAG.getConstant(SplatValue, ConstTy)); + if (ResTy != Result.getValueType()) + Result = DAG.getNode(ISD::BITCAST, DL, ResTy, Result); + + return Result; + } + else if (isSplatVector(Node)) + 
return DAG.getNode(MipsISD::VSPLAT, DL, ResTy, Op->getOperand(0)); + + return SDValue(); +} + MachineBasicBlock * MipsSETargetLowering:: emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const{ // $bb: diff --git a/lib/Target/Mips/MipsSEISelLowering.h b/lib/Target/Mips/MipsSEISelLowering.h index 016d4adbf73..909ab7dfde1 100644 --- a/lib/Target/Mips/MipsSEISelLowering.h +++ b/lib/Target/Mips/MipsSEISelLowering.h @@ -22,7 +22,11 @@ namespace llvm { public: explicit MipsSETargetLowering(MipsTargetMachine &TM); + /// \brief Enable MSA support for the given integer type and Register + /// class. void addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); + /// \brief Enable MSA support for the given floating-point type and + /// Register class. void addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC); @@ -69,6 +73,7 @@ namespace llvm { SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue lowerINTRINSIC_VOID(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; MachineBasicBlock *emitBPOSGE32(MachineInstr *MI, MachineBasicBlock *BB) const; diff --git a/test/CodeGen/Mips/msa/basic_operations.ll b/test/CodeGen/Mips/msa/basic_operations.ll new file mode 100644 index 00000000000..4cec6aa28aa --- /dev/null +++ b/test/CodeGen/Mips/msa/basic_operations.ll @@ -0,0 +1,120 @@ +; RUN: llc -march=mips -mattr=+msa < %s | FileCheck -check-prefix=MIPS32 %s + +@v16i8 = global <16 x i8> +@v8i16 = global <8 x i16> +@v4i32 = global <4 x i32> +@v2i64 = global <2 x i64> +@i64 = global i64 0 + +define void @const_v16i8() nounwind { + ; MIPS32: const_v16i8: + + store volatile <16 x i8> , <16 x i8>*@v16i8 + ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 0 + + store volatile <16 x i8> , <16 x i8>*@v16i8 + ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 1 + + store volatile <16 x i8> , <16 x i8>*@v16i8 + ; MIPS32: ld.b [[R1:\$w[0-9]+]], %lo( 
+ + store volatile <16 x i8> , <16 x i8>*@v16i8 + ; MIPS32: ld.b [[R1:\$w[0-9]+]], %lo( + + store volatile <16 x i8> , <16 x i8>*@v16i8 + ; MIPS32: ldi.h [[R1:\$w[0-9]+]], 258 + + store volatile <16 x i8> , <16 x i8>*@v16i8 + ; MIPS32-DAG: lui [[R2:\$[0-9]+]], 258 + ; MIPS32-DAG: ori [[R2]], [[R2]], 772 + ; MIPS32-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]] + + store volatile <16 x i8> , <16 x i8>*@v16i8 + ; MIPS32: ld.b [[R1:\$w[0-9]+]], %lo( + + ret void + ; MIPS32: .size const_v16i8 +} + +define void @const_v8i16() nounwind { + ; MIPS32: const_v8i16: + + store volatile <8 x i16> , <8 x i16>*@v8i16 + ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 0 + + store volatile <8 x i16> , <8 x i16>*@v8i16 + ; MIPS32: ldi.h [[R1:\$w[0-9]+]], 1 + + store volatile <8 x i16> , <8 x i16>*@v8i16 + ; MIPS32: ld.h [[R1:\$w[0-9]+]], %lo( + + store volatile <8 x i16> , <8 x i16>*@v8i16 + ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 4 + + store volatile <8 x i16> , <8 x i16>*@v8i16 + ; MIPS32-DAG: lui [[R2:\$[0-9]+]], 1 + ; MIPS32-DAG: ori [[R2]], [[R2]], 2 + ; MIPS32-DAG: fill.w [[R1:\$w[0-9]+]], [[R2]] + + store volatile <8 x i16> , <8 x i16>*@v8i16 + ; MIPS32: ld.h [[R1:\$w[0-9]+]], %lo( + + ret void + ; MIPS32: .size const_v8i16 +} + +define void @const_v4i32() nounwind { + ; MIPS32: const_v4i32: + + store volatile <4 x i32> , <4 x i32>*@v4i32 + ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 0 + + store volatile <4 x i32> , <4 x i32>*@v4i32 + ; MIPS32: ldi.w [[R1:\$w[0-9]+]], 1 + + store volatile <4 x i32> , <4 x i32>*@v4i32 + ; MIPS32: ld.w [[R1:\$w[0-9]+]], %lo( + + store volatile <4 x i32> , <4 x i32>*@v4i32 + ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 1 + + store volatile <4 x i32> , <4 x i32>*@v4i32 + ; MIPS32: ldi.h [[R1:\$w[0-9]+]], 1 + + store volatile <4 x i32> , <4 x i32>*@v4i32 + ; MIPS32: ld.w [[R1:\$w[0-9]+]], %lo( + + store volatile <4 x i32> , <4 x i32>*@v4i32 + ; MIPS32: ld.w [[R1:\$w[0-9]+]], %lo( + + ret void + ; MIPS32: .size const_v4i32 +} + +define void @const_v2i64() nounwind { + ; MIPS32: const_v2i64: + + 
store volatile <2 x i64> , <2 x i64>*@v2i64 + ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 0 + + store volatile <2 x i64> , <2 x i64>*@v2i64 + ; MIPS32: ldi.b [[R1:\$w[0-9]+]], 1 + + store volatile <2 x i64> , <2 x i64>*@v2i64 + ; MIPS32: ldi.h [[R1:\$w[0-9]+]], 1 + + store volatile <2 x i64> , <2 x i64>*@v2i64 + ; MIPS32: ldi.w [[R1:\$w[0-9]+]], 1 + + store volatile <2 x i64> , <2 x i64>*@v2i64 + ; MIPS32: ldi.d [[R1:\$w[0-9]+]], 1 + + store volatile <2 x i64> , <2 x i64>*@v2i64 + ; MIPS32: ld.w [[R1:\$w[0-9]+]], %lo( + + store volatile <2 x i64> , <2 x i64>*@v2i64 + ; MIPS32: ld.w [[R1:\$w[0-9]+]], %lo( + + ret void + ; MIPS32: .size const_v2i64 +} diff --git a/test/CodeGen/Mips/msa/basic_operations_float.ll b/test/CodeGen/Mips/msa/basic_operations_float.ll new file mode 100644 index 00000000000..19213758773 --- /dev/null +++ b/test/CodeGen/Mips/msa/basic_operations_float.ll @@ -0,0 +1,57 @@ +; RUN: llc -march=mips -mattr=+msa < %s | FileCheck -check-prefix=MIPS32 %s + +@v4f32 = global <4 x float> +@v2f64 = global <2 x double> + +define void @const_v4f32() nounwind { + ; MIPS32: const_v4f32: + + store volatile <4 x float> , <4 x float>*@v4f32 + ; MIPS32: ld.w [[R1:\$w[0-9]+]], %lo( + + store volatile <4 x float> , <4 x float>*@v4f32 + ; MIPS32: ld.w [[R1:\$w[0-9]+]], %lo( + + store volatile <4 x float> , <4 x float>*@v4f32 + ; MIPS32: ld.w [[R1:\$w[0-9]+]], %lo( + + store volatile <4 x float> , <4 x float>*@v4f32 + ; MIPS32: ld.w [[R1:\$w[0-9]+]], %lo( + + store volatile <4 x float> , <4 x float>*@v4f32 + ; MIPS32: ld.w [[R1:\$w[0-9]+]], %lo( + + store volatile <4 x float> , <4 x float>*@v4f32 + ; MIPS32: ld.w [[R1:\$w[0-9]+]], %lo( + + ret void + ; MIPS32: .size const_v4f32 +} + +define void @const_v2f64() nounwind { + ; MIPS32: const_v2f64: + + store volatile <2 x double> , <2 x double>*@v2f64 + ; MIPS32: ld.d [[R1:\$w[0-9]+]], %lo( + + store volatile <2 x double> , <2 x double>*@v2f64 + ; MIPS32: ld.d [[R1:\$w[0-9]+]], %lo( + + store volatile <2 x double> , <2 x 
double>*@v2f64 + ; MIPS32: ld.d [[R1:\$w[0-9]+]], %lo( + + store volatile <2 x double> , <2 x double>*@v2f64 + ; MIPS32: ld.d [[R1:\$w[0-9]+]], %lo( + + store volatile <2 x double> , <2 x double>*@v2f64 + ; MIPS32: ld.d [[R1:\$w[0-9]+]], %lo( + + store volatile <2 x double> , <2 x double>*@v2f64 + ; MIPS32: ld.d [[R1:\$w[0-9]+]], %lo( + + store volatile <2 x double> , <2 x double>*@v2f64 + ; MIPS32: ld.d [[R1:\$w[0-9]+]], %lo( + + ret void + ; MIPS32: .size const_v2f64 +}