From 6ff1ef9931b50763a40e9ae8696cfab9e25cf4de Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Wed, 30 Oct 2013 14:45:14 +0000 Subject: [PATCH] [mips][msa] Added support for matching bins[lr]i.[bhwd] from normal IR (i.e. not intrinsics) This required correcting the definition of the bins[lr]i intrinsics because the result is also the first operand. It also required removing the (arbitrary) check for 32-bit immediates in MipsSEDAGToDAGISel::selectVSplat(). Currently using binsli.d with 2 bits set in the mask doesn't select binsli.d because the constant is legalized into a ConstantPool. Similar things can happen with binsri.d with more than 10 bits set in the mask. The resulting code when this happens is correct but not optimal. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@193687 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsMips.td | 24 ++- lib/Target/Mips/MSA.txt | 9 +- lib/Target/Mips/MipsISelDAGToDAG.cpp | 10 ++ lib/Target/Mips/MipsISelDAGToDAG.h | 6 + lib/Target/Mips/MipsMSAInstrInfo.td | 61 ++++++-- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 74 ++++++++-- lib/Target/Mips/MipsSEISelDAGToDAG.h | 6 + lib/Target/Mips/MipsSEISelLowering.cpp | 24 +++ test/CodeGen/Mips/msa/bitwise.ll | 104 +++++++++++-- test/CodeGen/Mips/msa/i5-b.ll | 196 ++++++++++++++++--------- 10 files changed, 395 insertions(+), 119 deletions(-) diff --git a/include/llvm/IR/IntrinsicsMips.td b/include/llvm/IR/IntrinsicsMips.td index 4118f3e710d..6276646be55 100644 --- a/include/llvm/IR/IntrinsicsMips.td +++ b/include/llvm/IR/IntrinsicsMips.td @@ -575,13 +575,17 @@ def int_mips_binsl_d : GCCBuiltin<"__builtin_msa_binsl_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_binsli_b : GCCBuiltin<"__builtin_msa_binsli_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem]>; def int_mips_binsli_h : GCCBuiltin<"__builtin_msa_binsli_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem]>; def int_mips_binsli_w : GCCBuiltin<"__builtin_msa_binsli_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem]>; def int_mips_binsli_d : GCCBuiltin<"__builtin_msa_binsli_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem]>; def int_mips_binsr_b : GCCBuiltin<"__builtin_msa_binsr_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; @@ -593,13 +597,17 @@ def int_mips_binsr_d : GCCBuiltin<"__builtin_msa_binsr_d">, Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty], [IntrNoMem]>; def int_mips_binsri_b : GCCBuiltin<"__builtin_msa_binsri_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem]>; def int_mips_binsri_h : GCCBuiltin<"__builtin_msa_binsri_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i32_ty], + [IntrNoMem]>; def int_mips_binsri_w : GCCBuiltin<"__builtin_msa_binsri_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, 
llvm_v4i32_ty, llvm_i32_ty], + [IntrNoMem]>; def int_mips_binsri_d : GCCBuiltin<"__builtin_msa_binsri_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], + [IntrNoMem]>; def int_mips_bmnz_v : GCCBuiltin<"__builtin_msa_bmnz_v">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; diff --git a/lib/Target/Mips/MSA.txt b/lib/Target/Mips/MSA.txt index f7c42c06cec..802217a8865 100644 --- a/lib/Target/Mips/MSA.txt +++ b/lib/Target/Mips/MSA.txt @@ -43,7 +43,14 @@ splati.w: It is not possible to emit splati.w since shf.w covers the same cases. shf.w will be emitted instead. -copy_s.w +copy_s.w: On MIPS32, the copy_u.d intrinsic will emit this instruction instead of copy_u.w. This is semantically equivalent since the general-purpose register file is 32-bits wide. + +binsri.[bhwd], binsli.[bhwd]: + These two operations are equivalent to each other with the operands + swapped and condition inverted. The compiler may use either one as + appropriate. + Furthermore, the compiler may use bsel.[bhwd] for some masks that do + not survive the legalization process (this is a bug and will be fixed). diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp index 0d239fd46db..d0a41e755ee 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp @@ -149,6 +149,16 @@ bool MipsDAGToDAGISel::selectVSplatUimmPow2(SDValue N, SDValue &Imm) const { return false; } +bool MipsDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + +bool MipsDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const { + llvm_unreachable("Unimplemented function."); + return false; +} + /// Select instructions not customized! Used for /// expanded, promoted and normal instructions SDNode* MipsDAGToDAGISel::Select(SDNode *Node) { diff --git a/lib/Target/Mips/MipsISelDAGToDAG.h b/lib/Target/Mips/MipsISelDAGToDAG.h index e5695c41c43..6bc96446d36 100644 --- a/lib/Target/Mips/MipsISelDAGToDAG.h +++ b/lib/Target/Mips/MipsISelDAGToDAG.h @@ -96,6 +96,12 @@ private: virtual bool selectVSplatSimm5(SDValue N, SDValue &Imm) const; /// \brief Select constant vector splats whose value is a power of 2. virtual bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value is a run of set bits + /// ending at the most significant bit + virtual bool selectVSplatMaskL(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value is a run of set bits + /// starting at bit zero. + virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const; virtual SDNode *Select(SDNode *N); diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index 65d85e34ee2..a393fdc45e4 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -315,6 +315,18 @@ def vsplati64_simm5 : SplatComplexPattern; +// Any build_vector that is a constant splat with only a consecutive sequence +// of left-most bits set. +def vsplat_maskl_bits : SplatComplexPattern; + +// Any build_vector that is a constant splat with only a consecutive sequence +// of right-most bits set. 
+def vsplat_maskr_bits : SplatComplexPattern; + def fms : PatFrag<(ops node:$wd, node:$ws, node:$wt), (fsub node:$wd, (fmul node:$ws, node:$wt))>; @@ -1079,6 +1091,31 @@ class MSA_BIT_D_DESC_BASE { + dag OutOperandList = (outs ROWD:$wd); + dag InOperandList = (ins ROWD:$wd_in, ROWS:$ws, vsplat_uimm8:$m); + string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); + list Pattern = [(set ROWD:$wd, (vselect (Ty Mask:$m), (Ty ROWD:$wd_in), + ROWS:$ws))]; + InstrItinClass Itinerary = itin; + string Constraints = "$wd = $wd_in"; +} + +class MSA_BIT_BINSLI_DESC_BASE : + MSA_BIT_BINSXI_DESC_BASE; + +class MSA_BIT_BINSRI_DESC_BASE : + MSA_BIT_BINSXI_DESC_BASE; + class MSA_BIT_SPLAT_DESC_BASE; class BINSL_W_DESC : MSA_3R_DESC_BASE<"binsl.w", int_mips_binsl_w, MSA128WOpnd>; class BINSL_D_DESC : MSA_3R_DESC_BASE<"binsl.d", int_mips_binsl_d, MSA128DOpnd>; -class BINSLI_B_DESC : MSA_BIT_B_DESC_BASE<"binsli.b", int_mips_binsli_b, - MSA128BOpnd>; -class BINSLI_H_DESC : MSA_BIT_H_DESC_BASE<"binsli.h", int_mips_binsli_h, - MSA128HOpnd>; -class BINSLI_W_DESC : MSA_BIT_W_DESC_BASE<"binsli.w", int_mips_binsli_w, - MSA128WOpnd>; -class BINSLI_D_DESC : MSA_BIT_D_DESC_BASE<"binsli.d", int_mips_binsli_d, - MSA128DOpnd>; +class BINSLI_B_DESC : MSA_BIT_BINSLI_DESC_BASE<"binsli.b", v16i8, MSA128BOpnd>; +class BINSLI_H_DESC : MSA_BIT_BINSLI_DESC_BASE<"binsli.h", v8i16, MSA128HOpnd>; +class BINSLI_W_DESC : MSA_BIT_BINSLI_DESC_BASE<"binsli.w", v4i32, MSA128WOpnd>; +class BINSLI_D_DESC : MSA_BIT_BINSLI_DESC_BASE<"binsli.d", v2i64, MSA128DOpnd>; class BINSR_B_DESC : MSA_3R_DESC_BASE<"binsr.b", int_mips_binsr_b, MSA128BOpnd>; class BINSR_H_DESC : MSA_3R_DESC_BASE<"binsr.h", int_mips_binsr_h, MSA128HOpnd>; class BINSR_W_DESC : MSA_3R_DESC_BASE<"binsr.w", int_mips_binsr_w, MSA128WOpnd>; class BINSR_D_DESC : MSA_3R_DESC_BASE<"binsr.d", int_mips_binsr_d, MSA128DOpnd>; -class BINSRI_B_DESC : MSA_BIT_B_DESC_BASE<"binsri.b", int_mips_binsri_b, - MSA128BOpnd>; -class BINSRI_H_DESC : MSA_BIT_H_DESC_BASE<"binsri.h", int_mips_binsri_h, - MSA128HOpnd>; -class BINSRI_W_DESC : MSA_BIT_W_DESC_BASE<"binsri.w", int_mips_binsri_w, - MSA128WOpnd>; -class BINSRI_D_DESC : MSA_BIT_D_DESC_BASE<"binsri.d", int_mips_binsri_d, - MSA128DOpnd>; +class BINSRI_B_DESC : MSA_BIT_BINSRI_DESC_BASE<"binsri.b", v16i8, MSA128BOpnd>; +class BINSRI_H_DESC : MSA_BIT_BINSRI_DESC_BASE<"binsri.h", v8i16, MSA128HOpnd>; +class BINSRI_W_DESC : MSA_BIT_BINSRI_DESC_BASE<"binsri.w", v4i32, MSA128WOpnd>; +class BINSRI_D_DESC : MSA_BIT_BINSRI_DESC_BASE<"binsri.d", v2i64, MSA128DOpnd>; class BMNZ_V_DESC : MSA_VEC_DESC_BASE<"bmnz.v", int_mips_bmnz_v, MSA128BOpnd>; diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp index 0a82a3a83fb..93e7bdf38a6 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.cpp +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.cpp @@ -379,12 +379,6 @@ bool MipsSEDAGToDAGISel::selectIntAddrMM(SDValue Addr, SDValue &Base, // Returns true and sets Imm if: // * MSA is enabled // * N is a ISD::BUILD_VECTOR representing a constant splat -// * The splat value fits in a signed 32-bit value. -// -// That last requirement isn't strictly a requirement of the instruction set -// but it simplifies the callers by allowing them to assume they don't have to -// handle 64-bit values. The callers will also be placing stricter requirements -// on the immediates so this doesn't prohibit selection of legal immediates. 
bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const { if (!Subtarget.hasMSA()) return false; @@ -403,10 +397,6 @@ bool MipsSEDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm) const { !Subtarget.isLittle())) return false; - // None of the immediate forms can handle more than 32 bits - if (!SplatValue.isIntN(32)) - return false; - Imm = SplatValue; return true; @@ -525,6 +515,70 @@ bool MipsSEDAGToDAGISel::selectVSplatUimmPow2(SDValue N, SDValue &Imm) const { return false; } +// Select constant vector splats whose value only has a consecutive sequence +// of left-most bits set (e.g. 0b11...1100...00). +// +// In addition to the requirements of selectVSplat(), this function returns +// true and sets Imm if: +// * The splat value is the same width as the elements of the vector +// * The splat value is a consecutive sequence of left-most bits. +// +// This function looks through ISD::BITCAST nodes. +// TODO: This might not be appropriate for big-endian MSA since BITCAST is +// sometimes a shuffle in big-endian mode. +bool MipsSEDAGToDAGISel::selectVSplatMaskL(SDValue N, SDValue &Imm) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat(N.getNode(), ImmValue) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + // Extract the run of set bits starting with bit zero from the bitwise + // inverse of ImmValue, and test that the inverse of this is the same + // as the original value. + if (ImmValue == ~(~ImmValue & ~(~ImmValue + 1))) { + + Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), EltTy); + return true; + } + } + + return false; +} + +// Select constant vector splats whose value only has a consecutive sequence +// of right-most bits set (e.g. 0b00...0011...11). +// +// In addition to the requirements of selectVSplat(), this function returns +// true and sets Imm if: +// * The splat value is the same width as the elements of the vector +// * The splat value is a consecutive sequence of right-most bits. +// +// This function looks through ISD::BITCAST nodes. +// TODO: This might not be appropriate for big-endian MSA since BITCAST is +// sometimes a shuffle in big-endian mode. +bool MipsSEDAGToDAGISel::selectVSplatMaskR(SDValue N, SDValue &Imm) const { + APInt ImmValue; + EVT EltTy = N->getValueType(0).getVectorElementType(); + + if (N->getOpcode() == ISD::BITCAST) + N = N->getOperand(0); + + if (selectVSplat(N.getNode(), ImmValue) && + ImmValue.getBitWidth() == EltTy.getSizeInBits()) { + // Extract the run of set bits starting with bit zero, and test that the + // result is the same as the original value + if (ImmValue == (ImmValue & ~(ImmValue + 1))) { + Imm = CurDAG->getTargetConstant(ImmValue.countPopulation(), EltTy); + return true; + } + } + + return false; +} + std::pair MipsSEDAGToDAGISel::selectNode(SDNode *Node) { unsigned Opcode = Node->getOpcode(); SDLoc DL(Node); diff --git a/lib/Target/Mips/MipsSEISelDAGToDAG.h b/lib/Target/Mips/MipsSEISelDAGToDAG.h index 759d3afc373..851fba0e19c 100644 --- a/lib/Target/Mips/MipsSEISelDAGToDAG.h +++ b/lib/Target/Mips/MipsSEISelDAGToDAG.h @@ -81,6 +81,12 @@ private: virtual bool selectVSplatSimm5(SDValue N, SDValue &Imm) const; /// \brief Select constant vector splats whose value is a power of 2. 
virtual bool selectVSplatUimmPow2(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value is a run of set bits + /// ending at the most significant bit + virtual bool selectVSplatMaskL(SDValue N, SDValue &Imm) const; + /// \brief Select constant vector splats whose value is a run of set bits + /// starting at bit zero. + virtual bool selectVSplatMaskR(SDValue N, SDValue &Imm) const; virtual std::pair selectNode(SDNode *Node); diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index d858e2aac5a..9c543330cc1 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -1260,6 +1260,30 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, case Intrinsic::mips_andi_b: return DAG.getNode(ISD::AND, DL, Op->getValueType(0), Op->getOperand(1), lowerMSASplatImm(Op, 2, DAG)); + case Intrinsic::mips_binsli_b: + case Intrinsic::mips_binsli_h: + case Intrinsic::mips_binsli_w: + case Intrinsic::mips_binsli_d: { + EVT VecTy = Op->getValueType(0); + EVT EltTy = VecTy.getVectorElementType(); + APInt Mask = APInt::getHighBitsSet(EltTy.getSizeInBits(), + Op->getConstantOperandVal(3)); + return DAG.getNode(ISD::VSELECT, DL, VecTy, + DAG.getConstant(Mask, VecTy, true), Op->getOperand(1), + Op->getOperand(2)); + } + case Intrinsic::mips_binsri_b: + case Intrinsic::mips_binsri_h: + case Intrinsic::mips_binsri_w: + case Intrinsic::mips_binsri_d: { + EVT VecTy = Op->getValueType(0); + EVT EltTy = VecTy.getVectorElementType(); + APInt Mask = APInt::getLowBitsSet(EltTy.getSizeInBits(), + Op->getConstantOperandVal(3)); + return DAG.getNode(ISD::VSELECT, DL, VecTy, + DAG.getConstant(Mask, VecTy, true), Op->getOperand(1), + Op->getOperand(2)); + } case Intrinsic::mips_bnz_b: case Intrinsic::mips_bnz_h: case Intrinsic::mips_bnz_w: diff --git a/test/CodeGen/Mips/msa/bitwise.ll b/test/CodeGen/Mips/msa/bitwise.ll index a606fdfb858..d0b13f6580e 100644 --- a/test/CodeGen/Mips/msa/bitwise.ll +++ b/test/CodeGen/Mips/msa/bitwise.ll @@ -1054,6 +1054,90 @@ define void @bsel_v2i64(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { ; CHECK: .size bsel_v2i64 } +define void @binsl_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { + ; CHECK: binsl_v16i8_i: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = and <16 x i8> %1, + %4 = and <16 x i8> %2, + %5 = or <16 x i8> %3, %4 + ; CHECK-DAG: binsli.b [[R2]], [[R1]], 2 + store <16 x i8> %5, <16 x i8>* %c + ; CHECK-DAG: st.b [[R2]], 0($4) + + ret void + ; CHECK: .size binsl_v16i8_i +} + +define void @binsl_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind { + ; CHECK: binsl_v8i16_i: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = and <8 x i16> %1, + %4 = and <8 x i16> %2, + %5 = or <8 x i16> %3, %4 + ; CHECK-DAG: binsli.h [[R2]], [[R1]], 2 + store <8 x i16> %5, <8 x i16>* %c + ; CHECK-DAG: st.h [[R2]], 0($4) + + ret void + ; CHECK: .size binsl_v8i16_i +} + +define void @binsl_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind { + ; CHECK: binsl_v4i32_i: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = and <4 x i32> %1, + %4 = and <4 x i32> %2, + %5 = or <4 x i32> %3, %4 + ; CHECK-DAG: binsli.w [[R2]], [[R1]], 2 + store <4 x i32> 
%5, <4 x i32>* %c + ; CHECK-DAG: st.w [[R2]], 0($4) + + ret void + ; CHECK: .size binsl_v4i32_i +} + +define void @binsl_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind { + ; CHECK: binsl_v2i64_i: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = and <2 x i64> %1, + %4 = and <2 x i64> %2, + %5 = or <2 x i64> %3, %4 + ; TODO: We use a particularly wide mask here to work around a legalization + ; issue. If the mask doesn't fit within a 10-bit immediate, it gets + ; legalized into a constant pool. We should add a test to cover the + ; other cases once they correctly select binsli.d. + ; CHECK-DAG: binsli.d [[R2]], [[R1]], 61 + store <2 x i64> %5, <2 x i64>* %c + ; CHECK-DAG: st.d [[R2]], 0($4) + + ret void + ; CHECK: .size binsl_v2i64_i +} + define void @binsr_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind { ; CHECK: binsr_v16i8_i: @@ -1068,10 +1152,9 @@ define void @binsr_v16i8_i(<16 x i8>* %c, <16 x i8>* %a, <16 x i8>* %b) nounwind i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252, i8 252> %5 = or <16 x i8> %3, %4 - ; CHECK-DAG: ldi.b [[R3:\$w[0-9]+]], 3 - ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] + ; CHECK-DAG: binsri.b [[R2]], [[R1]], 2 store <16 x i8> %5, <16 x i8>* %c - ; CHECK-DAG: st.b [[R3]], 0($4) + ; CHECK-DAG: st.b [[R2]], 0($4) ret void ; CHECK: .size binsr_v16i8_i @@ -1089,10 +1172,9 @@ define void @binsr_v8i16_i(<8 x i16>* %c, <8 x i16>* %a, <8 x i16>* %b) nounwind %4 = and <8 x i16> %2, %5 = or <8 x i16> %3, %4 - ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 3 - ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] + ; CHECK-DAG: binsri.h [[R2]], [[R1]], 2 store <8 x i16> %5, <8 x i16>* %c - ; CHECK-DAG: st.h [[R3]], 0($4) + ; CHECK-DAG: st.h [[R2]], 0($4) ret void ; CHECK: .size binsr_v8i16_i @@ -1108,10 +1190,9 @@ define void @binsr_v4i32_i(<4 x i32>* %c, <4 x i32>* %a, <4 x i32>* %b) nounwind %3 = and <4 x i32> %1, %4 = and <4 x i32> %2, %5 = or <4 x i32> %3, %4 - ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 3 - ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] + ; CHECK-DAG: binsri.w [[R2]], [[R1]], 2 store <4 x i32> %5, <4 x i32>* %c - ; CHECK-DAG: st.w [[R3]], 0($4) + ; CHECK-DAG: st.w [[R2]], 0($4) ret void ; CHECK: .size binsr_v4i32_i @@ -1127,10 +1208,9 @@ define void @binsr_v2i64_i(<2 x i64>* %c, <2 x i64>* %a, <2 x i64>* %b) nounwind %3 = and <2 x i64> %1, %4 = and <2 x i64> %2, %5 = or <2 x i64> %3, %4 - ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 3 - ; CHECK-DAG: bsel.v [[R3]], [[R2]], [[R1]] + ; CHECK-DAG: binsri.d [[R2]], [[R1]], 2 store <2 x i64> %5, <2 x i64>* %c - ; CHECK-DAG: st.d [[R3]], 0($4) + ; CHECK-DAG: st.d [[R2]], 0($4) ret void ; CHECK: .size binsr_v2i64_i diff --git a/test/CodeGen/Mips/msa/i5-b.ll b/test/CodeGen/Mips/msa/i5-b.ll index 4362625469b..14f2066ded7 100644 --- a/test/CodeGen/Mips/msa/i5-b.ll +++ b/test/CodeGen/Mips/msa/i5-b.ll @@ -79,158 +79,210 @@ declare <2 x i64> @llvm.mips.bclri.d(<2 x i64>, i32) nounwind ; CHECK: st.d ; CHECK: .size llvm_mips_bclri_d_test ; -@llvm_mips_binsli_b_ARG1 = global <16 x i8> , align 16 -@llvm_mips_binsli_b_RES = global <16 x i8> , align 16 +@llvm_mips_binsli_b_ARG1 = global <16 x i8> zeroinitializer, align 16 +@llvm_mips_binsli_b_ARG2 = global <16 x i8> , align 16 +@llvm_mips_binsli_b_RES = global <16 x i8> zeroinitializer, align 16 define void @llvm_mips_binsli_b_test() nounwind { entry: %0 = load <16 x i8>* @llvm_mips_binsli_b_ARG1 - %1 = tail call <16 x i8> @llvm.mips.binsli.b(<16 x i8> %0, i32 7) - store <16 x 
i8> %1, <16 x i8>* @llvm_mips_binsli_b_RES + %1 = load <16 x i8>* @llvm_mips_binsli_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.binsli.b(<16 x i8> %0, <16 x i8> %1, i32 7) + store <16 x i8> %2, <16 x i8>* @llvm_mips_binsli_b_RES ret void } -declare <16 x i8> @llvm.mips.binsli.b(<16 x i8>, i32) nounwind +declare <16 x i8> @llvm.mips.binsli.b(<16 x i8>, <16 x i8>, i32) nounwind ; CHECK: llvm_mips_binsli_b_test: -; CHECK: ld.b -; CHECK: binsli.b -; CHECK: st.b +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsli_b_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsli_b_ARG2)( +; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsli.b [[R3]], [[R4]], 7 +; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsli_b_RES)( +; CHECK-DAG: st.b [[R3]], 0([[R5]]) ; CHECK: .size llvm_mips_binsli_b_test -; -@llvm_mips_binsli_h_ARG1 = global <8 x i16> , align 16 -@llvm_mips_binsli_h_RES = global <8 x i16> , align 16 + +@llvm_mips_binsli_h_ARG1 = global <8 x i16> zeroinitializer, align 16 +@llvm_mips_binsli_h_ARG2 = global <8 x i16> , align 16 +@llvm_mips_binsli_h_RES = global <8 x i16> zeroinitializer, align 16 define void @llvm_mips_binsli_h_test() nounwind { entry: %0 = load <8 x i16>* @llvm_mips_binsli_h_ARG1 - %1 = tail call <8 x i16> @llvm.mips.binsli.h(<8 x i16> %0, i32 7) - store <8 x i16> %1, <8 x i16>* @llvm_mips_binsli_h_RES + %1 = load <8 x i16>* @llvm_mips_binsli_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.binsli.h(<8 x i16> %0, <8 x i16> %1, i32 7) + store <8 x i16> %2, <8 x i16>* @llvm_mips_binsli_h_RES ret void } -declare <8 x i16> @llvm.mips.binsli.h(<8 x i16>, i32) nounwind +declare <8 x i16> @llvm.mips.binsli.h(<8 x i16>, <8 x i16>, i32) nounwind ; CHECK: llvm_mips_binsli_h_test: -; CHECK: ld.h -; CHECK: binsli.h -; CHECK: st.h +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsli_h_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsli_h_ARG2)( +; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsli.h [[R3]], [[R4]], 7 +; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsli_h_RES)( +; CHECK-DAG: st.h [[R3]], 0([[R5]]) ; CHECK: .size llvm_mips_binsli_h_test -; -@llvm_mips_binsli_w_ARG1 = global <4 x i32> , align 16 -@llvm_mips_binsli_w_RES = global <4 x i32> , align 16 + +@llvm_mips_binsli_w_ARG1 = global <4 x i32> zeroinitializer, align 16 +@llvm_mips_binsli_w_ARG2 = global <4 x i32> , align 16 +@llvm_mips_binsli_w_RES = global <4 x i32> zeroinitializer, align 16 define void @llvm_mips_binsli_w_test() nounwind { entry: %0 = load <4 x i32>* @llvm_mips_binsli_w_ARG1 - %1 = tail call <4 x i32> @llvm.mips.binsli.w(<4 x i32> %0, i32 7) - store <4 x i32> %1, <4 x i32>* @llvm_mips_binsli_w_RES + %1 = load <4 x i32>* @llvm_mips_binsli_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.binsli.w(<4 x i32> %0, <4 x i32> %1, i32 7) + store <4 x i32> %2, <4 x i32>* @llvm_mips_binsli_w_RES ret void } -declare <4 x i32> @llvm.mips.binsli.w(<4 x i32>, i32) nounwind +declare <4 x i32> @llvm.mips.binsli.w(<4 x i32>, <4 x i32>, i32) nounwind ; CHECK: llvm_mips_binsli_w_test: -; CHECK: ld.w -; CHECK: binsli.w -; CHECK: st.w +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsli_w_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsli_w_ARG2)( +; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsli.w [[R3]], [[R4]], 7 +; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsli_w_RES)( +; CHECK-DAG: st.w [[R3]], 
0([[R5]]) ; CHECK: .size llvm_mips_binsli_w_test -; -@llvm_mips_binsli_d_ARG1 = global <2 x i64> , align 16 -@llvm_mips_binsli_d_RES = global <2 x i64> , align 16 + +@llvm_mips_binsli_d_ARG1 = global <2 x i64> zeroinitializer, align 16 +@llvm_mips_binsli_d_ARG2 = global <2 x i64> , align 16 +@llvm_mips_binsli_d_RES = global <2 x i64> zeroinitializer, align 16 define void @llvm_mips_binsli_d_test() nounwind { entry: %0 = load <2 x i64>* @llvm_mips_binsli_d_ARG1 - %1 = tail call <2 x i64> @llvm.mips.binsli.d(<2 x i64> %0, i32 7) - store <2 x i64> %1, <2 x i64>* @llvm_mips_binsli_d_RES + %1 = load <2 x i64>* @llvm_mips_binsli_d_ARG2 + ; TODO: We use a particularly wide mask here to work around a legalization + ; issue. If the mask doesn't fit within a 10-bit immediate, it gets + ; legalized into a constant pool. We should add a test to cover the + ; other cases once they correctly select binsli.d. + %2 = tail call <2 x i64> @llvm.mips.binsli.d(<2 x i64> %0, <2 x i64> %1, i32 61) + store <2 x i64> %2, <2 x i64>* @llvm_mips_binsli_d_RES ret void } -declare <2 x i64> @llvm.mips.binsli.d(<2 x i64>, i32) nounwind +declare <2 x i64> @llvm.mips.binsli.d(<2 x i64>, <2 x i64>, i32) nounwind ; CHECK: llvm_mips_binsli_d_test: -; CHECK: ld.d -; CHECK: binsli.d -; CHECK: st.d +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsli_d_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsli_d_ARG2)( +; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsli.d [[R3]], [[R4]], 61 +; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsli_d_RES)( +; CHECK-DAG: st.d [[R3]], 0([[R5]]) ; CHECK: .size llvm_mips_binsli_d_test -; -@llvm_mips_binsri_b_ARG1 = global <16 x i8> , align 16 -@llvm_mips_binsri_b_RES = global <16 x i8> , align 16 + +@llvm_mips_binsri_b_ARG1 = global <16 x i8> zeroinitializer, align 16 +@llvm_mips_binsri_b_ARG2 = global <16 x i8> , align 16 +@llvm_mips_binsri_b_RES = global <16 x i8> zeroinitializer, align 16 define void @llvm_mips_binsri_b_test() nounwind { entry: %0 = load <16 x i8>* @llvm_mips_binsri_b_ARG1 - %1 = tail call <16 x i8> @llvm.mips.binsri.b(<16 x i8> %0, i32 7) - store <16 x i8> %1, <16 x i8>* @llvm_mips_binsri_b_RES + %1 = load <16 x i8>* @llvm_mips_binsri_b_ARG2 + %2 = tail call <16 x i8> @llvm.mips.binsri.b(<16 x i8> %0, <16 x i8> %1, i32 7) + store <16 x i8> %2, <16 x i8>* @llvm_mips_binsri_b_RES ret void } -declare <16 x i8> @llvm.mips.binsri.b(<16 x i8>, i32) nounwind +declare <16 x i8> @llvm.mips.binsri.b(<16 x i8>, <16 x i8>, i32) nounwind ; CHECK: llvm_mips_binsri_b_test: -; CHECK: ld.b -; CHECK: binsri.b -; CHECK: st.b +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsri_b_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsri_b_ARG2)( +; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.b [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsri.b [[R3]], [[R4]], 7 +; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsri_b_RES)( +; CHECK-DAG: st.b [[R3]], 0([[R5]]) ; CHECK: .size llvm_mips_binsri_b_test -; -@llvm_mips_binsri_h_ARG1 = global <8 x i16> , align 16 -@llvm_mips_binsri_h_RES = global <8 x i16> , align 16 + +@llvm_mips_binsri_h_ARG1 = global <8 x i16> zeroinitializer, align 16 +@llvm_mips_binsri_h_ARG2 = global <8 x i16> , align 16 +@llvm_mips_binsri_h_RES = global <8 x i16> zeroinitializer, align 16 define void @llvm_mips_binsri_h_test() nounwind { entry: %0 = load <8 x i16>* @llvm_mips_binsri_h_ARG1 - %1 = tail call <8 x i16> @llvm.mips.binsri.h(<8 x i16> %0, i32 7) - store <8 x i16> 
%1, <8 x i16>* @llvm_mips_binsri_h_RES + %1 = load <8 x i16>* @llvm_mips_binsri_h_ARG2 + %2 = tail call <8 x i16> @llvm.mips.binsri.h(<8 x i16> %0, <8 x i16> %1, i32 7) + store <8 x i16> %2, <8 x i16>* @llvm_mips_binsri_h_RES ret void } -declare <8 x i16> @llvm.mips.binsri.h(<8 x i16>, i32) nounwind +declare <8 x i16> @llvm.mips.binsri.h(<8 x i16>, <8 x i16>, i32) nounwind ; CHECK: llvm_mips_binsri_h_test: -; CHECK: ld.h -; CHECK: binsri.h -; CHECK: st.h +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsri_h_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsri_h_ARG2)( +; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.h [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsri.h [[R3]], [[R4]], 7 +; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsri_h_RES)( +; CHECK-DAG: st.h [[R3]], 0([[R5]]) ; CHECK: .size llvm_mips_binsri_h_test -; -@llvm_mips_binsri_w_ARG1 = global <4 x i32> , align 16 -@llvm_mips_binsri_w_RES = global <4 x i32> , align 16 + +@llvm_mips_binsri_w_ARG1 = global <4 x i32> zeroinitializer, align 16 +@llvm_mips_binsri_w_ARG2 = global <4 x i32> , align 16 +@llvm_mips_binsri_w_RES = global <4 x i32> zeroinitializer, align 16 define void @llvm_mips_binsri_w_test() nounwind { entry: %0 = load <4 x i32>* @llvm_mips_binsri_w_ARG1 - %1 = tail call <4 x i32> @llvm.mips.binsri.w(<4 x i32> %0, i32 7) - store <4 x i32> %1, <4 x i32>* @llvm_mips_binsri_w_RES + %1 = load <4 x i32>* @llvm_mips_binsri_w_ARG2 + %2 = tail call <4 x i32> @llvm.mips.binsri.w(<4 x i32> %0, <4 x i32> %1, i32 7) + store <4 x i32> %2, <4 x i32>* @llvm_mips_binsri_w_RES ret void } -declare <4 x i32> @llvm.mips.binsri.w(<4 x i32>, i32) nounwind +declare <4 x i32> @llvm.mips.binsri.w(<4 x i32>, <4 x i32>, i32) nounwind ; CHECK: llvm_mips_binsri_w_test: -; CHECK: ld.w -; CHECK: binsri.w -; CHECK: st.w +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsri_w_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsri_w_ARG2)( +; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.w [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsri.w [[R3]], [[R4]], 7 +; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsri_w_RES)( +; CHECK-DAG: st.w [[R3]], 0([[R5]]) ; CHECK: .size llvm_mips_binsri_w_test -; -@llvm_mips_binsri_d_ARG1 = global <2 x i64> , align 16 -@llvm_mips_binsri_d_RES = global <2 x i64> , align 16 + +@llvm_mips_binsri_d_ARG1 = global <2 x i64> zeroinitializer, align 16 +@llvm_mips_binsri_d_ARG2 = global <2 x i64> , align 16 +@llvm_mips_binsri_d_RES = global <2 x i64> zeroinitializer, align 16 define void @llvm_mips_binsri_d_test() nounwind { entry: %0 = load <2 x i64>* @llvm_mips_binsri_d_ARG1 - %1 = tail call <2 x i64> @llvm.mips.binsri.d(<2 x i64> %0, i32 7) - store <2 x i64> %1, <2 x i64>* @llvm_mips_binsri_d_RES + %1 = load <2 x i64>* @llvm_mips_binsri_d_ARG2 + %2 = tail call <2 x i64> @llvm.mips.binsri.d(<2 x i64> %0, <2 x i64> %1, i32 7) + store <2 x i64> %2, <2 x i64>* @llvm_mips_binsri_d_RES ret void } -declare <2 x i64> @llvm.mips.binsri.d(<2 x i64>, i32) nounwind +declare <2 x i64> @llvm.mips.binsri.d(<2 x i64>, <2 x i64>, i32) nounwind ; CHECK: llvm_mips_binsri_d_test: -; CHECK: ld.d -; CHECK: binsri.d -; CHECK: st.d +; CHECK-DAG: lw [[R1:\$[0-9]+]], %got(llvm_mips_binsri_d_ARG1)( +; CHECK-DAG: lw [[R2:\$[0-9]+]], %got(llvm_mips_binsri_d_ARG2)( +; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0([[R1]]) +; CHECK-DAG: ld.d [[R4:\$w[0-9]+]], 0([[R2]]) +; CHECK-DAG: binsri.d [[R3]], [[R4]], 7 +; CHECK-DAG: lw [[R5:\$[0-9]+]], %got(llvm_mips_binsri_d_RES)( +; CHECK-DAG: st.d [[R3]], 
0([[R5]]) ; CHECK: .size llvm_mips_binsri_d_test -; + @llvm_mips_bnegi_b_ARG1 = global <16 x i8> , align 16 @llvm_mips_bnegi_b_RES = global <16 x i8> , align 16
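
For reference, a minimal standalone C++ sketch (not part of the patch) of the bit tests performed by the new MipsSEDAGToDAGISel::selectVSplatMaskL()/selectVSplatMaskR() hooks, using plain uint64_t in place of APInt. The real code works at the vector element width, looks through ISD::BITCAST, and then encodes countPopulation() of the matched splat as the instruction immediate; everything below is illustrative only.

    #include <cassert>
    #include <cstdint>

    // True if V is a consecutive run of set bits starting at bit zero
    // (e.g. 0b00...0011...11). Mirrors: ImmValue == (ImmValue & ~(ImmValue + 1)).
    static bool isRightMask(uint64_t V) { return V == (V & ~(V + 1)); }

    // True if V is a consecutive run of set bits ending at the most significant
    // bit (e.g. 0b11...1100...00).
    // Mirrors: ImmValue == ~(~ImmValue & ~(~ImmValue + 1)).
    static bool isLeftMask(uint64_t V) { return V == ~(~V & ~(~V + 1)); }

    int main() {
      assert(isRightMask(0x0000000000000007ULL));      // three trailing bits set
      assert(!isRightMask(0x0000000000000006ULL));     // run does not reach bit 0
      assert(isLeftMask(0xC000000000000000ULL));       // two leading bits set
      assert(!isLeftMask(0xA000000000000000ULL));      // set bits not consecutive
      assert(isLeftMask(~0ULL) && isRightMask(~0ULL)); // all-ones matches both
      assert(isLeftMask(0) && isRightMask(0));         // zero is a degenerate match
      return 0;
    }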
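
Also for reference (again not part of the patch), a scalar model of the masks built by the new binsli/binsri cases in lowerINTRINSIC_WO_CHAIN() and of the MSA.txt note that the two forms are equivalent with the operands swapped and the condition inverted. Treating the constant-splat VSELECT bitwise, set mask bits select the intrinsic's first vector operand and clear bits select the second, which is how the lowering above arranges its operands; the helper names and the fixed 64-bit element width are assumptions of this sketch.

    #include <cassert>
    #include <cstdint>

    // Bitwise select: bits of Mask that are set come from A, the rest from B.
    static uint64_t bitwiseSelect(uint64_t Mask, uint64_t A, uint64_t B) {
      return (Mask & A) | (~Mask & B);
    }

    // Scalar analogues of APInt::getHighBitsSet/getLowBitsSet for a single
    // 64-bit element (N is assumed to be in the range 1..63 so the shifts
    // stay well defined).
    static uint64_t highBits(unsigned N) { return ~0ULL << (64 - N); }
    static uint64_t lowBits(unsigned N) { return ~(~0ULL << N); }

    int main() {
      uint64_t A = 0x1234567890ABCDEFULL, B = 0xFEDCBA0987654321ULL;

      // binsli-style mask (7 left-most bits) vs. binsri-style (7 right-most).
      uint64_t L = highBits(7), R = lowBits(7);
      assert(L == 0xFE00000000000000ULL && R == 0x000000000000007FULL);

      // "Equivalent with the operands swapped and condition inverted":
      assert(bitwiseSelect(L, A, B) == bitwiseSelect(~L, B, A));
      assert(~L == lowBits(57)); // inverting a left mask yields a right mask
      return 0;
    }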