From 38a10ff063971c2f7f7384cceba3253bca32e27a Mon Sep 17 00:00:00 2001 From: Daniel Sanders Date: Tue, 24 Sep 2013 12:04:44 +0000 Subject: [PATCH] [mips][msa] Added support for matching bsel and bseli from normal IR (i.e. not intrinsics) This required correcting the definition of the bsel and bseli intrinsics. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@191290 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/IR/IntrinsicsMips.td | 12 +- lib/Target/Mips/MipsMSAInstrInfo.td | 35 ++- lib/Target/Mips/MipsSEISelLowering.cpp | 11 + test/CodeGen/Mips/msa/compare.ll | 326 +++++++++++++++++++++++++ test/CodeGen/Mips/msa/compare_float.ll | 80 ++++++ test/CodeGen/Mips/msa/i8.ll | 4 +- test/CodeGen/Mips/msa/vec.ll | 10 +- 7 files changed, 461 insertions(+), 17 deletions(-) diff --git a/include/llvm/IR/IntrinsicsMips.td b/include/llvm/IR/IntrinsicsMips.td index 055a8cf9d2c..abca235681d 100644 --- a/include/llvm/IR/IntrinsicsMips.td +++ b/include/llvm/IR/IntrinsicsMips.td @@ -644,16 +644,12 @@ def int_mips_bnz_v : GCCBuiltin<"__builtin_msa_bnz_v">, Intrinsic<[llvm_i32_ty], [llvm_v16i8_ty], [IntrNoMem]>; def int_mips_bsel_v : GCCBuiltin<"__builtin_msa_bsel_v">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], + [IntrNoMem]>; def int_mips_bseli_b : GCCBuiltin<"__builtin_msa_bseli_b">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_bseli_h : GCCBuiltin<"__builtin_msa_bseli_h">, - Intrinsic<[llvm_v8i16_ty], [llvm_v8i16_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_bseli_w : GCCBuiltin<"__builtin_msa_bseli_w">, - Intrinsic<[llvm_v4i32_ty], [llvm_v4i32_ty, llvm_i32_ty], [IntrNoMem]>; -def int_mips_bseli_d : GCCBuiltin<"__builtin_msa_bseli_d">, - Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], + [IntrNoMem]>; def int_mips_bset_b : GCCBuiltin<"__builtin_msa_bset_b">, Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, llvm_v16i8_ty], [IntrNoMem]>; diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index fb88a3b3472..48bf8a27715 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -1282,9 +1282,26 @@ class BNZ_D_DESC : MSA_CBRANCH_DESC_BASE<"bnz.d", MSA128D>; class BNZ_V_DESC : MSA_CBRANCH_DESC_BASE<"bnz.v", MSA128B>; -class BSEL_V_DESC : MSA_VEC_DESC_BASE<"bsel.v", int_mips_bsel_v, MSA128B>; +class BSEL_V_DESC { + dag OutOperandList = (outs MSA128B:$wd); + dag InOperandList = (ins MSA128B:$wd_in, MSA128B:$ws, MSA128B:$wt); + string AsmString = "bsel.v\t$wd, $ws, $wt"; + list Pattern = [(set MSA128B:$wd, (vselect MSA128B:$wd_in, MSA128B:$ws, + MSA128B:$wt))]; + InstrItinClass Itinerary = NoItinerary; + string Constraints = "$wd = $wd_in"; +} -class BSELI_B_DESC : MSA_I8_DESC_BASE<"bseli.b", int_mips_bseli_b, MSA128B>; +class BSELI_B_DESC { + dag OutOperandList = (outs MSA128B:$wd); + dag InOperandList = (ins MSA128B:$wd_in, MSA128B:$ws, uimm8:$u8); + string AsmString = "bseli.b\t$wd, $ws, $u8"; + list Pattern = [(set MSA128B:$wd, (vselect MSA128B:$wd_in, + MSA128B:$ws, + (vsplati8 immZExt8:$u8)))]; + InstrItinClass Itinerary = NoItinerary; + string Constraints = "$wd = $wd_in"; +} class BSET_B_DESC : MSA_3R_DESC_BASE<"bset.b", int_mips_bset_b, MSA128B>; class BSET_H_DESC : MSA_3R_DESC_BASE<"bset.h", int_mips_bset_h, MSA128H>; @@ -2244,6 +2261,20 @@ def BNZ_V : BNZ_V_ENC, BNZ_V_DESC; def BSEL_V : BSEL_V_ENC, BSEL_V_DESC; +class MSA_BSEL_PSEUDO_BASE : + MipsPseudo<(outs RC:$wd), (ins RC:$wd_in, RC:$ws, RC:$wt), + [(set RC:$wd, (Ty (vselect RC:$wd_in, RC:$ws, RC:$wt)))]>, + PseudoInstExpansion<(BSEL_V MSA128B:$wd, MSA128B:$wd_in, MSA128B:$ws, + MSA128B:$wt)> { + let Constraints = "$wd_in = $wd"; +} + +def BSEL_H_PSEUDO : MSA_BSEL_PSEUDO_BASE; +def BSEL_W_PSEUDO : MSA_BSEL_PSEUDO_BASE; +def BSEL_D_PSEUDO : MSA_BSEL_PSEUDO_BASE; +def BSEL_FW_PSEUDO : MSA_BSEL_PSEUDO_BASE; +def BSEL_FD_PSEUDO : MSA_BSEL_PSEUDO_BASE; + def BSELI_B : BSELI_B_ENC, BSELI_B_DESC; def BSET_B : BSET_B_ENC, BSET_B_DESC; diff --git a/lib/Target/Mips/MipsSEISelLowering.cpp b/lib/Target/Mips/MipsSEISelLowering.cpp index 260847cd7e6..ffc5777cec8 100644 --- a/lib/Target/Mips/MipsSEISelLowering.cpp +++ b/lib/Target/Mips/MipsSEISelLowering.cpp @@ -93,6 +93,7 @@ MipsSETargetLowering::MipsSETargetLowering(MipsTargetMachine &TM) setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::VSELECT); setTargetDAGCombine(ISD::XOR); } @@ -179,6 +180,7 @@ addMSAIntType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::SRL, Ty, Legal); setOperationAction(ISD::SUB, Ty, Legal); setOperationAction(ISD::UDIV, Ty, Legal); + setOperationAction(ISD::VSELECT, Ty, Legal); setOperationAction(ISD::XOR, Ty, Legal); setOperationAction(ISD::SETCC, Ty, Legal); @@ -211,6 +213,7 @@ addMSAFloatType(MVT::SimpleValueType Ty, const TargetRegisterClass *RC) { setOperationAction(ISD::FRINT, Ty, Legal); setOperationAction(ISD::FSQRT, Ty, Legal); setOperationAction(ISD::FSUB, Ty, Legal); + setOperationAction(ISD::VSELECT, Ty, Legal); setOperationAction(ISD::SETCC, Ty, Legal); setCondCodeAction(ISD::SETOGE, Ty, Expand); @@ -1117,6 +1120,14 @@ SDValue MipsSETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, return lowerMSABranchIntr(Op, DAG, MipsISD::VALL_NONZERO); case Intrinsic::mips_bnz_v: return lowerMSABranchIntr(Op, DAG, MipsISD::VANY_NONZERO); + case Intrinsic::mips_bsel_v: + return DAG.getNode(ISD::VSELECT, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), + Op->getOperand(3)); + case Intrinsic::mips_bseli_b: + return DAG.getNode(ISD::VSELECT, SDLoc(Op), Op->getValueType(0), + Op->getOperand(1), Op->getOperand(2), + lowerMSASplatImm(Op, 3, DAG)); case Intrinsic::mips_bz_b: case Intrinsic::mips_bz_h: case Intrinsic::mips_bz_w: diff --git a/test/CodeGen/Mips/msa/compare.ll b/test/CodeGen/Mips/msa/compare.ll index 34e619b22a1..fc83f44cd40 100644 --- a/test/CodeGen/Mips/msa/compare.ll +++ b/test/CodeGen/Mips/msa/compare.ll @@ -639,3 +639,329 @@ define void @clti_u_v2i64(<2 x i64>* %c, <2 x i64>* %a) nounwind { ret void ; CHECK: .size clti_u_v2i64 } + +define void @bsel_s_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b, + <16 x i8>* %c) nounwind { + ; CHECK: bsel_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = load <16 x i8>* %c + ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7) + %4 = icmp sgt <16 x i8> %1, %2 + ; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <16 x i1> %4, <16 x i8> %1, <16 x i8> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <16 x i8> %5, <16 x i8>* %d + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_s_v16i8 +} + +define void @bsel_s_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b, + <8 x i16>* %c) nounwind { + ; CHECK: bsel_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = load <8 x i16>* %c + ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7) + %4 = icmp sgt <8 x i16> %1, %2 + ; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <8 x i1> %4, <8 x i16> %1, <8 x i16> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <8 x i16> %5, <8 x i16>* %d + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_s_v8i16 +} + +define void @bsel_s_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b, + <4 x i32>* %c) nounwind { + ; CHECK: bsel_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = load <4 x i32>* %c + ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7) + %4 = icmp sgt <4 x i32> %1, %2 + ; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <4 x i1> %4, <4 x i32> %1, <4 x i32> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <4 x i32> %5, <4 x i32>* %d + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_s_v4i32 +} + +define void @bsel_s_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b, + <2 x i64>* %c) nounwind { + ; CHECK: bsel_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = load <2 x i64>* %c + ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7) + %4 = icmp sgt <2 x i64> %1, %2 + ; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <2 x i1> %4, <2 x i64> %1, <2 x i64> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <2 x i64> %5, <2 x i64>* %d + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_s_v2i64 +} + +define void @bsel_u_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b, + <16 x i8>* %c) nounwind { + ; CHECK: bsel_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = load <16 x i8>* %c + ; CHECK-DAG: ld.b [[R3:\$w[0-9]+]], 0($7) + %4 = icmp ugt <16 x i8> %1, %2 + ; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <16 x i1> %4, <16 x i8> %1, <16 x i8> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <16 x i8> %5, <16 x i8>* %d + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_u_v16i8 +} + +define void @bsel_u_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b, + <8 x i16>* %c) nounwind { + ; CHECK: bsel_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = load <8 x i16>* %c + ; CHECK-DAG: ld.h [[R3:\$w[0-9]+]], 0($7) + %4 = icmp ugt <8 x i16> %1, %2 + ; CHECK-DAG: clt_u.h [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <8 x i1> %4, <8 x i16> %1, <8 x i16> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <8 x i16> %5, <8 x i16>* %d + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_u_v8i16 +} + +define void @bsel_u_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b, + <4 x i32>* %c) nounwind { + ; CHECK: bsel_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = load <4 x i32>* %c + ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7) + %4 = icmp ugt <4 x i32> %1, %2 + ; CHECK-DAG: clt_u.w [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <4 x i1> %4, <4 x i32> %1, <4 x i32> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <4 x i32> %5, <4 x i32>* %d + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_u_v4i32 +} + +define void @bsel_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b, + <2 x i64>* %c) nounwind { + ; CHECK: bsel_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = load <2 x i64>* %c + ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7) + %4 = icmp ugt <2 x i64> %1, %2 + ; CHECK-DAG: clt_u.d [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <2 x i1> %4, <2 x i64> %1, <2 x i64> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <2 x i64> %5, <2 x i64>* %d + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_u_v2i64 +} + +define void @bseli_s_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b, + <16 x i8>* %c) nounwind { + ; CHECK: bseli_s_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <16 x i8> %1, %2 + ; CHECK-DAG: clt_s.b [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> + ; CHECK-DAG: bseli.b [[R4]], [[R1]], 1 + store <16 x i8> %4, <16 x i8>* %d + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_s_v16i8 +} + +define void @bseli_s_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b, + <8 x i16>* %c) nounwind { + ; CHECK: bseli_s_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <8 x i16> %1, %2 + ; CHECK-DAG: clt_s.h [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> + ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <8 x i16> %4, <8 x i16>* %d + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_s_v8i16 +} + +define void @bseli_s_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b, + <4 x i32>* %c) nounwind { + ; CHECK: bseli_s_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <4 x i32> %1, %2 + ; CHECK-DAG: clt_s.w [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> + ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <4 x i32> %4, <4 x i32>* %d + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_s_v4i32 +} + +define void @bseli_s_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b, + <2 x i64>* %c) nounwind { + ; CHECK: bseli_s_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp sgt <2 x i64> %1, %2 + ; CHECK-DAG: clt_s.d [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> + ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <2 x i64> %4, <2 x i64>* %d + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_s_v2i64 +} + +define void @bseli_u_v16i8(<16 x i8>* %d, <16 x i8>* %a, <16 x i8>* %b, + <16 x i8>* %c) nounwind { + ; CHECK: bseli_u_v16i8: + + %1 = load <16 x i8>* %a + ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5) + %2 = load <16 x i8>* %b + ; CHECK-DAG: ld.b [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <16 x i8> %1, %2 + ; CHECK-DAG: clt_u.b [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <16 x i1> %3, <16 x i8> %1, <16 x i8> + ; CHECK-DAG: bseli.b [[R4]], [[R1]], 1 + store <16 x i8> %4, <16 x i8>* %d + ; CHECK-DAG: st.b [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_u_v16i8 +} + +define void @bseli_u_v8i16(<8 x i16>* %d, <8 x i16>* %a, <8 x i16>* %b, + <8 x i16>* %c) nounwind { + ; CHECK: bseli_u_v8i16: + + %1 = load <8 x i16>* %a + ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5) + %2 = load <8 x i16>* %b + ; CHECK-DAG: ld.h [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <8 x i16> %1, %2 + ; CHECK-DAG: clt_u.h [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <8 x i1> %3, <8 x i16> %1, <8 x i16> + ; CHECK-DAG: ldi.h [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <8 x i16> %4, <8 x i16>* %d + ; CHECK-DAG: st.h [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_u_v8i16 +} + +define void @bseli_u_v4i32(<4 x i32>* %d, <4 x i32>* %a, <4 x i32>* %b, + <4 x i32>* %c) nounwind { + ; CHECK: bseli_u_v4i32: + + %1 = load <4 x i32>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x i32>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <4 x i32> %1, %2 + ; CHECK-DAG: clt_u.w [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <4 x i1> %3, <4 x i32> %1, <4 x i32> + ; CHECK-DAG: ldi.w [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <4 x i32> %4, <4 x i32>* %d + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_u_v4i32 +} + +define void @bseli_u_v2i64(<2 x i64>* %d, <2 x i64>* %a, <2 x i64>* %b, + <2 x i64>* %c) nounwind { + ; CHECK: bseli_u_v2i64: + + %1 = load <2 x i64>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x i64>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = icmp ugt <2 x i64> %1, %2 + ; CHECK-DAG: clt_u.d [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <2 x i1> %3, <2 x i64> %1, <2 x i64> + ; CHECK-DAG: ldi.d [[R3:\$w[0-9]+]], 1 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <2 x i64> %4, <2 x i64>* %d + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_u_v2i64 +} diff --git a/test/CodeGen/Mips/msa/compare_float.ll b/test/CodeGen/Mips/msa/compare_float.ll index b75f839d38f..106653f47de 100644 --- a/test/CodeGen/Mips/msa/compare_float.ll +++ b/test/CodeGen/Mips/msa/compare_float.ll @@ -516,3 +516,83 @@ define void @true_v2f64(<2 x i64>* %c, <2 x double>* %a, <2 x double>* %b) nounw ; CHECK-DAG: st.d [[R4]], 0($4) ; CHECK: .size true_v2f64 } + +define void @bsel_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b, + <4 x float>* %c) nounwind { + ; CHECK: bsel_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = load <4 x float>* %c + ; CHECK-DAG: ld.w [[R3:\$w[0-9]+]], 0($7) + %4 = fcmp ogt <4 x float> %1, %2 + ; CHECK-DAG: fclt.w [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <4 x i1> %4, <4 x float> %1, <4 x float> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <4 x float> %5, <4 x float>* %d + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_v4f32 +} + +define void @bsel_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b, + <2 x double>* %c) nounwind { + ; CHECK: bsel_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = load <2 x double>* %c + ; CHECK-DAG: ld.d [[R3:\$w[0-9]+]], 0($7) + %4 = fcmp ogt <2 x double> %1, %2 + ; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %5 = select <2 x i1> %4, <2 x double> %1, <2 x double> %3 + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3]] + store <2 x double> %5, <2 x double>* %d + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size bsel_v2f64 +} + +define void @bseli_v4f32(<4 x float>* %d, <4 x float>* %a, <4 x float>* %b, + <4 x float>* %c) nounwind { + ; CHECK: bseli_v4f32: + + %1 = load <4 x float>* %a + ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5) + %2 = load <4 x float>* %b + ; CHECK-DAG: ld.w [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ogt <4 x float> %1, %2 + ; CHECK-DAG: fclt.w [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <4 x i1> %3, <4 x float> %1, <4 x float> zeroinitializer + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3:\$w[0-9]+]] + store <4 x float> %4, <4 x float>* %d + ; CHECK-DAG: st.w [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_v4f32 +} + +define void @bseli_v2f64(<2 x double>* %d, <2 x double>* %a, <2 x double>* %b, + <2 x double>* %c) nounwind { + ; CHECK: bseli_v2f64: + + %1 = load <2 x double>* %a + ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5) + %2 = load <2 x double>* %b + ; CHECK-DAG: ld.d [[R2:\$w[0-9]+]], 0($6) + %3 = fcmp ogt <2 x double> %1, %2 + ; CHECK-DAG: fclt.d [[R4:\$w[0-9]+]], [[R2]], [[R1]] + %4 = select <2 x i1> %3, <2 x double> %1, <2 x double> zeroinitializer + ; CHECK-DAG: bsel.v [[R4]], [[R1]], [[R3:\$w[0-9]+]] + store <2 x double> %4, <2 x double>* %d + ; CHECK-DAG: st.d [[R4]], 0($4) + + ret void + ; CHECK: .size bseli_v2f64 +} diff --git a/test/CodeGen/Mips/msa/i8.ll b/test/CodeGen/Mips/msa/i8.ll index e629b046624..ad8d0a44f9e 100644 --- a/test/CodeGen/Mips/msa/i8.ll +++ b/test/CodeGen/Mips/msa/i8.ll @@ -65,12 +65,12 @@ declare <16 x i8> @llvm.mips.bmzi.b(<16 x i8>, i32) nounwind define void @llvm_mips_bseli_b_test() nounwind { entry: %0 = load <16 x i8>* @llvm_mips_bseli_b_ARG1 - %1 = tail call <16 x i8> @llvm.mips.bseli.b(<16 x i8> %0, i32 25) + %1 = tail call <16 x i8> @llvm.mips.bseli.b(<16 x i8> %0, <16 x i8> %0, i32 25) store <16 x i8> %1, <16 x i8>* @llvm_mips_bseli_b_RES ret void } -declare <16 x i8> @llvm.mips.bseli.b(<16 x i8>, i32) nounwind +declare <16 x i8> @llvm.mips.bseli.b(<16 x i8>, <16 x i8>, i32) nounwind ; CHECK: llvm_mips_bseli_b_test: ; CHECK: ld.b diff --git a/test/CodeGen/Mips/msa/vec.ll b/test/CodeGen/Mips/msa/vec.ll index 7ad640b7797..ee13493d7fd 100644 --- a/test/CodeGen/Mips/msa/vec.ll +++ b/test/CodeGen/Mips/msa/vec.ll @@ -355,7 +355,7 @@ entry: %1 = load <16 x i8>* @llvm_mips_bsel_v_b_ARG2 %2 = bitcast <16 x i8> %0 to <16 x i8> %3 = bitcast <16 x i8> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %3) + %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %2, <16 x i8> %3) %5 = bitcast <16 x i8> %4 to <16 x i8> store <16 x i8> %5, <16 x i8>* @llvm_mips_bsel_v_b_RES ret void @@ -378,7 +378,7 @@ entry: %1 = load <8 x i16>* @llvm_mips_bsel_v_h_ARG2 %2 = bitcast <8 x i16> %0 to <16 x i8> %3 = bitcast <8 x i16> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %3) + %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %2, <16 x i8> %3) %5 = bitcast <16 x i8> %4 to <8 x i16> store <8 x i16> %5, <8 x i16>* @llvm_mips_bsel_v_h_RES ret void @@ -401,7 +401,7 @@ entry: %1 = load <4 x i32>* @llvm_mips_bsel_v_w_ARG2 %2 = bitcast <4 x i32> %0 to <16 x i8> %3 = bitcast <4 x i32> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %3) + %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %2, <16 x i8> %3) %5 = bitcast <16 x i8> %4 to <4 x i32> store <4 x i32> %5, <4 x i32>* @llvm_mips_bsel_v_w_RES ret void @@ -424,7 +424,7 @@ entry: %1 = load <2 x i64>* @llvm_mips_bsel_v_d_ARG2 %2 = bitcast <2 x i64> %0 to <16 x i8> %3 = bitcast <2 x i64> %1 to <16 x i8> - %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %3) + %4 = tail call <16 x i8> @llvm.mips.bsel.v(<16 x i8> %2, <16 x i8> %2, <16 x i8> %3) %5 = bitcast <16 x i8> %4 to <2 x i64> store <2 x i64> %5, <2 x i64>* @llvm_mips_bsel_v_d_RES ret void @@ -848,7 +848,7 @@ entry: declare <16 x i8> @llvm.mips.and.v(<16 x i8>, <16 x i8>) nounwind declare <16 x i8> @llvm.mips.bmnz.v(<16 x i8>, <16 x i8>) nounwind declare <16 x i8> @llvm.mips.bmz.v(<16 x i8>, <16 x i8>) nounwind -declare <16 x i8> @llvm.mips.bsel.v(<16 x i8>, <16 x i8>) nounwind +declare <16 x i8> @llvm.mips.bsel.v(<16 x i8>, <16 x i8>, <16 x i8>) nounwind declare <16 x i8> @llvm.mips.nor.v(<16 x i8>, <16 x i8>) nounwind declare <16 x i8> @llvm.mips.or.v(<16 x i8>, <16 x i8>) nounwind declare <16 x i8> @llvm.mips.xor.v(<16 x i8>, <16 x i8>) nounwind