diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6195441cfc0..254fea67fc4 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -623,6 +623,88 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) { } } +// getVCmpInst: return the vector compare instruction for the specified +// vector type and condition code. Since this is for altivec specific code, +// only support the altivec types (v16i8, v8i16, v4i32, and v4f32). +static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) { + switch (CC) { + case ISD::SETEQ: + case ISD::SETUEQ: + case ISD::SETNE: + case ISD::SETUNE: + if (VecVT == MVT::v16i8) + return PPC::VCMPEQUB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPEQUH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPEQUW; + // v4f32 != v4f32 could be translated to unordered not equal + else if (VecVT == MVT::v4f32) + return PPC::VCMPEQFP; + break; + case ISD::SETLT: + case ISD::SETGT: + case ISD::SETLE: + case ISD::SETGE: + if (VecVT == MVT::v16i8) + return PPC::VCMPGTSB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPGTSH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPGTSW; + else if (VecVT == MVT::v4f32) + return PPC::VCMPGTFP; + break; + case ISD::SETULT: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULE: + if (VecVT == MVT::v16i8) + return PPC::VCMPGTUB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPGTUH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPGTUW; + break; + case ISD::SETOEQ: + if (VecVT == MVT::v4f32) + return PPC::VCMPEQFP; + break; + case ISD::SETOLT: + case ISD::SETOGT: + case ISD::SETOLE: + if (VecVT == MVT::v4f32) + return PPC::VCMPGTFP; + break; + case ISD::SETOGE: + if (VecVT == MVT::v4f32) + return PPC::VCMPGEFP; + break; + default: + break; + } + llvm_unreachable("Invalid vector compare condition"); +} + +// getVCmpEQInst: return the equal compare instruction for
the specified vector +// type. Since this is for altivec specific code, only support the altivec +// types (v16i8, v8i16, v4i32, and v4f32). +static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) { + switch (VecVT) { + case MVT::v16i8: + return PPC::VCMPEQUB; + case MVT::v8i16: + return PPC::VCMPEQUH; + case MVT::v4i32: + return PPC::VCMPEQUW; + case MVT::v4f32: + return PPC::VCMPEQFP; + default: + llvm_unreachable("Invalid integer vector compare condition"); + } +} + + SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { DebugLoc dl = N->getDebugLoc(); unsigned Imm; @@ -706,20 +788,58 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - // Altivec Vector compare instructions do not set any CR register by default + // Altivec Vector compare instructions do not set any CR register by default and + // vector compare operations return the same type as the operands. if (LHS.getValueType().isVector()) { - unsigned int Opc; - if (LHS.getValueType() == MVT::v16i8) - Opc = PPC::VCMPEQUB; - else if (LHS.getValueType() == MVT::v4i32) - Opc = PPC::VCMPEQUW; - else if (LHS.getValueType() == MVT::v8i16) - Opc = PPC::VCMPEQUH; - else if (LHS.getValueType() == MVT::v4f32) - Opc = PPC::VCMPEQFP; - else - llvm_unreachable("Invalid vector compare type: should be expanded by legalize"); - return CurDAG->SelectNodeTo(N, Opc, LHS.getValueType(), LHS, RHS); + EVT VecVT = LHS.getValueType(); + MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy; + unsigned int VCmpInst = getVCmpInst(VT, CC); + + switch (CC) { + case ISD::SETEQ: + case ISD::SETOEQ: + case ISD::SETUEQ: + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); + case ISD::SETNE: + case ISD::SETONE: + case ISD::SETUNE: { + SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPC::VNOR, VecVT, VCmp, VCmp); + } + case ISD::SETLT: + case ISD::SETOLT: + case ISD::SETULT: + return 
CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS); + case ISD::SETGT: + case ISD::SETOGT: + case ISD::SETUGT: + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); + case ISD::SETGE: + case ISD::SETOGE: + case ISD::SETUGE: { + // Altivec has a 'Vector Compare Greater Than or Equal To' instruction + // (vcmpgefp). Select it explicitly: getVCmpInst returns it only for + // SETOGE and vcmpgtfp for plain SETGE, which would drop the equal case. + if (VecVT.getSimpleVT().isFloatingPoint()) { + return CurDAG->SelectNodeTo(N, PPC::VCMPGEFP, VecVT, LHS, RHS); + } else { + SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); + unsigned int VCmpEQInst = getVCmpEQInst(VT); + SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ); + } + } + case ISD::SETLE: + case ISD::SETOLE: + case ISD::SETULE: { + SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0); + unsigned int VCmpEQInst = getVCmpEQInst(VT); + SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ); + } + default: + llvm_unreachable("Invalid vector compare type: should be expanded by legalize"); + } } bool Inv; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index f27791013e0..8754b2623ce 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -396,6 +396,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + + // Altivec does not contain unordered floating-point compare instructions + setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand); +
setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand); } if (Subtarget->has64BitSupport()) { diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll index b2b59db8f18..3180f464d12 100644 --- a/test/CodeGen/PowerPC/vec_cmp.ll +++ b/test/CodeGen/PowerPC/vec_cmp.ll @@ -1,6 +1,9 @@ -; RUN: llc -mattr=+altivec < %s | FileCheck %s +; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s -; Check vector comparisons using altivec. +; Check vector comparisons using altivec. For non-native types, just a basic +; comparison instruction check is done. For altivec supported types (v16i8, +; v8i16, v4i32, and v4f32) all the comparison operators (==, !=, >, >=, <, <=) +; are checked. target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" @@ -33,13 +36,105 @@ define <8 x i8> @v8si8_cmp(<8 x i8> %x, <8 x i8> %y) nounwind readnone { ; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -define <16 x i8> @v16si8_cmp(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +; Additional tests for v16i8 since it is an altivec native type + +define <16 x i8> @v16si8_cmp_eq(<16 x i8> %x, <16 x i8> %y) nounwind readnone { %cmp = icmp eq <16 x i8> %x, %y %sext = sext <16 x i1> %cmp to <16 x i8> ret <16 x i8> %sext } -; CHECK: v16si8_cmp: -; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: v16si8_cmp_eq: +; CHECK: vcmpequb 2, 2, 3 + +define <16 x i8> @v16si8_cmp_ne(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp ne <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16si8_cmp_ne: +; CHECK: vcmpequb [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] + +define <16 x i8> @v16si8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp sle <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8>
%sext +} +; CHECK: v16si8_cmp_le: +; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsb [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <16 x i8> @v16ui8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp ule <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16ui8_cmp_le: +; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtub [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <16 x i8> @v16si8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp slt <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16si8_cmp_lt: +; CHECK: vcmpgtsb 2, 3, 2 + +define <16 x i8> @v16ui8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp ult <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16ui8_cmp_lt: +; CHECK: vcmpgtub 2, 3, 2 + +define <16 x i8> @v16si8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp sgt <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16si8_cmp_gt: +; CHECK: vcmpgtsb 2, 2, 3 + +define <16 x i8> @v16ui8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp ugt <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16ui8_cmp_gt: +; CHECK: vcmpgtub 2, 2, 3 + +define <16 x i8> @v16si8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp sge <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16si8_cmp_ge: +; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsb [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] + +define <16 x i8> @v16ui8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp uge <16 x i8> %x, %y + %sext = 
sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16ui8_cmp_ge: +; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtub [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] define <32 x i8> @v32si8_cmp(<32 x i8> %x, <32 x i8> %y) nounwind readnone { @@ -70,13 +165,106 @@ define <4 x i16> @v4si16_cmp(<4 x i16> %x, <4 x i16> %y) nounwind readnone { ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -define <8 x i16> @v8si16_cmp(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +; Additional tests for v8i16 since it is an altivec native type + +define <8 x i16> @v8si16_cmp_eq(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: %cmp = icmp eq <8 x i16> %x, %y %sext = sext <8 x i1> %cmp to <8 x i16> ret <8 x i16> %sext } -; CHECK: v8si16_cmp: -; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: v8si16_cmp_eq: +; CHECK: vcmpequh 2, 2, 3 + +define <8 x i16> @v8si16_cmp_ne(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp ne <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8si16_cmp_ne: +; CHECK: vcmpequh [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] + +define <8 x i16> @v8si16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp sle <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8si16_cmp_le: +; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsh [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <8 x i16> @v8ui16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp ule <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8ui16_cmp_le: +; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtuh [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <8 x i16> @v8si16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry:
+ %cmp = icmp slt <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8si16_cmp_lt: +; CHECK: vcmpgtsh 2, 3, 2 + +define <8 x i16> @v8ui16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp ult <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8ui16_cmp_lt: +; CHECK: vcmpgtuh 2, 3, 2 + +define <8 x i16> @v8si16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp sgt <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8si16_cmp_gt: +; CHECK: vcmpgtsh 2, 2, 3 + +define <8 x i16> @v8ui16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp ugt <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8ui16_cmp_gt: +; CHECK: vcmpgtuh 2, 2, 3 + +define <8 x i16> @v8si16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp sge <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8si16_cmp_ge: +; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsh [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] + +define <8 x i16> @v8ui16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp uge <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8ui16_cmp_ge: +; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtuh [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] define <16 x i16> @v16si16_cmp(<16 x i16> %x, <16 x i16> %y) nounwind readnone { @@ -110,13 +298,106 @@ define <2 x i32> @v2si32_cmp(<2 x i32> %x, <2 x i32> %y) nounwind readnone { ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -define <4 x i32> @v4si32_cmp(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +; Additional tests for v4i32 since it is an altivec native type + +define <4 x i32>
@v4si32_cmp_eq(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: %cmp = icmp eq <4 x i32> %x, %y %sext = sext <4 x i1> %cmp to <4 x i32> ret <4 x i32> %sext } -; CHECK: v4si32_cmp: -; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: v4si32_cmp_eq: +; CHECK: vcmpequw 2, 2, 3 + +define <4 x i32> @v4si32_cmp_ne(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp ne <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4si32_cmp_ne: +; CHECK: vcmpequw [[RCMP:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RCMP]], [[RCMP]] + +define <4 x i32> @v4si32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp sle <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4si32_cmp_le: +; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsw [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <4 x i32> @v4ui32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp ule <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4ui32_cmp_le: +; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtuw [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <4 x i32> @v4si32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp slt <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4si32_cmp_lt: +; CHECK: vcmpgtsw 2, 3, 2 + +define <4 x i32> @v4ui32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp ult <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4ui32_cmp_lt: +; CHECK: vcmpgtuw 2, 3, 2 + +define <4 x i32> @v4si32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp sgt <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: 
v4si32_cmp_gt: +; CHECK: vcmpgtsw 2, 2, 3 + +define <4 x i32> @v4ui32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp ugt <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4ui32_cmp_gt: +; CHECK: vcmpgtuw 2, 2, 3 + +define <4 x i32> @v4si32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp sge <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4si32_cmp_ge: +; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsw [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] + +define <4 x i32> @v4ui32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp uge <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4ui32_cmp_ge: +; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtuw [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] define <8 x i32> @v8si32_cmp(<8 x i32> %x, <8 x i32> %y) nounwind readnone { @@ -168,15 +449,70 @@ entry: ; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -define <4 x float> @v4f32_cmp(<4 x float> %x, <4 x float> %y) nounwind readnone { +; Additional tests for v4f32 since it is an altivec native type + +define <4 x float> @v4f32_cmp_eq(<4 x float> %x, <4 x float> %y) nounwind readnone { entry: %cmp = fcmp oeq <4 x float> %x, %y %sext = sext <4 x i1> %cmp to <4 x i32> %0 = bitcast <4 x i32> %sext to <4 x float> ret <4 x float> %0 } -; CHECK: v4f32_cmp: -; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: v4f32_cmp_eq: +; CHECK: vcmpeqfp 2, 2, 3 + +define <4 x float> @v4f32_cmp_ne(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp une <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_ne: +; CHECK: vcmpeqfp [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2,
[[RET]], [[RET]] + +define <4 x float> @v4f32_cmp_le(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp ole <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_le: +; CHECK: vcmpeqfp [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtfp [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <4 x float> @v4f32_cmp_lt(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp olt <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_lt: +; CHECK: vcmpgtfp 2, 3, 2 + +define <4 x float> @v4f32_cmp_ge(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp oge <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_ge: +; CHECK: vcmpgefp 2, 2, 3 + +define <4 x float> @v4f32_cmp_gt(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp ogt <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_gt: +; CHECK: vcmpgtfp 2, 2, 3 define <8 x float> @v8f32_cmp(<8 x float> %x, <8 x float> %y) nounwind readnone {