From 5f41fd685b6e82c3194be782566bbc438d697cc9 Mon Sep 17 00:00:00 2001 From: Adhemerval Zanella Date: Tue, 30 Oct 2012 13:50:19 +0000 Subject: [PATCH] PowerPC: More support for Altivec compare operations This patch adds more support for vector type comparisons using altivec. It adds correct support for v16i8, v8i16, v4i32, and v4f32 vector types for comparison operators ==, !=, >, >=, <, and <=. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@167015 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 146 +++++++++- lib/Target/PowerPC/PPCISelLowering.cpp | 8 + test/CodeGen/PowerPC/vec_cmp.ll | 364 ++++++++++++++++++++++++- 3 files changed, 491 insertions(+), 27 deletions(-) diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 6195441cfc0..254fea67fc4 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -623,6 +623,88 @@ static unsigned getCRIdxForSetCC(ISD::CondCode CC, bool &Invert, int &Other) { } } +// getVCmpInst: return the vector compare instruction for the specified +// vector type and condition code. Since this is for altivec specific code, +// only support the altivec types (v16i8, v8i16, v4i32, and v4f32). +static unsigned int getVCmpInst(MVT::SimpleValueType VecVT, ISD::CondCode CC) { + switch (CC) { + case ISD::SETEQ: + case ISD::SETUEQ: + case ISD::SETNE: + case ISD::SETUNE: + if (VecVT == MVT::v16i8) + return PPC::VCMPEQUB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPEQUH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPEQUW; + // v4f32 != v4f32 could be translate to unordered not equal + else if (VecVT == MVT::v4f32) + return PPC::VCMPEQFP; + break; + case ISD::SETLT: + case ISD::SETGT: + case ISD::SETLE: + case ISD::SETGE: + if (VecVT == MVT::v16i8) + return PPC::VCMPGTSB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPGTSH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPGTSW; + else if (VecVT == MVT::v4f32) + return PPC::VCMPGTFP; + break; + case ISD::SETULT: + case ISD::SETUGT: + case ISD::SETUGE: + case ISD::SETULE: + if (VecVT == MVT::v16i8) + return PPC::VCMPGTUB; + else if (VecVT == MVT::v8i16) + return PPC::VCMPGTUH; + else if (VecVT == MVT::v4i32) + return PPC::VCMPGTUW; + break; + case ISD::SETOEQ: + if (VecVT == MVT::v4f32) + return PPC::VCMPEQFP; + break; + case ISD::SETOLT: + case ISD::SETOGT: + case ISD::SETOLE: + if (VecVT == MVT::v4f32) + return PPC::VCMPGTFP; + break; + case ISD::SETOGE: + if (VecVT == MVT::v4f32) + return PPC::VCMPGEFP; + break; + default: + break; + } + llvm_unreachable("Invalid integer vector compare condition"); +} + +// getVCmpEQInst: return the equal compare instruction for the specified vector +// type. Since this is for altivec specific code, only support the altivec +// types (v16i8, v8i16, v4i32, and v4f32). +static unsigned int getVCmpEQInst(MVT::SimpleValueType VecVT) { + switch (VecVT) { + case MVT::v16i8: + return PPC::VCMPEQUB; + case MVT::v8i16: + return PPC::VCMPEQUH; + case MVT::v4i32: + return PPC::VCMPEQUW; + case MVT::v4f32: + return PPC::VCMPEQFP; + default: + llvm_unreachable("Invalid integer vector compare condition"); + } +} + + SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { DebugLoc dl = N->getDebugLoc(); unsigned Imm; @@ -706,20 +788,58 @@ SDNode *PPCDAGToDAGISel::SelectSETCC(SDNode *N) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - // Altivec Vector compare instructions do not set any CR register by default + // Altivec Vector compare instructions do not set any CR register by default and + // vector compare operations return the same type as the operands. if (LHS.getValueType().isVector()) { - unsigned int Opc; - if (LHS.getValueType() == MVT::v16i8) - Opc = PPC::VCMPEQUB; - else if (LHS.getValueType() == MVT::v4i32) - Opc = PPC::VCMPEQUW; - else if (LHS.getValueType() == MVT::v8i16) - Opc = PPC::VCMPEQUH; - else if (LHS.getValueType() == MVT::v4f32) - Opc = PPC::VCMPEQFP; - else - llvm_unreachable("Invalid vector compare type: should be expanded by legalize"); - return CurDAG->SelectNodeTo(N, Opc, LHS.getValueType(), LHS, RHS); + EVT VecVT = LHS.getValueType(); + MVT::SimpleValueType VT = VecVT.getSimpleVT().SimpleTy; + unsigned int VCmpInst = getVCmpInst(VT, CC); + + switch (CC) { + case ISD::SETEQ: + case ISD::SETOEQ: + case ISD::SETUEQ: + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); + case ISD::SETNE: + case ISD::SETONE: + case ISD::SETUNE: { + SDValue VCmp(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPC::VNOR, VecVT, VCmp, VCmp); + } + case ISD::SETLT: + case ISD::SETOLT: + case ISD::SETULT: + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, RHS, LHS); + case ISD::SETGT: + case ISD::SETOGT: + case ISD::SETUGT: + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); + case ISD::SETGE: + case ISD::SETOGE: + case ISD::SETUGE: { + // Small optimization: Altivec provides a 'Vector Compare Greater Than + // or Equal To' instruction (vcmpgefp), so in this case there is no + // need for extra logic for the equal compare. + if (VecVT.getSimpleVT().isFloatingPoint()) { + return CurDAG->SelectNodeTo(N, VCmpInst, VecVT, LHS, RHS); + } else { + SDValue VCmpGT(CurDAG->getMachineNode(VCmpInst, dl, VecVT, LHS, RHS), 0); + unsigned int VCmpEQInst = getVCmpEQInst(VT); + SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpGT, VCmpEQ); + } + } + case ISD::SETLE: + case ISD::SETOLE: + case ISD::SETULE: { + SDValue VCmpLE(CurDAG->getMachineNode(VCmpInst, dl, VecVT, RHS, LHS), 0); + unsigned int VCmpEQInst = getVCmpEQInst(VT); + SDValue VCmpEQ(CurDAG->getMachineNode(VCmpEQInst, dl, VecVT, LHS, RHS), 0); + return CurDAG->SelectNodeTo(N, PPC::VOR, VecVT, VCmpLE, VCmpEQ); + } + default: + llvm_unreachable("Invalid vector compare type: should be expanded by legalize"); + } } bool Inv; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index f27791013e0..8754b2623ce 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -396,6 +396,14 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + + // Altivec does not contain unordered floating-point compare instructions + setCondCodeAction(ISD::SETUO, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETUEQ, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETUGT, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETUGE, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETULT, MVT::v4f32, Expand); + setCondCodeAction(ISD::SETULE, MVT::v4f32, Expand); } if (Subtarget->has64BitSupport()) { diff --git a/test/CodeGen/PowerPC/vec_cmp.ll b/test/CodeGen/PowerPC/vec_cmp.ll index b2b59db8f18..3180f464d12 100644 --- a/test/CodeGen/PowerPC/vec_cmp.ll +++ b/test/CodeGen/PowerPC/vec_cmp.ll @@ -1,6 +1,9 @@ -; RUN: llc -mattr=+altivec < %s | FileCheck %s +; RUN: llc -mcpu=pwr6 -mattr=+altivec < %s | FileCheck %s -; Check vector comparisons using altivec. +; Check vector comparisons using altivec. For non native types, just basic +; comparison instruction check is done. For altivec supported type (16i8, +; 8i16, 4i32, and 4f32) all the comparisons operators (==, !=, >, >=, <, <=) +; are checked. target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" @@ -33,13 +36,105 @@ define <8 x i8> @v8si8_cmp(<8 x i8> %x, <8 x i8> %y) nounwind readnone { ; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -define <16 x i8> @v16si8_cmp(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +; Adicional tests for v16i8 since it is a altivec native type + +define <16 x i8> @v16si8_cmp_eq(<16 x i8> %x, <16 x i8> %y) nounwind readnone { %cmp = icmp eq <16 x i8> %x, %y %sext = sext <16 x i1> %cmp to <16 x i8> ret <16 x i8> %sext } -; CHECK: v16si8_cmp: -; CHECK: vcmpequb {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: v16si8_cmp_eq: +; CHECK: vcmpequb 2, 2, 3 + +define <16 x i8> @v16si8_cmp_ne(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp ne <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16si8_cmp_ne: +; CHECK: vcmpequb [[RET:[0-9]+]], 2, 3 +; CHECK-NOR: vnor 2, [[RET]], [[RET]] + +define <16 x i8> @v16si8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp sle <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16si8_cmp_le: +; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsb [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <16 x i8> @v16ui8_cmp_le(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp ule <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16ui8_cmp_le: +; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtub [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <16 x i8> @v16si8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp slt <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16si8_cmp_lt: +; CHECK: vcmpgtsb 2, 3, 2 + +define <16 x i8> @v16ui8_cmp_lt(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp ult <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16ui8_cmp_lt: +; CHECK: vcmpgtub 2, 3, 2 + +define <16 x i8> @v16si8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp sgt <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16si8_cmp_gt: +; CHECK: vcmpgtsb 2, 2, 3 + +define <16 x i8> @v16ui8_cmp_gt(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp ugt <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16ui8_cmp_gt: +; CHECK: vcmpgtub 2, 2, 3 + +define <16 x i8> @v16si8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp sge <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16si8_cmp_ge: +; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsb [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] + +define <16 x i8> @v16ui8_cmp_ge(<16 x i8> %x, <16 x i8> %y) nounwind readnone { +entry: + %cmp = icmp uge <16 x i8> %x, %y + %sext = sext <16 x i1> %cmp to <16 x i8> + ret <16 x i8> %sext +} +; CHECK: v16ui8_cmp_ge: +; CHECK: vcmpequb [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtub [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] define <32 x i8> @v32si8_cmp(<32 x i8> %x, <32 x i8> %y) nounwind readnone { @@ -70,13 +165,106 @@ define <4 x i16> @v4si16_cmp(<4 x i16> %x, <4 x i16> %y) nounwind readnone { ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -define <8 x i16> @v8si16_cmp(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +; Adicional tests for v8i16 since it is an altivec native type + +define <8 x i16> @v8si16_cmp_eq(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: %cmp = icmp eq <8 x i16> %x, %y %sext = sext <8 x i1> %cmp to <8 x i16> ret <8 x i16> %sext } -; CHECK: v8si16_cmp: -; CHECK: vcmpequh {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: v8si16_cmp_eq: +; CHECK: vcmpequh 2, 2, 3 + +define <8 x i16> @v8si16_cmp_ne(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp ne <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8si16_cmp_ne: +; CHECK: vcmpequh [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] + +define <8 x i16> @v8si16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp sle <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8si16_cmp_le: +; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsh [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <8 x i16> @v8ui16_cmp_le(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp ule <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8ui16_cmp_le: +; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtuh [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <8 x i16> @v8si16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp slt <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8si16_cmp_lt: +; CHECK: vcmpgtsh 2, 3, 2 + +define <8 x i16> @v8ui16_cmp_lt(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp ult <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8ui16_cmp_lt: +; CHECK: vcmpgtuh 2, 3, 2 + +define <8 x i16> @v8si16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp sgt <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8si16_cmp_gt: +; CHECK: vcmpgtsh 2, 2, 3 + +define <8 x i16> @v8ui16_cmp_gt(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp ugt <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8ui16_cmp_gt: +; CHECK: vcmpgtuh 2, 2, 3 + +define <8 x i16> @v8si16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp sge <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8si16_cmp_ge: +; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsh [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] + +define <8 x i16> @v8ui16_cmp_ge(<8 x i16> %x, <8 x i16> %y) nounwind readnone { +entry: + %cmp = icmp uge <8 x i16> %x, %y + %sext = sext <8 x i1> %cmp to <8 x i16> + ret <8 x i16> %sext +} +; CHECK: v8ui16_cmp_ge: +; CHECK: vcmpequh [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtuh [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] define <16 x i16> @v16si16_cmp(<16 x i16> %x, <16 x i16> %y) nounwind readnone { @@ -110,13 +298,106 @@ define <2 x i32> @v2si32_cmp(<2 x i32> %x, <2 x i32> %y) nounwind readnone { ; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -define <4 x i32> @v4si32_cmp(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +; Adicional tests for v4si32 since it is an altivec native type + +define <4 x i32> @v4si32_cmp_eq(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: %cmp = icmp eq <4 x i32> %x, %y %sext = sext <4 x i1> %cmp to <4 x i32> ret <4 x i32> %sext } -; CHECK: v4si32_cmp: -; CHECK: vcmpequw {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: v4si32_cmp_eq: +; CHECK: vcmpequw 2, 2, 3 + +define <4 x i32> @v4si32_cmp_ne(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp ne <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4si32_cmp_ne: +; CHECK: vcmpequw [[RCMP:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RCMP]], [[RCMP]] + +define <4 x i32> @v4si32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp sle <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4si32_cmp_le: +; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsw [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <4 x i32> @v4ui32_cmp_le(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp ule <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4ui32_cmp_le: +; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtuw [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <4 x i32> @v4si32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp slt <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4si32_cmp_lt: +; CHECK: vcmpgtsw 2, 3, 2 + +define <4 x i32> @v4ui32_cmp_lt(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp ult <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4ui32_cmp_lt: +; CHECK: vcmpgtuw 2, 3, 2 + +define <4 x i32> @v4si32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp sgt <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4si32_cmp_gt: +; CHECK: vcmpgtsw 2, 2, 3 + +define <4 x i32> @v4ui32_cmp_gt(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp ugt <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4ui32_cmp_gt: +; CHECK: vcmpgtuw 2, 2, 3 + +define <4 x i32> @v4si32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp sge <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4si32_cmp_ge: +; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtsw [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] + +define <4 x i32> @v4ui32_cmp_ge(<4 x i32> %x, <4 x i32> %y) nounwind readnone { +entry: + %cmp = icmp uge <4 x i32> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + ret <4 x i32> %sext +} +; CHECK: v4ui32_cmp_ge: +; CHECK: vcmpequw [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtuw [[RCMPGT:[0-9]+]], 2, 3 +; CHECK-NEXT: vor 2, [[RCMPGT]], [[RCMPEQ]] define <8 x i32> @v8si32_cmp(<8 x i32> %x, <8 x i32> %y) nounwind readnone { @@ -168,15 +449,70 @@ entry: ; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} -define <4 x float> @v4f32_cmp(<4 x float> %x, <4 x float> %y) nounwind readnone { +; Adicional tests for v4f32 since it is a altivec native type + +define <4 x float> @v4f32_cmp_eq(<4 x float> %x, <4 x float> %y) nounwind readnone { entry: %cmp = fcmp oeq <4 x float> %x, %y %sext = sext <4 x i1> %cmp to <4 x i32> %0 = bitcast <4 x i32> %sext to <4 x float> ret <4 x float> %0 } -; CHECK: v4f32_cmp: -; CHECK: vcmpeqfp {{[0-9]+}}, {{[0-9]+}}, {{[0-9]+}} +; CHECK: v4f32_cmp_eq: +; CHECK: vcmpeqfp 2, 2, 3 + +define <4 x float> @v4f32_cmp_ne(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp une <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_ne: +; CHECK: vcmpeqfp [[RET:[0-9]+]], 2, 3 +; CHECK-NEXT: vnor 2, [[RET]], [[RET]] + +define <4 x float> @v4f32_cmp_le(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp ole <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_le: +; CHECK: vcmpeqfp [[RCMPEQ:[0-9]+]], 2, 3 +; CHECK-NEXT: vcmpgtfp [[RCMPLE:[0-9]+]], 3, 2 +; CHECK-NEXT: vor 2, [[RCMPLE]], [[RCMPEQ]] + +define <4 x float> @v4f32_cmp_lt(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp olt <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_lt: +; CHECK: vcmpgtfp 2, 3, 2 + +define <4 x float> @v4f32_cmp_ge(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp oge <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_ge: +; CHECK: vcmpgefp 2, 2, 3 + +define <4 x float> @v4f32_cmp_gt(<4 x float> %x, <4 x float> %y) nounwind readnone { +entry: + %cmp = fcmp ogt <4 x float> %x, %y + %sext = sext <4 x i1> %cmp to <4 x i32> + %0 = bitcast <4 x i32> %sext to <4 x float> + ret <4 x float> %0 +} +; CHECK: v4f32_cmp_gt: +; CHECK: vcmpgtfp 2, 2, 3 define <8 x float> @v8f32_cmp(<8 x float> %x, <8 x float> %y) nounwind readnone {