From 9f6c4c141ffa9c8b13e90dce2f2285c4479ff403 Mon Sep 17 00:00:00 2001 From: Bob Wilson Date: Thu, 18 Feb 2010 06:05:53 +0000 Subject: [PATCH] Use NEON vmin/vmax instructions for floating-point selects. Radar 7461718. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@96572 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 93 +++++++++++++++++++++++++++--- lib/Target/ARM/ARMISelLowering.h | 6 +- lib/Target/ARM/ARMInstrNEON.td | 19 ++++++ test/CodeGen/ARM/neon_minmax.ll | 65 +++++++++++++++++++++ 4 files changed, 174 insertions(+), 9 deletions(-) create mode 100644 test/CodeGen/ARM/neon_minmax.ll diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index adf16442e8e..ecc8289ff77 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -294,6 +294,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); setTargetDAGCombine(ISD::ANY_EXTEND); + setTargetDAGCombine(ISD::SELECT_CC); } computeRegisterProperties(); @@ -544,6 +545,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::VZIP: return "ARMISD::VZIP"; case ARMISD::VUZP: return "ARMISD::VUZP"; case ARMISD::VTRN: return "ARMISD::VTRN"; + case ARMISD::FMAX: return "ARMISD::FMAX"; + case ARMISD::FMIN: return "ARMISD::FMIN"; } } @@ -3856,23 +3859,97 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC +/// to match f32 max/min patterns to use NEON vmax/vmin instructions. +static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, + const ARMSubtarget *ST) { + // If the target supports NEON, try to use vmax/vmin instructions for f32 + // selects like "x < y ? x : y". 
Unless the FiniteOnlyFPMath option is set, + // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is + // a NaN; only do the transformation when it matches that behavior. + + // For now only do this when using NEON for FP operations; if using VFP, it + // is not obvious that the benefit outweighs the cost of switching to the + // NEON pipeline. + if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() || + N->getValueType(0) != MVT::f32) + return SDValue(); + + SDValue CondLHS = N->getOperand(0); + SDValue CondRHS = N->getOperand(1); + SDValue LHS = N->getOperand(2); + SDValue RHS = N->getOperand(3); + ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get(); + + unsigned Opcode = 0; + bool IsReversed; + if (LHS == CondLHS && RHS == CondRHS) { + IsReversed = false; // x CC y ? x : y + } else if (LHS == CondRHS && RHS == CondLHS) { + IsReversed = true ; // x CC y ? y : x + } else { + return SDValue(); + } + + switch (CC) { + default: break; + case ISD::SETOLT: + case ISD::SETOLE: + case ISD::SETLT: + case ISD::SETLE: + // This can be vmin if we can prove that the LHS is not a NaN. + // (If either operand is NaN, the comparison will be false and the result + // will be the RHS, which matches vmin if RHS is the NaN.) + if (DAG.isKnownNeverNaN(LHS)) + Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; + break; + + case ISD::SETULT: + case ISD::SETULE: + // Likewise, for ULT/ULE we need to know that RHS is not a NaN. + if (DAG.isKnownNeverNaN(RHS)) + Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN; + break; + + case ISD::SETOGT: + case ISD::SETOGE: + case ISD::SETGT: + case ISD::SETGE: + // This can be vmax if we can prove that the LHS is not a NaN. + // (If either operand is NaN, the comparison will be false and the result + // will be the RHS, which matches vmax if RHS is the NaN.) + if (DAG.isKnownNeverNaN(LHS)) + Opcode = IsReversed ? 
ARMISD::FMIN : ARMISD::FMAX; + break; + + case ISD::SETUGT: + case ISD::SETUGE: + // Likewise, for UGT/UGE we need to know that RHS is not a NaN. + if (DAG.isKnownNeverNaN(RHS)) + Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX; + break; + } + + if (!Opcode) + return SDValue(); + return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); +} + SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { switch (N->getOpcode()) { default: break; - case ISD::ADD: return PerformADDCombine(N, DCI); - case ISD::SUB: return PerformSUBCombine(N, DCI); + case ISD::ADD: return PerformADDCombine(N, DCI); + case ISD::SUB: return PerformSUBCombine(N, DCI); case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI); - case ISD::INTRINSIC_WO_CHAIN: - return PerformIntrinsicCombine(N, DCI.DAG); + case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG); case ISD::SHL: case ISD::SRA: - case ISD::SRL: - return PerformShiftCombine(N, DCI.DAG, Subtarget); + case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget); case ISD::SIGN_EXTEND: case ISD::ZERO_EXTEND: - case ISD::ANY_EXTEND: - return PerformExtendCombine(N, DCI.DAG, Subtarget); + case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget); + case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget); } return SDValue(); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 3c5df45dc55..f8f8adc70af 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -131,7 +131,11 @@ namespace llvm { VREV16, // reverse elements within 16-bit halfwords VZIP, // zip (interleave) VUZP, // unzip (deinterleave) - VTRN // transpose + VTRN, // transpose + + // Floating-point max and min: + FMAX, + FMIN }; } diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td index f981572c76c..99e81970657 100644 --- a/lib/Target/ARM/ARMInstrNEON.td +++ 
b/lib/Target/ARM/ARMInstrNEON.td @@ -89,6 +89,11 @@ def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>; def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>; def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>; +def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>]>; +def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>; +def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>; + //===----------------------------------------------------------------------===// // NEON operand definitions //===----------------------------------------------------------------------===// @@ -3023,6 +3028,20 @@ def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0, "vneg", "f32", "$dst, $src", "", []>; def : N2VSPat; +// Vector Maximum used for single-precision FP +let neverHasSideEffects = 1 in +def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst), + (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + "vmax", "f32", "$dst, $src1, $src2", "", []>; +def : N3VSPat<NEONfmax, VMAXfd_sfp>; + +// Vector Minimum used for single-precision FP +let neverHasSideEffects = 1 in +def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst), + (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND, + "vmin", "f32", "$dst, $src1, $src2", "", []>; +def : N3VSPat<NEONfmin, VMINfd_sfp>; + // Vector Convert between single-precision FP and integer let neverHasSideEffects = 1 in def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32", diff --git a/test/CodeGen/ARM/neon_minmax.ll b/test/CodeGen/ARM/neon_minmax.ll new file mode 100644 index 00000000000..64349d650e9 --- /dev/null +++ b/test/CodeGen/ARM/neon_minmax.ll @@ -0,0 +1,65 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s + +define float @fmin_ole(float %x) nounwind { +;CHECK: fmin_ole: +;CHECK: vmin.f32 + %cond = fcmp ole float 1.0, %x + %min1 = select i1 %cond, float 1.0, float %x + ret float %min1 +} + +define float @fmin_ult(float %x) nounwind { +;CHECK: fmin_ult: +;CHECK: vmin.f32 + %cond = fcmp ult float %x, 
1.0 + %min1 = select i1 %cond, float %x, float 1.0 + ret float %min1 +} + +define float @fmax_ogt(float %x) nounwind { +;CHECK: fmax_ogt: +;CHECK: vmax.f32 + %cond = fcmp ogt float 1.0, %x + %max1 = select i1 %cond, float 1.0, float %x + ret float %max1 +} + +define float @fmax_uge(float %x) nounwind { +;CHECK: fmax_uge: +;CHECK: vmax.f32 + %cond = fcmp uge float %x, 1.0 + %max1 = select i1 %cond, float %x, float 1.0 + ret float %max1 +} + +define float @fmax_olt_reverse(float %x) nounwind { +;CHECK: fmax_olt_reverse: +;CHECK: vmax.f32 + %cond = fcmp olt float %x, 1.0 + %max1 = select i1 %cond, float 1.0, float %x + ret float %max1 +} + +define float @fmax_ule_reverse(float %x) nounwind { +;CHECK: fmax_ule_reverse: +;CHECK: vmax.f32 + %cond = fcmp ult float 1.0, %x + %max1 = select i1 %cond, float %x, float 1.0 + ret float %max1 +} + +define float @fmin_oge_reverse(float %x) nounwind { +;CHECK: fmin_oge_reverse: +;CHECK: vmin.f32 + %cond = fcmp oge float %x, 1.0 + %min1 = select i1 %cond, float 1.0, float %x + ret float %min1 +} + +define float @fmin_ugt_reverse(float %x) nounwind { +;CHECK: fmin_ugt_reverse: +;CHECK: vmin.f32 + %cond = fcmp ugt float 1.0, %x + %min1 = select i1 %cond, float %x, float 1.0 + ret float %min1 +}