Use NEON vmin/vmax instructions for floating-point selects.

Radar 7461718.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@96572 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bob Wilson 2010-02-18 06:05:53 +00:00
parent 2155d459a7
commit 9f6c4c141f
4 changed files with 174 additions and 9 deletions

View File

@ -294,6 +294,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
setTargetDAGCombine(ISD::SELECT_CC);
}
computeRegisterProperties();
@ -544,6 +545,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
case ARMISD::FMAX: return "ARMISD::FMAX";
case ARMISD::FMIN: return "ARMISD::FMIN";
}
}
@ -3856,6 +3859,82 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
const ARMSubtarget *ST) {
// If the target supports NEON, try to use vmax/vmin instructions for f32
// selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set,
// be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
// a NaN; only do the transformation when it matches that behavior.
// For now only do this when using NEON for FP operations; if using VFP, it
// is not obvious that the benefit outweighs the cost of switching to the
// NEON pipeline.
if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
N->getValueType(0) != MVT::f32)
return SDValue();
SDValue CondLHS = N->getOperand(0);
SDValue CondRHS = N->getOperand(1);
SDValue LHS = N->getOperand(2);
SDValue RHS = N->getOperand(3);
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
unsigned Opcode = 0;
bool IsReversed;
if (LHS == CondLHS && RHS == CondRHS) {
IsReversed = false; // x CC y ? x : y
} else if (LHS == CondRHS && RHS == CondLHS) {
IsReversed = true ; // x CC y ? y : x
} else {
return SDValue();
}
switch (CC) {
default: break;
case ISD::SETOLT:
case ISD::SETOLE:
case ISD::SETLT:
case ISD::SETLE:
// This can be vmin if we can prove that the LHS is not a NaN.
// (If either operand is NaN, the comparison will be false and the result
// will be the RHS, which matches vmin if RHS is the NaN.)
if (DAG.isKnownNeverNaN(LHS))
Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
break;
case ISD::SETULT:
case ISD::SETULE:
// Likewise, for ULT/ULE we need to know that RHS is not a NaN.
if (DAG.isKnownNeverNaN(RHS))
Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
break;
case ISD::SETOGT:
case ISD::SETOGE:
case ISD::SETGT:
case ISD::SETGE:
// This can be vmax if we can prove that the LHS is not a NaN.
// (If either operand is NaN, the comparison will be false and the result
// will be the RHS, which matches vmax if RHS is the NaN.)
if (DAG.isKnownNeverNaN(LHS))
Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
break;
case ISD::SETUGT:
case ISD::SETUGE:
// Likewise, for UGT/UGE we need to know that RHS is not a NaN.
if (DAG.isKnownNeverNaN(RHS))
Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
break;
}
if (!Opcode)
return SDValue();
return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
}
SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
@ -3863,16 +3942,14 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
case ISD::ADD: return PerformADDCombine(N, DCI);
case ISD::SUB: return PerformSUBCombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
return PerformShiftCombine(N, DCI.DAG, Subtarget);
case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
case ISD::ANY_EXTEND:
return PerformExtendCombine(N, DCI.DAG, Subtarget);
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
}
return SDValue();
}

View File

@ -131,7 +131,11 @@ namespace llvm {
VREV16, // reverse elements within 16-bit halfwords
VZIP, // zip (interleave)
VUZP, // unzip (deinterleave)
VTRN // transpose
VTRN, // transpose
// Floating-point max and min:
FMAX,
FMIN
};
}

View File

@ -89,6 +89,11 @@ def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
SDTCisSameAs<0, 2>]>;
def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//
@ -3023,6 +3028,20 @@ def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
"vneg", "f32", "$dst, $src", "", []>;
def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;
// Vector Maximum used for single-precision FP
let neverHasSideEffects = 1 in
def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
(ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
"vmax", "f32", "$dst, $src1, $src2", "", []>;
def : N3VSPat<NEONfmax, VMAXfd_sfp>;
// Vector Minimum used for single-precision FP
let neverHasSideEffects = 1 in
def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
(ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
"vmin", "f32", "$dst, $src1, $src2", "", []>;
def : N3VSPat<NEONfmin, VMINfd_sfp>;
// Vector Convert between single-precision FP and integer
let neverHasSideEffects = 1 in
def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",

View File

@ -0,0 +1,65 @@
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
define float @fmin_ole(float %x) nounwind {
;CHECK: fmin_ole:
;CHECK: vmin.f32
%cond = fcmp ole float 1.0, %x
%min1 = select i1 %cond, float 1.0, float %x
ret float %min1
}
define float @fmin_ult(float %x) nounwind {
;CHECK: fmin_ult:
;CHECK: vmin.f32
%cond = fcmp ult float %x, 1.0
%min1 = select i1 %cond, float %x, float 1.0
ret float %min1
}
define float @fmax_ogt(float %x) nounwind {
;CHECK: fmax_ogt:
;CHECK: vmax.f32
%cond = fcmp ogt float 1.0, %x
%max1 = select i1 %cond, float 1.0, float %x
ret float %max1
}
define float @fmax_uge(float %x) nounwind {
;CHECK: fmax_uge:
;CHECK: vmax.f32
%cond = fcmp uge float %x, 1.0
%max1 = select i1 %cond, float %x, float 1.0
ret float %max1
}
define float @fmax_olt_reverse(float %x) nounwind {
;CHECK: fmax_olt_reverse:
;CHECK: vmax.f32
%cond = fcmp olt float %x, 1.0
%max1 = select i1 %cond, float 1.0, float %x
ret float %max1
}
define float @fmax_ule_reverse(float %x) nounwind {
;CHECK: fmax_ule_reverse:
;CHECK: vmax.f32
%cond = fcmp ult float 1.0, %x
%max1 = select i1 %cond, float %x, float 1.0
ret float %max1
}
define float @fmin_oge_reverse(float %x) nounwind {
;CHECK: fmin_oge_reverse:
;CHECK: vmin.f32
%cond = fcmp oge float %x, 1.0
%min1 = select i1 %cond, float 1.0, float %x
ret float %min1
}
define float @fmin_ugt_reverse(float %x) nounwind {
;CHECK: fmin_ugt_reverse:
;CHECK: vmin.f32
%cond = fcmp ugt float 1.0, %x
%min1 = select i1 %cond, float %x, float 1.0
ret float %min1
}