mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-16 14:31:59 +00:00
Use NEON vmin/vmax instructions for floating-point selects.
Radar 7461718. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@96572 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2155d459a7
commit
9f6c4c141f
@ -294,6 +294,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
|
||||
setTargetDAGCombine(ISD::SIGN_EXTEND);
|
||||
setTargetDAGCombine(ISD::ZERO_EXTEND);
|
||||
setTargetDAGCombine(ISD::ANY_EXTEND);
|
||||
setTargetDAGCombine(ISD::SELECT_CC);
|
||||
}
|
||||
|
||||
computeRegisterProperties();
|
||||
@ -544,6 +545,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case ARMISD::VZIP: return "ARMISD::VZIP";
|
||||
case ARMISD::VUZP: return "ARMISD::VUZP";
|
||||
case ARMISD::VTRN: return "ARMISD::VTRN";
|
||||
case ARMISD::FMAX: return "ARMISD::FMAX";
|
||||
case ARMISD::FMIN: return "ARMISD::FMIN";
|
||||
}
|
||||
}
|
||||
|
||||
@ -3856,6 +3859,82 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
|
||||
/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
|
||||
static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
|
||||
const ARMSubtarget *ST) {
|
||||
// If the target supports NEON, try to use vmax/vmin instructions for f32
|
||||
// selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set,
|
||||
// be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
|
||||
// a NaN; only do the transformation when it matches that behavior.
|
||||
|
||||
// For now only do this when using NEON for FP operations; if using VFP, it
|
||||
// is not obvious that the benefit outweighs the cost of switching to the
|
||||
// NEON pipeline.
|
||||
if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
|
||||
N->getValueType(0) != MVT::f32)
|
||||
return SDValue();
|
||||
|
||||
SDValue CondLHS = N->getOperand(0);
|
||||
SDValue CondRHS = N->getOperand(1);
|
||||
SDValue LHS = N->getOperand(2);
|
||||
SDValue RHS = N->getOperand(3);
|
||||
ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
|
||||
|
||||
unsigned Opcode = 0;
|
||||
bool IsReversed;
|
||||
if (LHS == CondLHS && RHS == CondRHS) {
|
||||
IsReversed = false; // x CC y ? x : y
|
||||
} else if (LHS == CondRHS && RHS == CondLHS) {
|
||||
IsReversed = true ; // x CC y ? y : x
|
||||
} else {
|
||||
return SDValue();
|
||||
}
|
||||
|
||||
switch (CC) {
|
||||
default: break;
|
||||
case ISD::SETOLT:
|
||||
case ISD::SETOLE:
|
||||
case ISD::SETLT:
|
||||
case ISD::SETLE:
|
||||
// This can be vmin if we can prove that the LHS is not a NaN.
|
||||
// (If either operand is NaN, the comparison will be false and the result
|
||||
// will be the RHS, which matches vmin if RHS is the NaN.)
|
||||
if (DAG.isKnownNeverNaN(LHS))
|
||||
Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
|
||||
break;
|
||||
|
||||
case ISD::SETULT:
|
||||
case ISD::SETULE:
|
||||
// Likewise, for ULT/ULE we need to know that RHS is not a NaN.
|
||||
if (DAG.isKnownNeverNaN(RHS))
|
||||
Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
|
||||
break;
|
||||
|
||||
case ISD::SETOGT:
|
||||
case ISD::SETOGE:
|
||||
case ISD::SETGT:
|
||||
case ISD::SETGE:
|
||||
// This can be vmax if we can prove that the LHS is not a NaN.
|
||||
// (If either operand is NaN, the comparison will be false and the result
|
||||
// will be the RHS, which matches vmax if RHS is the NaN.)
|
||||
if (DAG.isKnownNeverNaN(LHS))
|
||||
Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
|
||||
break;
|
||||
|
||||
case ISD::SETUGT:
|
||||
case ISD::SETUGE:
|
||||
// Likewise, for UGT/UGE we need to know that RHS is not a NaN.
|
||||
if (DAG.isKnownNeverNaN(RHS))
|
||||
Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!Opcode)
|
||||
return SDValue();
|
||||
return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
|
||||
}
|
||||
|
||||
SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
DAGCombinerInfo &DCI) const {
|
||||
switch (N->getOpcode()) {
|
||||
@ -3863,16 +3942,14 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
|
||||
case ISD::ADD: return PerformADDCombine(N, DCI);
|
||||
case ISD::SUB: return PerformSUBCombine(N, DCI);
|
||||
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
|
||||
case ISD::INTRINSIC_WO_CHAIN:
|
||||
return PerformIntrinsicCombine(N, DCI.DAG);
|
||||
case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
|
||||
case ISD::SHL:
|
||||
case ISD::SRA:
|
||||
case ISD::SRL:
|
||||
return PerformShiftCombine(N, DCI.DAG, Subtarget);
|
||||
case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
|
||||
case ISD::SIGN_EXTEND:
|
||||
case ISD::ZERO_EXTEND:
|
||||
case ISD::ANY_EXTEND:
|
||||
return PerformExtendCombine(N, DCI.DAG, Subtarget);
|
||||
case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
|
||||
case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
|
||||
}
|
||||
return SDValue();
|
||||
}
|
||||
|
@ -131,7 +131,11 @@ namespace llvm {
|
||||
VREV16, // reverse elements within 16-bit halfwords
|
||||
VZIP, // zip (interleave)
|
||||
VUZP, // unzip (deinterleave)
|
||||
VTRN // transpose
|
||||
VTRN, // transpose
|
||||
|
||||
// Floating-point max and min:
|
||||
FMAX,
|
||||
FMIN
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -89,6 +89,11 @@ def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
|
||||
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
|
||||
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
|
||||
|
||||
// Type profile shared by the scalar floating-point max/min nodes: one result
// and two operands, where the result is f32 and both operands are constrained
// to the same type as the result.
def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
|
||||
SDTCisSameAs<0, 2>]>;
|
||||
// Selection DAG nodes for ARMISD::FMAX / ARMISD::FMIN; both use SDTARMFMAX.
def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
|
||||
def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// NEON operand definitions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -3023,6 +3028,20 @@ def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
|
||||
"vneg", "f32", "$dst, $src", "", []>;
|
||||
def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;
|
||||
|
||||
// Vector Maximum used for single-precision FP
// Three-operand "vmax.f32" on DPR_VFP2 registers.  The instruction itself
// carries no selection pattern ([]); it is tied to the NEONfmax node
// (ARMISD::FMAX) through the N3VSPat that follows the definition.
|
||||
let neverHasSideEffects = 1 in
|
||||
def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
|
||||
(ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
|
||||
"vmax", "f32", "$dst, $src1, $src2", "", []>;
|
||||
def : N3VSPat<NEONfmax, VMAXfd_sfp>;
|
||||
|
||||
// Vector Minimum used for single-precision FP
// Three-operand "vmin.f32" on DPR_VFP2 registers, tied to the NEONfmin node
// (ARMISD::FMIN) through the N3VSPat below.
// In the ARM encoding, floating-point VMIN differs from VMAX only in the "op"
// bit (bit 21), which is 1 for VMIN, so the bits 21-20 field must be 0b10
// here rather than the 0b00 used by VMAXfd_sfp.
// NOTE(review): assumes the third N3V argument supplies instruction bits
// 21-20 -- confirm against the N3V class definition.
let neverHasSideEffects = 1 in
def VMINfd_sfp : N3V<0, 0, 0b10, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
                     (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
                     "vmin", "f32", "$dst, $src1, $src2", "", []>;
def : N3VSPat<NEONfmin, VMINfd_sfp>;
|
||||
|
||||
// Vector Convert between single-precision FP and integer
|
||||
let neverHasSideEffects = 1 in
|
||||
def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
|
||||
|
65
test/CodeGen/ARM/neon_minmax.ll
Normal file
65
test/CodeGen/ARM/neon_minmax.ll
Normal file
@ -0,0 +1,65 @@
|
||||
; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s

; Check that f32 selects of the form "a CC b ? a : b" (and the reversed form
; "a CC b ? b : a") are turned into NEON vmin.f32/vmax.f32.  Every test
; compares %x against the constant 1.0, so the operand that must not be a NaN
; is always the constant.

; Ordered <=, operands in order: min.
define float @fmin_ole(float %x) nounwind {
;CHECK: fmin_ole:
;CHECK: vmin.f32
  %cond = fcmp ole float 1.0, %x
  %min1 = select i1 %cond, float 1.0, float %x
  ret float %min1
}

; Unordered <, operands in order: min.
define float @fmin_ult(float %x) nounwind {
;CHECK: fmin_ult:
;CHECK: vmin.f32
  %cond = fcmp ult float %x, 1.0
  %min1 = select i1 %cond, float %x, float 1.0
  ret float %min1
}

; Ordered >, operands in order: max.
define float @fmax_ogt(float %x) nounwind {
;CHECK: fmax_ogt:
;CHECK: vmax.f32
  %cond = fcmp ogt float 1.0, %x
  %max1 = select i1 %cond, float 1.0, float %x
  ret float %max1
}

; Unordered >=, operands in order: max.
define float @fmax_uge(float %x) nounwind {
;CHECK: fmax_uge:
;CHECK: vmax.f32
  %cond = fcmp uge float %x, 1.0
  %max1 = select i1 %cond, float %x, float 1.0
  ret float %max1
}

; Ordered <, selected values swapped: max.
define float @fmax_olt_reverse(float %x) nounwind {
;CHECK: fmax_olt_reverse:
;CHECK: vmax.f32
  %cond = fcmp olt float %x, 1.0
  %max1 = select i1 %cond, float 1.0, float %x
  ret float %max1
}

; Unordered <=, selected values swapped: max.
define float @fmax_ule_reverse(float %x) nounwind {
;CHECK: fmax_ule_reverse:
;CHECK: vmax.f32
  %cond = fcmp ule float 1.0, %x
  %max1 = select i1 %cond, float %x, float 1.0
  ret float %max1
}

; Ordered >=, selected values swapped: min.
define float @fmin_oge_reverse(float %x) nounwind {
;CHECK: fmin_oge_reverse:
;CHECK: vmin.f32
  %cond = fcmp oge float %x, 1.0
  %min1 = select i1 %cond, float 1.0, float %x
  ret float %min1
}

; Unordered >, selected values swapped: min.
define float @fmin_ugt_reverse(float %x) nounwind {
;CHECK: fmin_ugt_reverse:
;CHECK: vmin.f32
  %cond = fcmp ugt float 1.0, %x
  %min1 = select i1 %cond, float %x, float 1.0
  ret float %min1
}
|
Loading…
x
Reference in New Issue
Block a user