Extend the r107852 optimization, which turns some fp compares into code sequences using only i32 operations. It now optimizes some f64 compares when fp compare is exceptionally slow (e.g. on cortex-a8). It also catches comparisons against 0.0.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108258 91177308-0d34-0410-b5e6-96231b3b80d8
commit 218977b53e
parent 7a41599962
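For illustration, a sketch of what the new lowering buys, drawn from the test updated at the end of this diff (the function and the expected instruction sequence mirror the new t2 test; exact registers depend on allocation): with unsafe FP math, an ordered-equality compare of a loaded f64 against 0.0 is emitted as two integer compares plus one conditional branch instead of a vcmpe + vmrs sequence, which is what makes it profitable on a core like Cortex-A8 where the VFP compare is slow.

define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
entry:
; Expected lowering (from the FINITE checks in the updated test):
;   ldrd  r0, [r0]     ; load both 32-bit halves of the double
;   cmp   r0, #0       ; compare the low half against 0
;   cmpeq r1, #0       ; if equal, compare the high half against 0
;   bne   ...          ; one conditional branch on the combined result
; rather than a vcmpe + vmrs + conditional branch.
 %0 = load double* %a
 %1 = fcmp oeq double %0, 0.000000e+00
 br i1 %1, label %bb1, label %bb2

bb1:
 %2 = call i32 @bar()
 ret i32 %2

bb2:
 %3 = call i32 @foo()
 ret i32 %3
}

declare i32 @bar()
declare i32 @foo()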
@@ -565,6 +565,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::CMPZ: return "ARMISD::CMPZ";
   case ARMISD::CMPFP: return "ARMISD::CMPFP";
   case ARMISD::CMPFPw0: return "ARMISD::CMPFPw0";
+  case ARMISD::BCC_i64: return "ARMISD::BCC_i64";
   case ARMISD::FMSTAT: return "ARMISD::FMSTAT";
   case ARMISD::CMOV: return "ARMISD::CMOV";
   case ARMISD::CNEG: return "ARMISD::CNEG";
@@ -2216,7 +2217,7 @@ static bool isFloatingPointZero(SDValue Op) {
 /// the given operands.
 SDValue
 ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
-                             SDValue &ARMCC, SelectionDAG &DAG,
+                             SDValue &ARMcc, SelectionDAG &DAG,
                              DebugLoc dl) const {
   if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
     unsigned C = RHSC->getZExtValue();
@@ -2268,48 +2269,14 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
     CompareType = ARMISD::CMPZ;
     break;
   }
-  ARMCC = DAG.getConstant(CondCode, MVT::i32);
+  ARMcc = DAG.getConstant(CondCode, MVT::i32);
   return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS);
 }
 
-static bool canBitcastToInt(SDNode *Op) {
-  return Op->hasOneUse() &&
-         ISD::isNormalLoad(Op) &&
-         Op->getValueType(0) == MVT::f32;
-}
-
-static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) {
-  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
-    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
-                       Ld->getChain(), Ld->getBasePtr(),
-                       Ld->getSrcValue(), Ld->getSrcValueOffset(),
-                       Ld->isVolatile(), Ld->isNonTemporal(),
-                       Ld->getAlignment());
-
-  llvm_unreachable("Unknown VFP cmp argument!");
-}
-
 /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands.
 SDValue
-ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
-                             SDValue &ARMCC, SelectionDAG &DAG,
+ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
                              DebugLoc dl) const {
-  if (UnsafeFPMath && FiniteOnlyFPMath() &&
-      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
-       CC == ISD::SETNE || CC == ISD::SETUNE) &&
-      canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) {
-    // If unsafe fp math optimization is enabled and there are no othter uses of
-    // the CMP operands, and the condition code is EQ oe NE, we can optimize it
-    // to an integer comparison.
-    if (CC == ISD::SETOEQ)
-      CC = ISD::SETEQ;
-    else if (CC == ISD::SETUNE)
-      CC = ISD::SETNE;
-    LHS = bitcastToInt(LHS, DAG);
-    RHS = bitcastToInt(RHS, DAG);
-    return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
-  }
-
   SDValue Cmp;
   if (!isFloatingPointZero(RHS))
     Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS);
@@ -2328,59 +2295,184 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   DebugLoc dl = Op.getDebugLoc();
 
   if (LHS.getValueType() == MVT::i32) {
-    SDValue ARMCC;
+    SDValue ARMcc;
     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
-    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
+    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+    return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp);
   }
 
   ARMCC::CondCodes CondCode, CondCode2;
   FPCCToARMCC(CC, CondCode, CondCode2);
 
-  SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
+  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-  SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
   SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal,
-                               ARMCC, CCR, Cmp);
+                               ARMcc, CCR, Cmp);
   if (CondCode2 != ARMCC::AL) {
-    SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32);
+    SDValue ARMcc2 = DAG.getConstant(CondCode2, MVT::i32);
     // FIXME: Needs another CMP because flag can have but one use.
-    SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl);
+    SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl);
     Result = DAG.getNode(ARMISD::CMOV, dl, VT,
-                         Result, TrueVal, ARMCC2, CCR, Cmp2);
+                         Result, TrueVal, ARMcc2, CCR, Cmp2);
   }
   return Result;
 }
 
-SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
-  SDValue Chain = Op.getOperand(0);
+/// canChangeToInt - Given the fp compare operand, return true if it is suitable
+/// to morph to an integer compare sequence.
+static bool canChangeToInt(SDValue Op, bool &SeenZero,
+                           const ARMSubtarget *Subtarget) {
+  SDNode *N = Op.getNode();
+  if (!N->hasOneUse())
+    // Otherwise it requires moving the value from fp to integer registers.
+    return false;
+  if (!N->getNumValues())
+    return false;
+  EVT VT = Op.getValueType();
+  if (VT != MVT::f32 && !Subtarget->isFPBrccSlow())
+    // f32 case is generally profitable. f64 case only makes sense when vcmpe +
+    // vmrs are very slow, e.g. cortex-a8.
+    return false;
+
+  if (isFloatingPointZero(Op)) {
+    SeenZero = true;
+    return true;
+  }
+  return ISD::isNormalLoad(N);
+}
+
+static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) {
+  if (isFloatingPointZero(Op))
+    return DAG.getConstant(0, MVT::i32);
+
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op))
+    return DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+                       Ld->getChain(), Ld->getBasePtr(),
+                       Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                       Ld->isVolatile(), Ld->isNonTemporal(),
+                       Ld->getAlignment());
+
+  llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+static void expandf64Toi32(SDValue Op, SelectionDAG &DAG,
+                           SDValue &RetVal1, SDValue &RetVal2) {
+  if (isFloatingPointZero(Op)) {
+    RetVal1 = DAG.getConstant(0, MVT::i32);
+    RetVal2 = DAG.getConstant(0, MVT::i32);
+    return;
+  }
+
+  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Op)) {
+    SDValue Ptr = Ld->getBasePtr();
+    RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+                          Ld->getChain(), Ptr,
+                          Ld->getSrcValue(), Ld->getSrcValueOffset(),
+                          Ld->isVolatile(), Ld->isNonTemporal(),
+                          Ld->getAlignment());
+
+    EVT PtrType = Ptr.getValueType();
+    unsigned NewAlign = MinAlign(Ld->getAlignment(), 4);
+    SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(),
+                                 PtrType, Ptr, DAG.getConstant(4, PtrType));
+    RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(),
+                          Ld->getChain(), NewPtr,
+                          Ld->getSrcValue(), Ld->getSrcValueOffset() + 4,
+                          Ld->isVolatile(), Ld->isNonTemporal(),
+                          NewAlign);
+    return;
+  }
+
+  llvm_unreachable("Unknown VFP cmp argument!");
+}
+
+/// OptimizeVFPBrcond - With -enable-unsafe-fp-math, it's legal to optimize some
+/// f32 and even f64 comparisons to integer ones.
+SDValue
+ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Chain = Op.getOperand(0);
   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
   SDValue LHS = Op.getOperand(2);
   SDValue RHS = Op.getOperand(3);
   SDValue Dest = Op.getOperand(4);
+  DebugLoc dl = Op.getDebugLoc();
+
+  bool SeenZero = false;
+  if (canChangeToInt(LHS, SeenZero, Subtarget) &&
+      canChangeToInt(RHS, SeenZero, Subtarget) &&
+      // If one of the operand is zero, it's safe to ignore the NaN case.
+      (FiniteOnlyFPMath() || SeenZero)) {
+    // If unsafe fp math optimization is enabled and there are no othter uses of
+    // the CMP operands, and the condition code is EQ oe NE, we can optimize it
+    // to an integer comparison.
+    if (CC == ISD::SETOEQ)
+      CC = ISD::SETEQ;
+    else if (CC == ISD::SETUNE)
+      CC = ISD::SETNE;
+
+    SDValue ARMcc;
+    if (LHS.getValueType() == MVT::f32) {
+      LHS = bitcastf32Toi32(LHS, DAG);
+      RHS = bitcastf32Toi32(RHS, DAG);
+      SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
+      SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
+      return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
+                         Chain, Dest, ARMcc, CCR, Cmp);
+    }
+
+    SDValue LHS1, LHS2;
+    SDValue RHS1, RHS2;
+    expandf64Toi32(LHS, DAG, LHS1, LHS2);
+    expandf64Toi32(RHS, DAG, RHS1, RHS2);
+    ARMCC::CondCodes CondCode = IntCCToARMCC(CC);
+    ARMcc = DAG.getConstant(CondCode, MVT::i32);
+    SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
+    SDValue Ops[] = { Chain, ARMcc, LHS1, LHS2, RHS1, RHS2, Dest };
+    return DAG.getNode(ARMISD::BCC_i64, dl, VTList, Ops, 7);
+  }
+
+  return SDValue();
+}
+
+SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Chain = Op.getOperand(0);
+  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
+  SDValue LHS = Op.getOperand(2);
+  SDValue RHS = Op.getOperand(3);
+  SDValue Dest = Op.getOperand(4);
   DebugLoc dl = Op.getDebugLoc();
 
   if (LHS.getValueType() == MVT::i32) {
-    SDValue ARMCC;
+    SDValue ARMcc;
+    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl);
     SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-    SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
     return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
-                       Chain, Dest, ARMCC, CCR,Cmp);
+                       Chain, Dest, ARMcc, CCR, Cmp);
   }
 
   assert(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64);
+
+  if (UnsafeFPMath &&
+      (CC == ISD::SETEQ || CC == ISD::SETOEQ ||
+       CC == ISD::SETNE || CC == ISD::SETUNE)) {
+    SDValue Result = OptimizeVFPBrcond(Op, DAG);
+    if (Result.getNode())
+      return Result;
+  }
+
   ARMCC::CondCodes CondCode, CondCode2;
   FPCCToARMCC(CC, CondCode, CondCode2);
 
-  SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32);
-  SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl);
+  SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32);
+  SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl);
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag);
-  SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp };
+  SDValue Ops[] = { Chain, Dest, ARMcc, CCR, Cmp };
   SDValue Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
   if (CondCode2 != ARMCC::AL) {
-    ARMCC = DAG.getConstant(CondCode2, MVT::i32);
-    SDValue Ops[] = { Res, Dest, ARMCC, CCR, Res.getValue(1) };
+    ARMcc = DAG.getConstant(CondCode2, MVT::i32);
+    SDValue Ops[] = { Res, Dest, ARMcc, CCR, Res.getValue(1) };
     Res = DAG.getNode(ARMISD::BRCOND, dl, VTList, Ops, 5);
   }
   return Res;
@@ -2469,12 +2561,11 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   EVT SrcVT = Tmp1.getValueType();
   SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0);
-  SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32);
+  SDValue ARMcc = DAG.getConstant(ARMCC::LT, MVT::i32);
   SDValue FP0 = DAG.getConstantFP(0.0, SrcVT);
-  SDValue Cmp = getVFPCmp(Tmp1, FP0,
-                          ISD::SETLT, ARMCC, DAG, dl);
+  SDValue Cmp = getVFPCmp(Tmp1, FP0, DAG, dl);
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
-  return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp);
+  return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMcc, CCR, Cmp);
 }
 
 SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{
@@ -2611,7 +2702,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
   SDValue ShOpLo = Op.getOperand(0);
   SDValue ShOpHi = Op.getOperand(1);
   SDValue ShAmt = Op.getOperand(2);
-  SDValue ARMCC;
+  SDValue ARMcc;
   unsigned Opc = (Op.getOpcode() == ISD::SRA_PARTS) ? ISD::SRA : ISD::SRL;
 
   assert(Op.getOpcode() == ISD::SRA_PARTS || Op.getOpcode() == ISD::SRL_PARTS);
@@ -2627,9 +2718,9 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op,
 
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
-                          ARMCC, DAG, dl);
+                          ARMcc, DAG, dl);
   SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
-  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC,
+  SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc,
                            CCR, Cmp);
 
   SDValue Ops[2] = { Lo, Hi };
@@ -2647,7 +2738,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
   SDValue ShOpLo = Op.getOperand(0);
   SDValue ShOpHi = Op.getOperand(1);
   SDValue ShAmt = Op.getOperand(2);
-  SDValue ARMCC;
+  SDValue ARMcc;
 
   assert(Op.getOpcode() == ISD::SHL_PARTS);
   SDValue RevShAmt = DAG.getNode(ISD::SUB, dl, MVT::i32,
@@ -2661,9 +2752,9 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op,
   SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
   SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
   SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
-                          ARMCC, DAG, dl);
+                          ARMcc, DAG, dl);
   SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
-  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC,
+  SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMcc,
                            CCR, Cmp);
 
   SDValue Ops[2] = { Lo, Hi };
@@ -3825,6 +3916,15 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   return BB;
 }
 
+static
+MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) {
+  for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
+       E = MBB->succ_end(); I != E; ++I)
+    if (*I != Succ)
+      return *I;
+  llvm_unreachable("Expecting a BB with two successors!");
+}
+
 MachineBasicBlock *
 ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
                                                MachineBasicBlock *BB) const {
@@ -3941,6 +4041,46 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
     return BB;
   }
 
+  case ARM::BCCi64:
+  case ARM::BCCZi64: {
+    // Compare both parts that make up the double comparison separately for
+    // equality.
+    bool RHSisZero = MI->getOpcode() == ARM::BCCZi64;
+
+    unsigned LHS1 = MI->getOperand(1).getReg();
+    unsigned LHS2 = MI->getOperand(2).getReg();
+    if (RHSisZero) {
+      AddDefaultPred(BuildMI(BB, dl,
+                             TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+                     .addReg(LHS1).addImm(0));
+      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+        .addReg(LHS2).addImm(0)
+        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+    } else {
+      unsigned RHS1 = MI->getOperand(3).getReg();
+      unsigned RHS2 = MI->getOperand(4).getReg();
+      AddDefaultPred(BuildMI(BB, dl,
+                             TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+                     .addReg(LHS1).addReg(RHS1));
+      BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+        .addReg(LHS2).addReg(RHS2)
+        .addImm(ARMCC::EQ).addReg(ARM::CPSR);
+    }
+
+    MachineBasicBlock *destMBB = MI->getOperand(RHSisZero ? 3 : 5).getMBB();
+    MachineBasicBlock *exitMBB = OtherSucc(BB, destMBB);
+    if (MI->getOperand(0).getImm() == ARMCC::NE)
+      std::swap(destMBB, exitMBB);
+
+    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+      .addMBB(destMBB).addImm(ARMCC::EQ).addReg(ARM::CPSR);
+    BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2B : ARM::B))
+      .addMBB(exitMBB);
+
+    MI->eraseFromParent();   // The pseudo instruction is gone now.
+    return BB;
+  }
+
   case ARM::tANDsp:
   case ARM::tADDspr_:
   case ARM::tSUBspi_:
@@ -53,6 +53,8 @@ namespace llvm {
       CMOV, // ARM conditional move instructions.
       CNEG, // ARM conditional negate instructions.
 
+      BCC_i64,
+
       RBIT, // ARM bitreverse instruction
 
       FTOSI, // FP to sint within a FP register.
@@ -363,9 +365,11 @@ namespace llvm {
                       DebugLoc dl, SelectionDAG &DAG) const;
 
     SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
-                      SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
-    SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC,
-                      SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const;
+                      SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const;
+    SDValue getVFPCmp(SDValue LHS, SDValue RHS,
+                      SelectionDAG &DAG, DebugLoc dl) const;
 
+    SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const;
+
     MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
                                          MachineBasicBlock *BB,
@@ -38,6 +38,12 @@ def SDT_ARMBr2JT : SDTypeProfile<0, 4,
                                  [SDTCisPtrTy<0>, SDTCisVT<1, i32>,
                                   SDTCisVT<2, i32>, SDTCisVT<3, i32>]>;
 
+def SDT_ARMBCC_i64 : SDTypeProfile<0, 6,
+                                   [SDTCisVT<0, i32>,
+                                    SDTCisVT<1, i32>, SDTCisVT<2, i32>,
+                                    SDTCisVT<3, i32>, SDTCisVT<4, i32>,
+                                    SDTCisVT<5, OtherVT>]>;
+
 def SDT_ARMCmp : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>]>;
 
 def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
@@ -90,6 +96,9 @@ def ARMbrjt : SDNode<"ARMISD::BR_JT", SDT_ARMBrJT,
 def ARMbr2jt : SDNode<"ARMISD::BR2_JT", SDT_ARMBr2JT,
                       [SDNPHasChain]>;
 
+def ARMBcci64 : SDNode<"ARMISD::BCC_i64", SDT_ARMBCC_i64,
+                       [SDNPHasChain]>;
+
 def ARMcmp : SDNode<"ARMISD::CMP", SDT_ARMCmp,
                     [SDNPOutFlag]>;
 
@@ -2279,6 +2288,22 @@ defm CMNz : AI1_cmp_irs<0b1011, "cmn",
 def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
              (CMNzri GPR:$src, so_imm_neg:$imm)>;
 
+// Pseudo i64 compares for some floating point compares.
+let usesCustomInserter = 1, isBranch = 1, isTerminator = 1,
+    Defs = [CPSR] in {
+def BCCi64 : PseudoInst<(outs),
+  (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst),
+  IIC_Br,
+  "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, imm:$cc",
+  [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>;
+
+def BCCZi64 : PseudoInst<(outs),
+  (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst),
+  IIC_Br,
+  "${:comment} B\t$dst GPR:$lhs1, GPR:$lhs2, 0, 0, imm:$cc",
+  [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>;
+} // usesCustomInserter
+
 
 // Conditional moves
 // FIXME: should be able to write a pattern for ARMcmov, but can't use
@@ -1,16 +1,24 @@
-; RUN: llc < %s -march=arm -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math -enable-finite-only-fp-math | FileCheck -check-prefix=FINITE %s
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 -mattr=+vfp2 -enable-unsafe-fp-math | FileCheck -check-prefix=NAN %s
 ; rdar://7461510
 
 define arm_apcscc i32 @t1(float* %a, float* %b) nounwind {
 entry:
-; CHECK: t1:
-; CHECK-NOT: vldr
-; CHECK: ldr
-; CHECK: ldr
-; CHECK: cmp r0, r1
-; CHECK-NOT: vcmpe.f32
-; CHECK-NOT: vmrs
-; CHECK: beq
+; FINITE: t1:
+; FINITE-NOT: vldr
+; FINITE: ldr
+; FINITE: ldr
+; FINITE: cmp r0, r1
+; FINITE-NOT: vcmpe.f32
+; FINITE-NOT: vmrs
+; FINITE: beq
+
+; NAN: t1:
+; NAN: vldr.32 s0,
+; NAN: vldr.32 s1,
+; NAN: vcmpe.f32 s1, s0
+; NAN: vmrs apsr_nzcv, fpscr
+; NAN: beq
 %0 = load float* %a
 %1 = load float* %b
 %2 = fcmp une float %0, %1
@@ -25,5 +33,50 @@ bb2:
 ret i32 %4
 }
 
+define arm_apcscc i32 @t2(double* %a, double* %b) nounwind {
+entry:
+; FINITE: t2:
+; FINITE-NOT: vldr
+; FINITE: ldrd r0, [r0]
+; FINITE: cmp r0, #0
+; FINITE: cmpeq r1, #0
+; FINITE-NOT: vcmpe.f32
+; FINITE-NOT: vmrs
+; FINITE: bne
+%0 = load double* %a
+%1 = fcmp oeq double %0, 0.000000e+00
+br i1 %1, label %bb1, label %bb2
+
+bb1:
+%2 = call i32 @bar()
+ret i32 %2
+
+bb2:
+%3 = call i32 @foo()
+ret i32 %3
+}
+
+define arm_apcscc i32 @t3(float* %a, float* %b) nounwind {
+entry:
+; FINITE: t3:
+; FINITE-NOT: vldr
+; FINITE: ldr r0, [r0]
+; FINITE: cmp r0, #0
+; FINITE-NOT: vcmpe.f32
+; FINITE-NOT: vmrs
+; FINITE: bne
+%0 = load float* %a
+%1 = fcmp oeq float %0, 0.000000e+00
+br i1 %1, label %bb1, label %bb2
+
+bb1:
+%2 = call i32 @bar()
+ret i32 %2
+
+bb2:
+%3 = call i32 @foo()
+ret i32 %3
+}
+
 declare i32 @bar()
 declare i32 @foo()