From 515fe3a58877c745a922252a4492e866a2f1e42e Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Thu, 8 Jul 2010 02:08:50 +0000 Subject: [PATCH] Optimize some vfp comparisons to integer ones. This patch implements the simplest case when the following conditions are met: 1. The arguments are f32. 2. The arguments are loads and they have no uses other than the comparison. 3. The comparison code is EQ or NE. e.g. vldr.32 s0, [r1] vldr.32 s1, [r0] vcmpe.f32 s1, s0 vmrs apsr_nzcv, fpscr beq LBB0_2 => ldr r1, [r1] ldr r0, [r0] cmp r0, r1 beq LBB0_2 More complicated cases will be implemented in subsequent patches. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@107852 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 55 ++++++++++++++++++++++++------ lib/Target/ARM/ARMISelLowering.h | 3 ++ test/CodeGen/ARM/fpcmp-opt.ll | 29 ++++++++++++++++ 3 files changed, 77 insertions(+), 10 deletions(-) create mode 100644 test/CodeGen/ARM/fpcmp-opt.ll diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 8670d37c836..e3db5832980 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -59,7 +59,7 @@ EnableARMTailCalls("arm-tail-calls", cl::Hidden, static cl::opt EnableARMLongCalls("arm-long-calls", cl::Hidden, - cl::desc("Generate calls via indirect call instructions."), + cl::desc("Generate calls via indirect call instructions"), cl::init(false)); static cl::opt @@ -69,7 +69,7 @@ ARMInterworking("arm-interworking", cl::Hidden, static cl::opt EnableARMCodePlacement("arm-code-placement", cl::Hidden, - cl::desc("Enable code placement pass for ARM."), + cl::desc("Enable code placement pass for ARM"), cl::init(false)); static bool CC_ARM_APCS_Custom_f64(unsigned &ValNo, EVT &ValVT, EVT &LocVT, @@ -2273,9 +2273,42 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, return DAG.getNode(CompareType, dl, MVT::Flag, LHS, RHS); } +static bool canBitcastToInt(SDNode *Op) { + return Op->hasOneUse() && + ISD::isNormalLoad(Op) && + Op->getValueType(0) == MVT::f32; +} + +static SDValue bitcastToInt(SDValue Op, SelectionDAG &DAG) { + if (LoadSDNode *Ld = dyn_cast(Op)) + return DAG.getLoad(MVT::i32, Op.getDebugLoc(), + Ld->getChain(), Ld->getBasePtr(), + Ld->getSrcValue(), Ld->getSrcValueOffset(), + Ld->isVolatile(), Ld->isNonTemporal(), + Ld->getAlignment()); + + llvm_unreachable("Unknown VFP cmp argument!"); +} + /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. -static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, - DebugLoc dl) { +SDValue +ARMTargetLowering::getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, + SDValue &ARMCC, SelectionDAG &DAG, + DebugLoc dl) const { + if ((CC == ISD::SETEQ || CC == ISD::SETOEQ || + CC == ISD::SETNE || CC == ISD::SETUNE) && + canBitcastToInt(LHS.getNode()) && canBitcastToInt(RHS.getNode())) { + // If there are no othter uses of the CMP operands, and the condition + // code is EQ oe NE, we can optimize it to an integer comparison. + if (CC == ISD::SETOEQ) + CC = ISD::SETEQ; + else if (CC == ISD::SETUNE) + CC = ISD::SETNE; + LHS = bitcastToInt(LHS, DAG); + RHS = bitcastToInt(RHS, DAG); + return getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl); + } + SDValue Cmp; if (!isFloatingPointZero(RHS)) Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Flag, LHS, RHS); @@ -2305,13 +2338,13 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl); SDValue Result = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, - ARMCC, CCR, Cmp); + ARMCC, CCR, Cmp); if (CondCode2 != ARMCC::AL) { SDValue ARMCC2 = DAG.getConstant(CondCode2, MVT::i32); // FIXME: Needs another CMP because flag can have but one use. - SDValue Cmp2 = getVFPCmp(LHS, RHS, DAG, dl); + SDValue Cmp2 = getVFPCmp(LHS, RHS, CC, ARMCC2, DAG, dl); Result = DAG.getNode(ARMISD::CMOV, dl, VT, Result, TrueVal, ARMCC2, CCR, Cmp2); } @@ -2338,8 +2371,8 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); - SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue ARMCC = DAG.getConstant(CondCode, MVT::i32); + SDValue Cmp = getVFPCmp(LHS, RHS, CC, ARMCC, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDVTList VTList = DAG.getVTList(MVT::Other, MVT::Flag); SDValue Ops[] = { Chain, Dest, ARMCC, CCR, Cmp }; @@ -2427,7 +2460,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { return DAG.getNode(Opc, dl, VT, Op); } -static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { +SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Implement fcopysign with a fabs and a conditional fneg. SDValue Tmp0 = Op.getOperand(0); SDValue Tmp1 = Op.getOperand(1); @@ -2435,8 +2468,10 @@ static SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); EVT SrcVT = Tmp1.getValueType(); SDValue AbsVal = DAG.getNode(ISD::FABS, dl, VT, Tmp0); - SDValue Cmp = getVFPCmp(Tmp1, DAG.getConstantFP(0.0, SrcVT), DAG, dl); SDValue ARMCC = DAG.getConstant(ARMCC::LT, MVT::i32); + SDValue FP0 = DAG.getConstantFP(0.0, SrcVT); + SDValue Cmp = getVFPCmp(Tmp1, FP0, + ISD::SETLT, ARMCC, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); return DAG.getNode(ARMISD::CNEG, dl, VT, AbsVal, AbsVal, ARMCC, CCR, Cmp); } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index c85832499c0..3a3866928a0 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -313,6 +313,7 @@ namespace llvm { SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const; @@ -363,6 +364,8 @@ namespace llvm { SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; + SDValue getVFPCmp(SDValue &LHS, SDValue &RHS, ISD::CondCode CC, + SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) const; MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI, MachineBasicBlock *BB, diff --git a/test/CodeGen/ARM/fpcmp-opt.ll b/test/CodeGen/ARM/fpcmp-opt.ll new file mode 100644 index 00000000000..ed515cc1e7c --- /dev/null +++ b/test/CodeGen/ARM/fpcmp-opt.ll @@ -0,0 +1,29 @@ +; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s +; rdar://7461510 + +define arm_apcscc i32 @t1(float* %a, float* %b) nounwind { +entry: +; CHECK: t1: +; CHECK-NOT: vldr +; CHECK: ldr +; CHECK: ldr +; CHECK: cmp r0, r1 +; CHECK-NOT: vcmpe.f32 +; CHECK-NOT: vmrs +; CHECK: beq + %0 = load float* %a + %1 = load float* %b + %2 = fcmp une float %0, %1 + br i1 %2, label %bb1, label %bb2 + +bb1: + %3 = call i32 @bar() + ret i32 %3 + +bb2: + %4 = call i32 @foo() + ret i32 %4 +} + +declare i32 @bar() +declare i32 @foo()