From de2b151dbf125af49717807b9cfc1f6f7a5b9ea6 Mon Sep 17 00:00:00 2001 From: Bill Wendling Date: Wed, 11 Aug 2010 08:43:16 +0000 Subject: [PATCH] Consider this code snippet: float t1(int argc) { return (argc == 1123) ? 1.234f : 2.38213f; } We would generate truly awful code on ARM (those with a weak stomach should look away): _t1: movw r1, #1123 movs r2, #1 movs r3, #0 cmp r0, r1 mov.w r0, #0 it eq moveq r0, r2 movs r1, #4 cmp r0, #0 it ne movne r3, r1 adr r0, #LCPI1_0 ldr r0, [r0, r3] bx lr The problem was that legalization was creating a cascade of SELECT_CC nodes, for for the comparison of "argc == 1123" which was fed into a SELECT node for the ?: statement which was itself converted to a SELECT_CC node. This is because the ARM back-end doesn't have custom lowering for SELECT nodes, so it used the default "Expand". I added a fairly simple "LowerSELECT" to the ARM back-end. It takes care of this testcase, but can obviously be expanded to include more cases. Now we generate this, which looks optimal to me: _t1: movw r1, #1123 movs r2, #0 cmp r0, r1 adr r0, #LCPI0_0 it eq moveq r2, #4 ldr r0, [r0, r2] bx lr .align 2 LCPI0_0: .long 1075344593 @ float 2.382130e+00 .long 1067316150 @ float 1.234000e+00 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110799 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMISelLowering.cpp | 53 ++++++++++++++++++++++++++++-- lib/Target/ARM/ARMISelLowering.h | 1 + test/CodeGen/ARM/select.ll | 25 ++++++++++++++ 3 files changed, 76 insertions(+), 3 deletions(-) diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 56adcf2b37f..e4b556228fc 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -483,9 +483,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SETCC, MVT::i32, Expand); setOperationAction(ISD::SETCC, MVT::f32, Expand); setOperationAction(ISD::SETCC, MVT::f64, Expand); - setOperationAction(ISD::SELECT, MVT::i32, Expand); - setOperationAction(ISD::SELECT, MVT::f32, Expand); - setOperationAction(ISD::SELECT, MVT::f64, Expand); + setOperationAction(ISD::SELECT, MVT::i32, Custom); + setOperationAction(ISD::SELECT, MVT::f32, Custom); + setOperationAction(ISD::SELECT, MVT::f64, Custom); setOperationAction(ISD::SELECT_CC, MVT::i32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f32, Custom); setOperationAction(ISD::SELECT_CC, MVT::f64, Custom); @@ -2314,6 +2314,52 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp); } +SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { + SDValue Cond = Op.getOperand(0); + SDValue SelectTrue = Op.getOperand(1); + SDValue SelectFalse = Op.getOperand(2); + DebugLoc dl = Op.getDebugLoc(); + + // Convert: + // + // (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond) + // (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond) + // + if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) { + const ConstantSDNode *CMOVTrue = + dyn_cast(Cond.getOperand(0)); + const ConstantSDNode *CMOVFalse = + dyn_cast(Cond.getOperand(1)); + + if (CMOVTrue && CMOVFalse) { + unsigned CMOVTrueVal = CMOVTrue->getZExtValue(); + unsigned CMOVFalseVal = CMOVFalse->getZExtValue(); + + SDValue True; + SDValue False; + if (CMOVTrueVal == 1 && CMOVFalseVal == 0) { + True = SelectTrue; + False = SelectFalse; + } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) { + True = SelectFalse; + False = SelectTrue; + } + + if (True.getNode() && False.getNode()) { + EVT VT = Cond.getValueType(); + SDValue ARMcc = Cond.getOperand(2); + SDValue CCR = Cond.getOperand(3); + SDValue Cmp = Cond.getOperand(4); + return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp); + } + } + } + + return DAG.getSelectCC(dl, Cond, + DAG.getConstant(0, Cond.getValueType()), + SelectTrue, SelectFalse, ISD::SETNE); +} + SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); @@ -3687,6 +3733,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) : LowerGlobalAddressELF(Op, DAG); case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG); + case ISD::SELECT: return LowerSELECT(Op, DAG); case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG); case ISD::BR_CC: return LowerBR_CC(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index df153030c5f..51dee2de52a 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -335,6 +335,7 @@ namespace llvm { SelectionDAG &DAG) const; SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const; diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll index 29c55c6bd97..7413bed5c5b 100644 --- a/test/CodeGen/ARM/select.ll +++ b/test/CodeGen/ARM/select.ll @@ -1,5 +1,6 @@ ; RUN: llc < %s -march=arm | FileCheck %s ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP +; RUN: llc < %s -mattr=+neon,+thumb2 -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=CHECK-NEON define i32 @f1(i32 %a.s) { ;CHECK: f1: @@ -65,3 +66,27 @@ define double @f7(double %a, double %b) { %tmp1 = select i1 %tmp, double -1.000e+00, double %b ret double %tmp1 } + +; +; +; We used to generate really horrible code for this function. The main cause was +; a lack of a custom lowering routine for an ISD::SELECT. This would result in +; two "it" blocks in the code: one for the "icmp" and another to move the index +; into the constant pool based on the value of the "icmp". If we have one "it" +; block generated, odds are good that we have close to the ideal code for this: +; +; CHECK-NEON: _f8: +; CHECK-NEON: movw [[REGISTER_1:r[0-9]+]], #1123 +; CHECK-NEON-NEXT: movs [[REGISTER_2:r[0-9]+]], #0 +; CHECK-NEON-NEXT: cmp r0, [[REGISTER_1]] +; CHECK-NEON-NEXT: adr [[REGISTER_3:r[0-9]+]], #LCPI +; CHECK-NEON-NEXT: it eq +; CHECK-NEON-NEXT: moveq [[REGISTER_2]], #4 +; CHECK-NEON-NEXT: ldr +; CHECK-NEON: bx + +define arm_apcscc float @f8(i32 %a) nounwind { + %tmp = icmp eq i32 %a, 1123 + %tmp1 = select i1 %tmp, float 0x3FF3BE76C0000000, float 0x40030E9A20000000 + ret float %tmp1 +}