From de2b151dbf125af49717807b9cfc1f6f7a5b9ea6 Mon Sep 17 00:00:00 2001
From: Bill Wendling <isanbard@gmail.com>
Date: Wed, 11 Aug 2010 08:43:16 +0000
Subject: [PATCH] Consider this code snippet:

float t1(int argc) {
  return (argc == 1123) ? 1.234f : 2.38213f;
}

We would generate truly awful code on ARM (those with a weak stomach should look
away):

_t1:
  movw   r1, #1123
  movs   r2, #1
  movs   r3, #0
  cmp    r0, r1
  mov.w  r0, #0
  it     eq
  moveq  r0, r2
  movs   r1, #4
  cmp    r0, #0
  it     ne
  movne  r3, r1
  adr    r0, #LCPI1_0
  ldr    r0, [r0, r3]
  bx     lr

The problem was that legalization was creating a cascade of SELECT_CC nodes, for
for the comparison of "argc == 1123" which was fed into a SELECT node for the ?:
statement which was itself converted to a SELECT_CC node. This is because the
ARM back-end doesn't have custom lowering for SELECT nodes, so it used the
default "Expand".

I added a fairly simple "LowerSELECT" to the ARM back-end. It takes care of this
testcase, but can obviously be expanded to include more cases.

Now we generate this, which looks optimal to me:

_t1:
  movw   r1, #1123
  movs   r2, #0
  cmp    r0, r1
  adr    r0, #LCPI0_0
  it     eq
  moveq  r2, #4
  ldr    r0, [r0, r2]
  bx     lr
  .align  2
LCPI0_0:
  .long   1075344593  @ float 2.382130e+00
  .long   1067316150  @ float 1.234000e+00



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110799 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/ARM/ARMISelLowering.cpp | 53 ++++++++++++++++++++++++++++--
 lib/Target/ARM/ARMISelLowering.h   |  1 +
 test/CodeGen/ARM/select.ll         | 25 ++++++++++++++
 3 files changed, 76 insertions(+), 3 deletions(-)
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 56adcf2b37f..e4b556228fc 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -483,9 +483,9 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
   setOperationAction(ISD::SETCC,     MVT::i32, Expand);
   setOperationAction(ISD::SETCC,     MVT::f32, Expand);
   setOperationAction(ISD::SETCC,     MVT::f64, Expand);
-  setOperationAction(ISD::SELECT,    MVT::i32, Expand);
-  setOperationAction(ISD::SELECT,    MVT::f32, Expand);
-  setOperationAction(ISD::SELECT,    MVT::f64, Expand);
+  setOperationAction(ISD::SELECT,    MVT::i32, Custom);
+  setOperationAction(ISD::SELECT,    MVT::f32, Custom);
+  setOperationAction(ISD::SELECT,    MVT::f64, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
   setOperationAction(ISD::SELECT_CC, MVT::f64, Custom);
@@ -2314,6 +2314,52 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
   return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
 }
 
+SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const {
+  SDValue Cond = Op.getOperand(0);
+  SDValue SelectTrue = Op.getOperand(1);
+  SDValue SelectFalse = Op.getOperand(2);
+  DebugLoc dl = Op.getDebugLoc();
+
+  // Convert:
+  //
+  //   (select (cmov 1, 0, cond), t, f) -> (cmov t, f, cond)
+  //   (select (cmov 0, 1, cond), t, f) -> (cmov f, t, cond)
+  //
+  if (Cond.getOpcode() == ARMISD::CMOV && Cond.hasOneUse()) {
+    const ConstantSDNode *CMOVTrue =
+      dyn_cast<ConstantSDNode>(Cond.getOperand(0));
+    const ConstantSDNode *CMOVFalse =
+      dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+
+    if (CMOVTrue && CMOVFalse) {
+      unsigned CMOVTrueVal = CMOVTrue->getZExtValue();
+      unsigned CMOVFalseVal = CMOVFalse->getZExtValue();
+
+      SDValue True;
+      SDValue False;
+      if (CMOVTrueVal == 1 && CMOVFalseVal == 0) {
+        True = SelectTrue;
+        False = SelectFalse;
+      } else if (CMOVTrueVal == 0 && CMOVFalseVal == 1) {
+        True = SelectFalse;
+        False = SelectTrue;
+      }
+
+      if (True.getNode() && False.getNode()) {
+        EVT VT = Cond.getValueType();
+        SDValue ARMcc = Cond.getOperand(2);
+        SDValue CCR = Cond.getOperand(3);
+        SDValue Cmp = Cond.getOperand(4);
+        return DAG.getNode(ARMISD::CMOV, dl, VT, True, False, ARMcc, CCR, Cmp);
+      }
+    }
+  }
+
+  return DAG.getSelectCC(dl, Cond,
+                         DAG.getConstant(0, Cond.getValueType()),
+                         SelectTrue, SelectFalse, ISD::SETNE);
+}
+
 SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
   EVT VT = Op.getValueType();
   SDValue LHS = Op.getOperand(0);
@@ -3687,6 +3733,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
     return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
       LowerGlobalAddressELF(Op, DAG);
   case ISD::GlobalTLSAddress:   return LowerGlobalTLSAddress(Op, DAG);
+  case ISD::SELECT:        return LowerSELECT(Op, DAG);
   case ISD::SELECT_CC:     return LowerSELECT_CC(Op, DAG);
   case ISD::BR_CC:         return LowerBR_CC(Op, DAG);
   case ISD::BR_JT:         return LowerBR_JT(Op, DAG);
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index df153030c5f..51dee2de52a 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -335,6 +335,7 @@ namespace llvm {
                                    SelectionDAG &DAG) const;
     SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const;
+    SDValue LowerSELECT(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG) const;
     SDValue LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const;
diff --git a/test/CodeGen/ARM/select.ll b/test/CodeGen/ARM/select.ll
index 29c55c6bd97..7413bed5c5b 100644
--- a/test/CodeGen/ARM/select.ll
+++ b/test/CodeGen/ARM/select.ll
@@ -1,5 +1,6 @@
 ; RUN: llc < %s -march=arm | FileCheck %s
 ; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
+; RUN: llc < %s -mattr=+neon,+thumb2 -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=CHECK-NEON
 
 define i32 @f1(i32 %a.s) {
 ;CHECK: f1:
@@ -65,3 +66,27 @@ define double @f7(double %a, double %b) {
     %tmp1 = select i1 %tmp, double -1.000e+00, double %b
     ret double %tmp1
 }
+
+; <rdar://problem/7260094>
+;
+; We used to generate really horrible code for this function. The main cause was
+; a lack of a custom lowering routine for an ISD::SELECT. This would result in
+; two "it" blocks in the code: one for the "icmp" and another to move the index
+; into the constant pool based on the value of the "icmp". If we have one "it"
+; block generated, odds are good that we have close to the ideal code for this:
+;
+; CHECK-NEON:      _f8:
+; CHECK-NEON:      movw   [[REGISTER_1:r[0-9]+]], #1123
+; CHECK-NEON-NEXT: movs   [[REGISTER_2:r[0-9]+]], #0
+; CHECK-NEON-NEXT: cmp    r0, [[REGISTER_1]]
+; CHECK-NEON-NEXT: adr    [[REGISTER_3:r[0-9]+]], #LCPI
+; CHECK-NEON-NEXT: it     eq
+; CHECK-NEON-NEXT: moveq  [[REGISTER_2]], #4
+; CHECK-NEON-NEXT: ldr
+; CHECK-NEON:      bx
+
+define arm_apcscc float @f8(i32 %a) nounwind {
+  %tmp = icmp eq i32 %a, 1123
+  %tmp1 = select i1 %tmp, float 0x3FF3BE76C0000000, float 0x40030E9A20000000
+  ret float %tmp1
+}