Bill Wendling de2b151dbf Consider this code snippet:
float t1(int argc) {
  return (argc == 1123) ? 1.234f : 2.38213f;
}

We would generate truly awful code on ARM (those with a weak stomach should look
away):

_t1:
  movw   r1, #1123
  movs   r2, #1
  movs   r3, #0
  cmp    r0, r1
  mov.w  r0, #0
  it     eq
  moveq  r0, r2
  movs   r1, #4
  cmp    r0, #0
  it     ne
  movne  r3, r1
  adr    r0, #LCPI1_0
  ldr    r0, [r0, r3]
  bx     lr

The problem was that legalization was creating a cascade of SELECT_CC nodes, for
for the comparison of "argc == 1123" which was fed into a SELECT node for the ?:
statement which was itself converted to a SELECT_CC node. This is because the
ARM back-end doesn't have custom lowering for SELECT nodes, so it used the
default "Expand".

I added a fairly simple "LowerSELECT" to the ARM back-end. It takes care of this
testcase, but can obviously be expanded to include more cases.

Now we generate this, which looks optimal to me:

_t1:
  movw   r1, #1123
  movs   r2, #0
  cmp    r0, r1
  adr    r0, #LCPI0_0
  it     eq
  moveq  r2, #4
  ldr    r0, [r0, r2]
  bx     lr
  .align  2
LCPI0_0:
  .long   1075344593  @ float 2.382130e+00
  .long   1067316150  @ float 1.234000e+00



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@110799 91177308-0d34-0410-b5e6-96231b3b80d8
2010-08-11 08:43:16 +00:00

93 lines
2.4 KiB
LLVM

; RUN: llc < %s -march=arm | FileCheck %s
; RUN: llc < %s -march=arm -mattr=+vfp2 | FileCheck %s --check-prefix=CHECK-VFP
; RUN: llc < %s -mattr=+neon,+thumb2 -mtriple=thumbv7-apple-darwin | FileCheck %s --check-prefix=CHECK-NEON
define i32 @f1(i32 %a.s) {
;CHECK: f1:
;CHECK: moveq
entry:
%tmp = icmp eq i32 %a.s, 4
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
define i32 @f2(i32 %a.s) {
;CHECK: f2:
;CHECK: movgt
entry:
%tmp = icmp sgt i32 %a.s, 4
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
define i32 @f3(i32 %a.s, i32 %b.s) {
;CHECK: f3:
;CHECK: movlt
entry:
%tmp = icmp slt i32 %a.s, %b.s
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
define i32 @f4(i32 %a.s, i32 %b.s) {
;CHECK: f4:
;CHECK: movle
entry:
%tmp = icmp sle i32 %a.s, %b.s
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
define i32 @f5(i32 %a.u, i32 %b.u) {
;CHECK: f5:
;CHECK: movls
entry:
%tmp = icmp ule i32 %a.u, %b.u
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
define i32 @f6(i32 %a.u, i32 %b.u) {
;CHECK: f6:
;CHECK: movhi
entry:
%tmp = icmp ugt i32 %a.u, %b.u
%tmp1.s = select i1 %tmp, i32 2, i32 3
ret i32 %tmp1.s
}
define double @f7(double %a, double %b) {
;CHECK: f7:
;CHECK: movlt
;CHECK: movlt
;CHECK-VFP: f7:
;CHECK-VFP: vmovmi
%tmp = fcmp olt double %a, 1.234e+00
%tmp1 = select i1 %tmp, double -1.000e+00, double %b
ret double %tmp1
}
; <rdar://problem/7260094>
;
; We used to generate really horrible code for this function. The main cause was
; a lack of a custom lowering routine for an ISD::SELECT. This would result in
; two "it" blocks in the code: one for the "icmp" and another to move the index
; into the constant pool based on the value of the "icmp". If we have one "it"
; block generated, odds are good that we have close to the ideal code for this:
;
; CHECK-NEON: _f8:
; CHECK-NEON: movw [[REGISTER_1:r[0-9]+]], #1123
; CHECK-NEON-NEXT: movs [[REGISTER_2:r[0-9]+]], #0
; CHECK-NEON-NEXT: cmp r0, [[REGISTER_1]]
; CHECK-NEON-NEXT: adr [[REGISTER_3:r[0-9]+]], #LCPI
; CHECK-NEON-NEXT: it eq
; CHECK-NEON-NEXT: moveq [[REGISTER_2]], #4
; CHECK-NEON-NEXT: ldr
; CHECK-NEON: bx
define arm_apcscc float @f8(i32 %a) nounwind {
%tmp = icmp eq i32 %a, 1123
%tmp1 = select i1 %tmp, float 0x3FF3BE76C0000000, float 0x40030E9A20000000
ret float %tmp1
}