llvm-6502/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll

; RUN: llc < %s -O3 -relocation-model=pic -mattr=+thumb2 -mcpu=cortex-a8 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
target triple = "thumbv7-apple-darwin10"

; This is a case where the coalescer was too eager. These two copies were
; considered equivalent and coalescable:
;
; 140 %reg1038:dsub_0<def> = VMOVD %reg1047:dsub_0, pred:14, pred:%reg0
; 148 %reg1038:dsub_1<def> = VMOVD %reg1047:dsub_0, pred:14, pred:%reg0
;
; Only one can be coalesced.

@.str = private constant [7 x i8] c"%g %g\0A\00", align 4 ; <[7 x i8]*> [#uses=1]

define i32 @main(i32 %argc, i8** nocapture %Argv) nounwind {
entry:
  %0 = icmp eq i32 %argc, 2123                    ; <i1> [#uses=1]
  %U.0 = select i1 %0, double 3.282190e+01, double 8.731834e+02 ; <double> [#uses=2]
  %1 = icmp eq i32 %argc, 5123                    ; <i1> [#uses=1]
  %V.0.ph = select i1 %1, double 7.779980e+01, double 0x409CCB9C779A6B51 ; <double> [#uses=1]
  %2 = insertelement <2 x double> undef, double %U.0, i32 0 ; <<2 x double>> [#uses=2]
  %3 = insertelement <2 x double> %2, double %U.0, i32 1 ; <<2 x double>> [#uses=2]
  %4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2]
; Constant pool load followed by add.
; Then clobber the loaded register, not the sum.
; CHECK: vldr.64 [[LDR:d.*]],
; CHECK: LPC0_0:
; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]]
; CHECK: vmov.f64 [[LDR]]
  %5 = fadd <2 x double> %3, %3                   ; <<2 x double>> [#uses=2]
  %6 = fadd <2 x double> %4, %4                   ; <<2 x double>> [#uses=2]
  %tmp7 = extractelement <2 x double> %5, i32 0   ; <double> [#uses=1]
  %tmp5 = extractelement <2 x double> %5, i32 1   ; <double> [#uses=1]
; CHECK: printf
  %7 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), double %tmp7, double %tmp5) nounwind ; <i32> [#uses=0]
  %tmp3 = extractelement <2 x double> %6, i32 0   ; <double> [#uses=1]
  %tmp1 = extractelement <2 x double> %6, i32 1   ; <double> [#uses=1]
  %8 = tail call  i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), double %tmp3, double %tmp1) nounwind ; <i32> [#uses=0]
  ret i32 0
}

declare i32 @printf(i8* nocapture, ...) nounwind
Add CoalescerPair helper class. Given a copy instruction, CoalescerPair can determine which registers to coalesce in order to eliminate the copy. It deals with all the subreg fun to determine a tuple (DstReg, SrcReg, SubIdx) such that: - SrcReg is a virtual register that will disappear after coalescing. - DstReg is a virtual or physical register whose live range will be extended. - SubIdx is 0 when DstReg is a physical register. - SrcReg can be joined with DstReg:SubIdx. CoalescerPair::isCoalescable() determines if another copy instruction is compatible with the same tuple. This fixes some NEON miscompilations where shuffles are getting coalesced as if they were copies. The CoalescerPair class will replace a lot of the spaghetti logic in JoinCopy later. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105997 91177308-0d34-0410-b5e6-96231b3b80d8 2010-06-15 16:04:21 +00:00			`; RUN: llc < %s -O3 -relocation-model=pic -mattr=+thumb2 -mcpu=cortex-a8 \| FileCheck %s`
			`target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"`
			`target triple = "thumbv7-apple-darwin10"`

			`; This is a case where the coalescer was too eager. These two copies were`
			`; considered equivalent and coalescable:`
			`;`
			`; 140 %reg1038:dsub_0<def> = VMOVD %reg1047:dsub_0, pred:14, pred:%reg0`
			`; 148 %reg1038:dsub_1<def> = VMOVD %reg1047:dsub_0, pred:14, pred:%reg0`
			`;`
			`; Only one can be coalesced.`

			`@.str = private constant [7 x i8] c"%g %g\0A\00", align 4 ; <[7 x i8]*> [#uses=1]`

Remove arm_apcscc from the test files. It is the default and doing this matches what llvm-gcc and clang now produce. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106221 91177308-0d34-0410-b5e6-96231b3b80d8 2010-06-17 15:18:27 +00:00			`define i32 @main(i32 %argc, i8** nocapture %Argv) nounwind {`
Add CoalescerPair helper class. Given a copy instruction, CoalescerPair can determine which registers to coalesce in order to eliminate the copy. It deals with all the subreg fun to determine a tuple (DstReg, SrcReg, SubIdx) such that: - SrcReg is a virtual register that will disappear after coalescing. - DstReg is a virtual or physical register whose live range will be extended. - SubIdx is 0 when DstReg is a physical register. - SrcReg can be joined with DstReg:SubIdx. CoalescerPair::isCoalescable() determines if another copy instruction is compatible with the same tuple. This fixes some NEON miscompilations where shuffles are getting coalesced as if they were copies. The CoalescerPair class will replace a lot of the spaghetti logic in JoinCopy later. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105997 91177308-0d34-0410-b5e6-96231b3b80d8 2010-06-15 16:04:21 +00:00			`entry:`
			`%0 = icmp eq i32 %argc, 2123 ; <i1> [#uses=1]`
			`%U.0 = select i1 %0, double 3.282190e+01, double 8.731834e+02 ; <double> [#uses=2]`
			`%1 = icmp eq i32 %argc, 5123 ; <i1> [#uses=1]`
			`%V.0.ph = select i1 %1, double 7.779980e+01, double 0x409CCB9C779A6B51 ; <double> [#uses=1]`
			`%2 = insertelement <2 x double> undef, double %U.0, i32 0 ; <<2 x double>> [#uses=2]`
			`%3 = insertelement <2 x double> %2, double %U.0, i32 1 ; <<2 x double>> [#uses=2]`
			`%4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2]`
			`; Constant pool load followed by add.`
			`; Then clobber the loaded register, not the sum.`
Change register allocation order for ARM VFP and NEON registers to put the callee-saved registers at the end of the lists. Also prefer to avoid using the low registers that are in register subclasses required by certain instructions, so that those registers will more likely be available when needed. This change makes a huge improvement in spilling in some cases. Thanks to Jakob for helping me realize the problem. Most of this patch is fixing the testsuite. There are quite a few places where we're checking for specific registers. I changed those to wildcards in places where that doesn't weaken the tests. The spill-q.ll and thumb2-spill-q.ll tests stopped spilling with this change, so I added a bunch of live values to force spills on those tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116055 91177308-0d34-0410-b5e6-96231b3b80d8 2010-10-08 06:15:13 +00:00			`; CHECK: vldr.64 [[LDR:d.*]],`
Avoiding overly aggressive latency scheduling. If the two nodes share an operand and one of them has a single use that is a live out copy, favor the one that is live out. Otherwise it will be difficult to eliminate the copy if the instruction is a loop induction variable update. e.g. BB: sub r1, r3, #1 str r0, [r2, r3] mov r3, r1 cmp bne BB => BB: str r0, [r2, r3] sub r3, r3, #1 cmp bne BB This fixed the recent 256.bzip2 regression. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117675 91177308-0d34-0410-b5e6-96231b3b80d8 2010-10-29 18:09:28 +00:00			`; CHECK: LPC0_0:`
Change register allocation order for ARM VFP and NEON registers to put the callee-saved registers at the end of the lists. Also prefer to avoid using the low registers that are in register subclasses required by certain instructions, so that those registers will more likely be available when needed. This change makes a huge improvement in spilling in some cases. Thanks to Jakob for helping me realize the problem. Most of this patch is fixing the testsuite. There are quite a few places where we're checking for specific registers. I changed those to wildcards in places where that doesn't weaken the tests. The spill-q.ll and thumb2-spill-q.ll tests stopped spilling with this change, so I added a bunch of live values to force spills on those tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@116055 91177308-0d34-0410-b5e6-96231b3b80d8 2010-10-08 06:15:13 +00:00			`; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]]`
Add CoalescerPair helper class. Given a copy instruction, CoalescerPair can determine which registers to coalesce in order to eliminate the copy. It deals with all the subreg fun to determine a tuple (DstReg, SrcReg, SubIdx) such that: - SrcReg is a virtual register that will disappear after coalescing. - DstReg is a virtual or physical register whose live range will be extended. - SubIdx is 0 when DstReg is a physical register. - SrcReg can be joined with DstReg:SubIdx. CoalescerPair::isCoalescable() determines if another copy instruction is compatible with the same tuple. This fixes some NEON miscompilations where shuffles are getting coalesced as if they were copies. The CoalescerPair class will replace a lot of the spaghetti logic in JoinCopy later. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105997 91177308-0d34-0410-b5e6-96231b3b80d8 2010-06-15 16:04:21 +00:00			`; CHECK: vmov.f64 [[LDR]]`
			`%5 = fadd <2 x double> %3, %3 ; <<2 x double>> [#uses=2]`
			`%6 = fadd <2 x double> %4, %4 ; <<2 x double>> [#uses=2]`
			`%tmp7 = extractelement <2 x double> %5, i32 0 ; <double> [#uses=1]`
			`%tmp5 = extractelement <2 x double> %5, i32 1 ; <double> [#uses=1]`
			`; CHECK: printf`
Remove arm_apcscc from the test files. It is the default and doing this matches what llvm-gcc and clang now produce. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106221 91177308-0d34-0410-b5e6-96231b3b80d8 2010-06-17 15:18:27 +00:00			`%7 = tail call i32 (i8, ...) @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), double %tmp7, double %tmp5) nounwind ; <i32> [#uses=0]`
Add CoalescerPair helper class. Given a copy instruction, CoalescerPair can determine which registers to coalesce in order to eliminate the copy. It deals with all the subreg fun to determine a tuple (DstReg, SrcReg, SubIdx) such that: - SrcReg is a virtual register that will disappear after coalescing. - DstReg is a virtual or physical register whose live range will be extended. - SubIdx is 0 when DstReg is a physical register. - SrcReg can be joined with DstReg:SubIdx. CoalescerPair::isCoalescable() determines if another copy instruction is compatible with the same tuple. This fixes some NEON miscompilations where shuffles are getting coalesced as if they were copies. The CoalescerPair class will replace a lot of the spaghetti logic in JoinCopy later. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105997 91177308-0d34-0410-b5e6-96231b3b80d8 2010-06-15 16:04:21 +00:00			`%tmp3 = extractelement <2 x double> %6, i32 0 ; <double> [#uses=1]`
			`%tmp1 = extractelement <2 x double> %6, i32 1 ; <double> [#uses=1]`
Remove arm_apcscc from the test files. It is the default and doing this matches what llvm-gcc and clang now produce. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106221 91177308-0d34-0410-b5e6-96231b3b80d8 2010-06-17 15:18:27 +00:00			`%8 = tail call i32 (i8, ...) @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), double %tmp3, double %tmp1) nounwind ; <i32> [#uses=0]`
Add CoalescerPair helper class. Given a copy instruction, CoalescerPair can determine which registers to coalesce in order to eliminate the copy. It deals with all the subreg fun to determine a tuple (DstReg, SrcReg, SubIdx) such that: - SrcReg is a virtual register that will disappear after coalescing. - DstReg is a virtual or physical register whose live range will be extended. - SubIdx is 0 when DstReg is a physical register. - SrcReg can be joined with DstReg:SubIdx. CoalescerPair::isCoalescable() determines if another copy instruction is compatible with the same tuple. This fixes some NEON miscompilations where shuffles are getting coalesced as if they were copies. The CoalescerPair class will replace a lot of the spaghetti logic in JoinCopy later. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105997 91177308-0d34-0410-b5e6-96231b3b80d8 2010-06-15 16:04:21 +00:00			`ret i32 0`
			`}`

Remove arm_apcscc from the test files. It is the default and doing this matches what llvm-gcc and clang now produce. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@106221 91177308-0d34-0410-b5e6-96231b3b80d8 2010-06-17 15:18:27 +00:00			`declare i32 @printf(i8* nocapture, ...) nounwind`