llvm-6502/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll
commit 40d07bbebb by Jakob Stoklund Olesen: Add CoalescerPair helper class.
Given a copy instruction, CoalescerPair can determine which registers to
coalesce in order to eliminate the copy. It deals with all the subreg fun to
determine a tuple (DstReg, SrcReg, SubIdx) such that:

- SrcReg is a virtual register that will disappear after coalescing.
- DstReg is a virtual or physical register whose live range will be extended.
- SubIdx is 0 when DstReg is a physical register.
- SrcReg can be joined with DstReg:SubIdx.

CoalescerPair::isCoalescable() determines whether another copy instruction is
compatible with the same tuple. This fixes some NEON miscompilations where
shuffles were being coalesced as if they were plain copies.
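
A minimal sketch of how the tuple might be consumed, assuming this commit's
API; only isCoalescable() is named above, so setRegisters() and the getter
names are assumed spellings:

  // Sketch only, not the real JoinCopy logic; assumes LLVM's CodeGen
  // headers (MachineInstr, CoalescerPair) are in scope.
  bool tryJoin(CoalescerPair &CP, MachineInstr *Copy, MachineInstr *Other) {
    if (!CP.setRegisters(Copy))
      return false;                 // not a copy the coalescer understands
    // The tuple from above: SrcReg is virtual and will disappear,
    // DstReg's live range is extended, SubIdx == 0 if DstReg is physical.
    unsigned DstReg = CP.getDstReg();
    unsigned SrcReg = CP.getSrcReg();
    unsigned SubIdx = CP.getSubIdx();
    (void)DstReg; (void)SrcReg; (void)SubIdx;
    // A second copy may be folded into the same join only if it is
    // compatible with the same (DstReg, SrcReg, SubIdx) tuple:
    return CP.isCoalescable(Other);
  }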

The CoalescerPair class will replace a lot of the spaghetti logic in JoinCopy
later.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105997 91177308-0d34-0410-b5e6-96231b3b80d8
2010-06-15 16:04:21 +00:00

; RUN: llc < %s -O3 -relocation-model=pic -mattr=+thumb2 -mcpu=cortex-a8 | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
target triple = "thumbv7-apple-darwin10"
; This is a case where the coalescer was too eager. These two copies were
; considered equivalent and coalescable:
;
; 140 %reg1038:dsub_0<def> = VMOVD %reg1047:dsub_0, pred:14, pred:%reg0
; 148 %reg1038:dsub_1<def> = VMOVD %reg1047:dsub_0, pred:14, pred:%reg0
;
; Only one can be coalesced.
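; Both copies read the same source lane (%reg1047:dsub_0) but write different
; destination lanes, so a single (DstReg, SrcReg, SubIdx) join can satisfy at
; most one of them; the other must survive as a real VMOVD, or the lane
; duplication is lost.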
@.str = private constant [7 x i8] c"%g %g\0A\00", align 4 ; <[7 x i8]*> [#uses=1]
define arm_apcscc i32 @main(i32 %argc, i8** nocapture %Argv) nounwind {
entry:
%0 = icmp eq i32 %argc, 2123 ; <i1> [#uses=1]
%U.0 = select i1 %0, double 3.282190e+01, double 8.731834e+02 ; <double> [#uses=2]
%1 = icmp eq i32 %argc, 5123 ; <i1> [#uses=1]
%V.0.ph = select i1 %1, double 7.779980e+01, double 0x409CCB9C779A6B51 ; <double> [#uses=1]
%2 = insertelement <2 x double> undef, double %U.0, i32 0 ; <<2 x double>> [#uses=2]
%3 = insertelement <2 x double> %2, double %U.0, i32 1 ; <<2 x double>> [#uses=2]
%4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2]
; Constant pool load followed by add.
; Then clobber the loaded register, not the sum.
; CHECK: vldr.64 [[LDR:d.]]
; CHECK: vadd.f64 [[ADD:d.]], [[LDR]], [[LDR]]
; CHECK: vmov.f64 [[LDR]]
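; (With the over-eager coalescing described above, the vmov would have
; clobbered the sum instead.)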
%5 = fadd <2 x double> %3, %3 ; <<2 x double>> [#uses=2]
%6 = fadd <2 x double> %4, %4 ; <<2 x double>> [#uses=2]
%tmp7 = extractelement <2 x double> %5, i32 0 ; <double> [#uses=1]
%tmp5 = extractelement <2 x double> %5, i32 1 ; <double> [#uses=1]
; CHECK: printf
%7 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), double %tmp7, double %tmp5) nounwind ; <i32> [#uses=0]
%tmp3 = extractelement <2 x double> %6, i32 0 ; <double> [#uses=1]
%tmp1 = extractelement <2 x double> %6, i32 1 ; <double> [#uses=1]
%8 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), double %tmp3, double %tmp1) nounwind ; <i32> [#uses=0]
ret i32 0
}
declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind