Kill and collapse outstanding DomainValues.

DomainValues that are only used by "don't care" instructions are now
collapsed to the first possible execution domain after all basic blocks
have been processed.  This typically means the PS domain on x86.
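
The mechanics, as a minimal self-contained C++ sketch: the DomainValue,
Refs, and AvailableDomains names follow the pass, but the helper bodies
below are simplified stand-ins, not the actual LLVM implementation.

    #include <cassert>
    #include <cstdint>

    // Toy model of a DomainValue: a reference-counted set of execution
    // domains that a group of registers could still legally use.
    struct DomainValue {
      unsigned Refs;             // live-register references to this value
      uint32_t AvailableDomains; // bit N set => domain N is still possible

      // First (lowest-numbered) remaining domain. On x86 the PS domain
      // has the lowest index, so unconstrained values collapse to PS.
      unsigned getFirstDomain() const {
        assert(AvailableDomains && "No domain left");
        return __builtin_ctz(AvailableDomains);
      }
    };

    // Killing the last reference collapses the value to one concrete
    // domain instead of leaving the choice open.
    void killReference(DomainValue &DV) {
      assert(DV.Refs && "Kill without a live reference");
      if (--DV.Refs == 0)
        DV.AvailableDomains = 1u << DV.getFirstDomain(); // collapse
    }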

For example, the vsel_i64 and vsel_double functions in sse2-blend.ll are
completely collapsed to the PS domain instead of containing a mix of
execution domains created by isel.
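
To make "completely collapsed" concrete, here is a hypothetical toy
program (not the backend's real domain tables) applying the same rule to
a vsel-style sequence: intersect the legal-domain masks of all "don't
care" instructions, then rewrite each one into the first surviving
domain.

    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // x86 execution domains, PS first -- which is why "don't care"
    // sequences end up as PS instructions after the collapse.
    enum Domain : unsigned { PS = 0, PD = 1, DQ = 2 };

    struct Inst {
      const char *Variant[3]; // illustrative mnemonic per domain
      uint32_t Legal;         // bit mask of domains this opcode supports
    };

    int main() {
      const uint32_t All = 1u << PS | 1u << PD | 1u << DQ;
      // A vsel_i64-style blend as isel emits it: every logic op has an
      // equivalent in all three domains, so none of them "care".
      std::vector<Inst> Seq = {
          {{"xorps", "xorpd", "pxor"}, All},
          {{"andps", "andpd", "pand"}, All},
          {{"andnps", "andnpd", "pandn"}, All},
          {{"orps", "orpd", "por"}, All},
      };

      // Intersect the masks, then collapse to the first surviving domain.
      uint32_t Avail = ~0u;
      for (const Inst &I : Seq)
        Avail &= I.Legal;
      unsigned First = __builtin_ctz(Avail); // PS when nothing constrains

      for (const Inst &I : Seq)
        std::printf("%s\n", I.Variant[First]); // xorps andps andnps orps
    }

The real pass rewrites opcodes through the target's domain tables rather
than a string table, but the selection rule is the same: intersect, then
take the lowest set bit.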

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144037 91177308-0d34-0410-b5e6-96231b3b80d8
Jakob Stoklund Olesen 2011-11-07 23:08:21 +00:00
parent a29fc806fe
commit b26c7727c9
8 changed files with 58 additions and 28 deletions

View File

@@ -510,11 +510,20 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
     leaveBasicBlock(MBB);
   }
 
-  // Clear the LiveOuts vectors. Should we also collapse any remaining
-  // DomainValues?
-  for (LiveOutMap::const_iterator i = LiveOuts.begin(), e = LiveOuts.end();
-         i != e; ++i)
-    delete[] i->second;
+  // Clear the LiveOuts vectors and collapse any remaining DomainValues.
+  for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
+         MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
+    LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
+    if (FI == LiveOuts.end())
+      continue;
+    assert(FI->second && "Null entry");
+    // The DomainValue is collapsed when the last reference is killed.
+    LiveRegs = FI->second;
+    for (unsigned i = 0, e = NumRegs; i != e; ++i)
+      if (LiveRegs[i])
+        Kill(i);
+    delete[] LiveRegs;
+  }
   LiveOuts.clear();
   Avail.clear();
   Allocator.DestroyAll();

View File

@@ -315,24 +315,31 @@ declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>) nounwind readnone
 
 
 define void @test_x86_sse2_movnt_dq(i8* %a0, <2 x i64> %a1) {
+  ; CHECK: test_x86_sse2_movnt_dq
   ; CHECK: movl
   ; CHECK: vmovntdq
-  call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a1)
+  ; add operation forces the execution domain.
+  %a2 = add <2 x i64> %a1, <i64 1, i64 1>
+  call void @llvm.x86.sse2.movnt.dq(i8* %a0, <2 x i64> %a2)
   ret void
 }
 declare void @llvm.x86.sse2.movnt.dq(i8*, <2 x i64>) nounwind
 
 
 define void @test_x86_sse2_movnt_pd(i8* %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_movnt_pd
   ; CHECK: movl
   ; CHECK: vmovntpd
-  call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a1)
+  ; fadd operation forces the execution domain.
+  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+  call void @llvm.x86.sse2.movnt.pd(i8* %a0, <2 x double> %a2)
   ret void
 }
 declare void @llvm.x86.sse2.movnt.pd(i8*, <2 x double>) nounwind
 
 
 define <2 x double> @test_x86_sse2_mul_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_mul_sd
   ; CHECK: vmulsd
   %res = call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res
@@ -749,6 +756,7 @@ declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
 
 
 define void @test_x86_sse2_storel_dq(i8* %a0, <4 x i32> %a1) {
+  ; CHECK: test_x86_sse2_storel_dq
   ; CHECK: movl
   ; CHECK: vmovq
   call void @llvm.x86.sse2.storel.dq(i8* %a0, <4 x i32> %a1)
@@ -758,6 +766,7 @@ declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
 
 
 define void @test_x86_sse2_storeu_dq(i8* %a0, <16 x i8> %a1) {
+  ; CHECK: test_x86_sse2_storeu_dq
   ; CHECK: movl
   ; CHECK: vmovdqu
   call void @llvm.x86.sse2.storeu.dq(i8* %a0, <16 x i8> %a1)
@@ -767,15 +776,18 @@ declare void @llvm.x86.sse2.storeu.dq(i8*, <16 x i8>) nounwind
 
 
 define void @test_x86_sse2_storeu_pd(i8* %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_storeu_pd
   ; CHECK: movl
   ; CHECK: vmovupd
-  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a1)
+  %a2 = fadd <2 x double> %a1, <double 0x0, double 0x4200000000000000>
+  call void @llvm.x86.sse2.storeu.pd(i8* %a0, <2 x double> %a2)
   ret void
 }
 declare void @llvm.x86.sse2.storeu.pd(i8*, <2 x double>) nounwind
 
 
 define <2 x double> @test_x86_sse2_sub_sd(<2 x double> %a0, <2 x double> %a1) {
+  ; CHECK: test_x86_sse2_sub_sd
   ; CHECK: vsubsd
   %res = call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a0, <2 x double> %a1) ; <<2 x double>> [#uses=1]
   ret <2 x double> %res

View File

@@ -165,7 +165,9 @@ entry:
 ; CHECK: vpandn %xmm
 define <2 x i64> @vpandn(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
 entry:
-  %y = xor <2 x i64> %a, <i64 -1, i64 -1>
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
   %x = and <2 x i64> %a, %y
   ret <2 x i64> %x
 }
@@ -173,7 +175,9 @@ entry:
 ; CHECK: vpand %xmm
 define <2 x i64> @vpand(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
 entry:
-  %x = and <2 x i64> %a, %b
+  ; Force the execution domain with an add.
+  %a2 = add <2 x i64> %a, <i64 1, i64 1>
+  %x = and <2 x i64> %a2, %b
   ret <2 x i64> %x
 }

View File

@@ -3,13 +3,16 @@
 define void @f(<4 x float> %A, i8* %B, <2 x double> %C, i32 %D, <2 x i64> %E) {
 ; CHECK: movntps
   %cast = bitcast i8* %B to <4 x float>*
-  store <4 x float> %A, <4 x float>* %cast, align 16, !nontemporal !0
+  %A2 = fadd <4 x float> %A, <float 0x0, float 0x0, float 0x0, float 0x4200000000000000>
+  store <4 x float> %A2, <4 x float>* %cast, align 16, !nontemporal !0
 ; CHECK: movntdq
   %cast1 = bitcast i8* %B to <2 x i64>*
-  store <2 x i64> %E, <2 x i64>* %cast1, align 16, !nontemporal !0
+  %E2 = add <2 x i64> %E, <i64 1, i64 2>
+  store <2 x i64> %E2, <2 x i64>* %cast1, align 16, !nontemporal !0
 ; CHECK: movntpd
   %cast2 = bitcast i8* %B to <2 x double>*
-  store <2 x double> %C, <2 x double>* %cast2, align 16, !nontemporal !0
+  %C2 = fadd <2 x double> %C, <double 0x0, double 0x4200000000000000>
+  store <2 x double> %C2, <2 x double>* %cast2, align 16, !nontemporal !0
 ; CHECK: movnti
   %cast3 = bitcast i8* %B to i32*
   store i32 %D, i32* %cast3, align 16, !nontemporal !0

View File

@@ -1,8 +1,8 @@
 ; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s
 ; CHECK-NOT: movapd
 ; CHECK: movaps
-; CHECK-NOT: movaps
-; CHECK: movapd
+; CHECK-NOT: movapd
+; CHECK: movaps
 ; CHECK-NOT: movap
 
 define void @foo(<4 x float>* %p, <4 x float> %x) nounwind {

View File

@@ -26,11 +26,10 @@ define void@vsel_i32(<4 x i32>* %v1, <4 x i32>* %v2) {
   ret void
 }
 
-; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the
-; mixed domains here.
+; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_i64
 ; CHECK: xorps
-; CHECK: pand
+; CHECK: andps
 ; CHECK: andnps
 ; CHECK: orps
 ; CHECK: ret
@@ -43,16 +42,14 @@ define void@vsel_i64(<4 x i64>* %v1, <4 x i64>* %v2) {
   ret void
 }
 
-; FIXME: The -mattr=+sse2,-sse41 disable the ExecutionDepsFix pass causing the
-; mixed domains here.
+; Without forcing instructions, fall back to the preferred PS domain.
 ; CHECK: vsel_double
 ; CHECK: xorps
-; CHECK: pand
+; CHECK: andps
 ; CHECK: andnps
 ; CHECK: orps
 ; CHECK: ret
 
 define void@vsel_double(<4 x double>* %v1, <4 x double>* %v2) {
   %A = load <4 x double>* %v1
   %B = load <4 x double>* %v2

View File

@@ -144,7 +144,7 @@ define <2 x double> @test11(double %a, double %b) nounwind {
   %tmp7 = insertelement <2 x double> %tmp, double %b, i32 1 ; <<2 x double>> [#uses=1]
   ret <2 x double> %tmp7
 ; CHECK: test11:
-; CHECK: movapd 4(%esp), %xmm0
+; CHECK: movaps 4(%esp), %xmm0
 }
 
 define void @test12() nounwind {

View File

@@ -1,9 +1,8 @@
-; RUN: llc < %s -march=x86 -mcpu=core2 -o %t
-; RUN: grep movq %t | count 1
-; RUN: grep pshufd %t | count 1
-; RUN: grep movupd %t | count 1
-; RUN: grep pshufhw %t | count 1
+; RUN: llc < %s -march=x86 -mcpu=core2 | FileCheck %s
 
+; CHECK: test_v4sf
+; CHECK: movq 8(%esp)
+; CHECK: pshufd $80
 define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
   %tmp = insertelement <4 x float> zeroinitializer, float %X, i32 0 ; <<4 x float>> [#uses=1]
   %tmp2 = insertelement <4 x float> %tmp, float %X, i32 1 ; <<4 x float>> [#uses=1]
@@ -13,6 +12,9 @@ define void @test_v4sf(<4 x float>* %P, float %X, float %Y) nounwind {
   ret void
 }
 
+; CHECK: test_v2sd
+; CHECK: movups 8(%esp)
+; CHECK: movaps
 define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
   %tmp = insertelement <2 x double> zeroinitializer, double %X, i32 0 ; <<2 x double>> [#uses=1]
   %tmp2 = insertelement <2 x double> %tmp, double %Y, i32 1 ; <<2 x double>> [#uses=1]
@@ -20,6 +22,9 @@ define void @test_v2sd(<2 x double>* %P, double %X, double %Y) nounwind {
   ret void
 }
 
+; CHECK: test_v8i16
+; CHECK: pshufhw $-58
+; CHECK: movdqa
 define void @test_v8i16(<2 x i64>* %res, <2 x i64>* %A) nounwind {
   %tmp = load <2 x i64>* %A ; <<2 x i64>> [#uses=1]
   %tmp.upgrd.1 = bitcast <2 x i64> %tmp to <8 x i16> ; <<8 x i16>> [#uses=8]