diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index cab303dd5c3..7938a375050 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -89,6 +89,9 @@ static cl::opt<bool> DisableSchedCriticalPath( static cl::opt<bool> DisableSchedHeight( "disable-sched-height", cl::Hidden, cl::init(false), cl::desc("Disable scheduled-height priority in sched=list-ilp")); +static cl::opt<bool> Disable2AddrHack( + "disable-2addr-hack", cl::Hidden, cl::init(true), + cl::desc("Disable scheduler's two-address hack")); static cl::opt<int> MaxReorderWindow( "max-sched-reorder", cl::Hidden, cl::init(6), @@ -2628,7 +2631,8 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const { void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) { SUnits = &sunits; // Add pseudo dependency edges for two-address nodes. - AddPseudoTwoAddrDeps(); + if (!Disable2AddrHack) + AddPseudoTwoAddrDeps(); // Reroute edges to nodes with multiple uses. 
if (!TracksRegPressure) PrescheduleNodesWithMultipleUses(); diff --git a/test/CodeGen/X86/2006-05-11-InstrSched.ll b/test/CodeGen/X86/2006-05-11-InstrSched.ll index a871ea198cf..8bb9b926a2f 100644 --- a/test/CodeGen/X86/2006-05-11-InstrSched.ll +++ b/test/CodeGen/X86/2006-05-11-InstrSched.ll @@ -1,5 +1,5 @@ ; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 -stats -realign-stack=0 |&\ -; RUN: grep {asm-printer} | grep 34 +; RUN: grep {asm-printer} | grep 35 target datalayout = "e-p:32:32" define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind { diff --git a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll index 620e0f36674..e904b1c5cc5 100644 --- a/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll +++ b/test/CodeGen/X86/2009-04-21-NoReloadImpDef.ll @@ -5,7 +5,6 @@ ; CHECK: pextrw $14 ; CHECK-NEXT: shrl $8 -; CHECK-NEXT: (%ebp) ; CHECK-NEXT: pinsrw define void @update(i8** %args_list) nounwind { diff --git a/test/CodeGen/X86/change-compare-stride-1.ll b/test/CodeGen/X86/change-compare-stride-1.ll index 8b53ae2817c..1c5c113a723 100644 --- a/test/CodeGen/X86/change-compare-stride-1.ll +++ b/test/CodeGen/X86/change-compare-stride-1.ll @@ -3,6 +3,10 @@ ; Nested LSR is required to optimize this case. ; We do not expect to see this form of IR without -enable-iv-rewrite. +; xfailed for now because the scheduler two-address hack has been disabled. +; Now it's generating a leal -1 rather than a decq. 
+; XFAIL: * + define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind { ; CHECK: borf: ; CHECK-NOT: inc diff --git a/test/CodeGen/X86/fold-pcmpeqd-0.ll b/test/CodeGen/X86/fold-pcmpeqd-0.ll index 647bbdb7f0f..1d315ffe359 100644 --- a/test/CodeGen/X86/fold-pcmpeqd-0.ll +++ b/test/CodeGen/X86/fold-pcmpeqd-0.ll @@ -1,5 +1,7 @@ -; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s ; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=X86-64 %s +; DISABLED: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s + +; i386 test has been disabled when scheduler 2-addr hack is disabled. ; This testcase shouldn't need to spill the -1 value, ; so it should just use pcmpeqd to materialize an all-ones vector. diff --git a/test/CodeGen/X86/iv-users-in-other-loops.ll b/test/CodeGen/X86/iv-users-in-other-loops.ll index 8f79fb8cde2..4a6f5316a68 100644 --- a/test/CodeGen/X86/iv-users-in-other-loops.ll +++ b/test/CodeGen/X86/iv-users-in-other-loops.ll @@ -1,9 +1,8 @@ ; RUN: llc < %s -march=x86-64 -enable-lsr-nested -o %t ; RUN: not grep inc %t ; RUN: grep dec %t | count 2 -; RUN: grep addq %t | count 12 +; RUN: grep addq %t | count 10 ; RUN: not grep addb %t -; RUN: not grep leaq %t ; RUN: not grep leal %t ; RUN: not grep movq %t diff --git a/test/CodeGen/X86/lsr-loop-exit-cond.ll b/test/CodeGen/X86/lsr-loop-exit-cond.ll index 938023ffe03..382b0e04839 100644 --- a/test/CodeGen/X86/lsr-loop-exit-cond.ll +++ b/test/CodeGen/X86/lsr-loop-exit-cond.ll @@ -1,6 +1,7 @@ ; RUN: llc -march=x86-64 < %s | FileCheck %s ; CHECK: decq +; CHECK-NEXT: movl ( ; CHECK-NEXT: jne @Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5] diff --git a/test/CodeGen/X86/lsr-reuse-trunc.ll b/test/CodeGen/X86/lsr-reuse-trunc.ll index 1f87089f80e..5f5e0937a3b 100644 --- a/test/CodeGen/X86/lsr-reuse-trunc.ll +++ b/test/CodeGen/X86/lsr-reuse-trunc.ll @@ -4,13 +4,14 @@ ; Full 
strength reduction wouldn't reduce register pressure, so LSR should ; stick with indexing here. +; FIXME: This is worse off from disabling of scheduler 2-address hack. ; CHECK: movaps (%{{rsi|rdx}},%rax,4), [[X3:%xmm[0-9]+]] +; CHECK: leaq 4(%rax), %{{rcx|r9}} ; CHECK: cvtdq2ps ; CHECK: orps {{%xmm[0-9]+}}, [[X4:%xmm[0-9]+]] ; CHECK: movaps [[X4]], (%{{rdi|rcx}},%rax,4) -; CHECK: addq $4, %rax -; CHECK: cmpl %eax, (%{{rdx|r8}}) -; CHECK-NEXT: jg +; CHECK: cmpl %{{ecx|r9d}}, (%{{rdx|r8}}) +; CHECK: jg define void @vvfloorf(float* nocapture %y, float* nocapture %x, i32* nocapture %n) nounwind { entry: diff --git a/test/CodeGen/X86/masked-iv-safe.ll b/test/CodeGen/X86/masked-iv-safe.ll index 0b4d73a683a..3a4acb8167f 100644 --- a/test/CodeGen/X86/masked-iv-safe.ll +++ b/test/CodeGen/X86/masked-iv-safe.ll @@ -3,10 +3,10 @@ ; RUN: not grep movz %t ; RUN: not grep sar %t ; RUN: not grep shl %t -; RUN: grep add %t | count 2 +; RUN: grep add %t | count 1 ; RUN: grep inc %t | count 4 ; RUN: grep dec %t | count 2 -; RUN: grep lea %t | count 2 +; RUN: grep lea %t | count 3 ; Optimize away zext-inreg and sext-inreg on the loop induction ; variable using trip-count information. diff --git a/test/CodeGen/X86/multiple-loop-post-inc.ll b/test/CodeGen/X86/multiple-loop-post-inc.ll index 51a06112aad..4f7e28ace3c 100644 --- a/test/CodeGen/X86/multiple-loop-post-inc.ll +++ b/test/CodeGen/X86/multiple-loop-post-inc.ll @@ -1,6 +1,10 @@ ; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s ; rdar://7236213 +; Xfailed now that scheduler 2-address hack is disabled a lea is generated. +; The code isn't any worse though. +; XFAIL: * + ; CodeGen shouldn't require any lea instructions inside the marked loop. ; It should properly set up post-increment uses and do coalescing for ; the induction variables. 
diff --git a/test/CodeGen/X86/sse2.ll b/test/CodeGen/X86/sse2.ll index 1d74af2ba36..36a0fd91bd8 100644 --- a/test/CodeGen/X86/sse2.ll +++ b/test/CodeGen/X86/sse2.ll @@ -178,8 +178,8 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind { %tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1] ret <4 x float> %tmp27 ; CHECK: test14: -; CHECK: addps [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]] -; CHECK: subps [[X1]], [[X2:%xmm[0-9]+]] +; CHECK: subps [[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]] +; CHECK: addps [[X1]], [[X0:%xmm[0-9]+]] ; CHECK: movlhps [[X2]], [[X0]] } diff --git a/test/CodeGen/X86/sse3.ll b/test/CodeGen/X86/sse3.ll index d05c45321ba..291069d4625 100644 --- a/test/CodeGen/X86/sse3.ll +++ b/test/CodeGen/X86/sse3.ll @@ -226,15 +226,16 @@ entry: } - +; FIXME: t15 is worse off from disabling of scheduler 2-address hack. define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone { entry: %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef > ret <8 x i16> %tmp8 ; X64: t15: -; X64: pextrw $7, %xmm0, %eax +; X64: movdqa %xmm0, %xmm2 ; X64: punpcklqdq %xmm1, %xmm0 ; X64: pshuflw $-128, %xmm0, %xmm0 +; X64: pextrw $7, %xmm2, %eax ; X64: pinsrw $2, %eax, %xmm0 ; X64: ret } @@ -247,12 +248,12 @@ entry: %tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 2, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef > ret <16 x i8> %tmp9 ; X64: t16: -; X64: movdqa %xmm1, %xmm0 -; X64: pslldq $2, %xmm0 -; X64: pextrw $1, %xmm0, %eax -; X64: movd %xmm0, %ecx -; X64: pinsrw $0, %ecx, %xmm0 -; X64: pextrw $8, %xmm1, %ecx +; X64: movdqa %xmm1, %xmm2 +; X64: pslldq $2, %xmm2 +; X64: movd %xmm2, %eax +; X64: pinsrw $0, %eax, %xmm0 +; X64: pextrw $8, %xmm1, %eax +; X64: pextrw $1, %xmm2, 
%ecx ; X64: ret }