Use a bigger hammer to fix PR11314 by disabling the "forcing two-address

instruction lower optimization" in the pre-RA scheduler.

The optimization, rather the hack, was done before MI use-list was available.
Now we should be able to implement it in a better way, perhaps in the
two-address pass until a MI scheduler is available.

Now that the scheduler has to backtrack to handle call sequences. Adding
artificial scheduling constraints is just not safe. Furthermore, the hack
is not taking all the other scheduling decisions into consideration so it's just
as likely to pessimize code. So I view disabling this optimization goodness
regardless of PR11314.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144267 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2011-11-10 07:43:16 +00:00
parent 4dbe96e22f
commit 623a7e146b
12 changed files with 36 additions and 21 deletions

View File

@ -89,6 +89,9 @@ static cl::opt<bool> DisableSchedCriticalPath(
static cl::opt<bool> DisableSchedHeight(
"disable-sched-height", cl::Hidden, cl::init(false),
cl::desc("Disable scheduled-height priority in sched=list-ilp"));
static cl::opt<bool> Disable2AddrHack(
"disable-2addr-hack", cl::Hidden, cl::init(true),
cl::desc("Disable scheduler's two-address hack"));
static cl::opt<int> MaxReorderWindow(
"max-sched-reorder", cl::Hidden, cl::init(6),
@ -2628,7 +2631,8 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
SUnits = &sunits;
// Add pseudo dependency edges for two-address nodes.
AddPseudoTwoAddrDeps();
if (!Disable2AddrHack)
AddPseudoTwoAddrDeps();
// Reroute edges to nodes with multiple uses.
if (!TracksRegPressure)
PrescheduleNodesWithMultipleUses();

View File

@ -1,5 +1,5 @@
; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 -stats -realign-stack=0 |&\
; RUN: grep {asm-printer} | grep 34
; RUN: grep {asm-printer} | grep 35
target datalayout = "e-p:32:32"
define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {

View File

@ -5,7 +5,6 @@
; CHECK: pextrw $14
; CHECK-NEXT: shrl $8
; CHECK-NEXT: (%ebp)
; CHECK-NEXT: pinsrw
define void @update(i8** %args_list) nounwind {

View File

@ -3,6 +3,10 @@
; Nested LSR is required to optimize this case.
; We do not expect to see this form of IR without -enable-iv-rewrite.
; xfailed for now because the scheduler two-address hack has been disabled.
; Now it's generating a leal -1 rather than a decq.
; XFAIL: *
define void @borf(i8* nocapture %in, i8* nocapture %out) nounwind {
; CHECK: borf:
; CHECK-NOT: inc

View File

@ -1,5 +1,7 @@
; RUN: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck --check-prefix=X86-64 %s
; DISABLED: llc < %s -mtriple=i386-apple-darwin -mcpu=yonah -regalloc=linearscan | FileCheck --check-prefix=I386 %s
; i386 test has been disabled when scheduler 2-addr hack is disabled.
; This testcase shouldn't need to spill the -1 value,
; so it should just use pcmpeqd to materialize an all-ones vector.

View File

@ -1,9 +1,8 @@
; RUN: llc < %s -march=x86-64 -enable-lsr-nested -o %t
; RUN: not grep inc %t
; RUN: grep dec %t | count 2
; RUN: grep addq %t | count 12
; RUN: grep addq %t | count 10
; RUN: not grep addb %t
; RUN: not grep leaq %t
; RUN: not grep leal %t
; RUN: not grep movq %t

View File

@ -1,6 +1,7 @@
; RUN: llc -march=x86-64 < %s | FileCheck %s
; CHECK: decq
; CHECK-NEXT: movl (
; CHECK-NEXT: jne
@Te0 = external global [256 x i32] ; <[256 x i32]*> [#uses=5]

View File

@ -4,13 +4,14 @@
; Full strength reduction wouldn't reduce register pressure, so LSR should
; stick with indexing here.
; FIXME: This is worse off from disabling of scheduler 2-address hack.
; CHECK: movaps (%{{rsi|rdx}},%rax,4), [[X3:%xmm[0-9]+]]
; CHECK: leaq 4(%rax), %{{rcx|r9}}
; CHECK: cvtdq2ps
; CHECK: orps {{%xmm[0-9]+}}, [[X4:%xmm[0-9]+]]
; CHECK: movaps [[X4]], (%{{rdi|rcx}},%rax,4)
; CHECK: addq $4, %rax
; CHECK: cmpl %eax, (%{{rdx|r8}})
; CHECK-NEXT: jg
; CHECK: cmpl %{{ecx|r9d}}, (%{{rdx|r8}})
; CHECK: jg
define void @vvfloorf(float* nocapture %y, float* nocapture %x, i32* nocapture %n) nounwind {
entry:

View File

@ -3,10 +3,10 @@
; RUN: not grep movz %t
; RUN: not grep sar %t
; RUN: not grep shl %t
; RUN: grep add %t | count 2
; RUN: grep add %t | count 1
; RUN: grep inc %t | count 4
; RUN: grep dec %t | count 2
; RUN: grep lea %t | count 2
; RUN: grep lea %t | count 3
; Optimize away zext-inreg and sext-inreg on the loop induction
; variable using trip-count information.

View File

@ -1,6 +1,10 @@
; RUN: llc -asm-verbose=false -disable-branch-fold -disable-code-place -disable-tail-duplicate -march=x86-64 < %s | FileCheck %s
; rdar://7236213
; Xfailed now that scheduler 2-address hack is disabled a lea is generated.
; The code isn't any worse though.
; XFAIL: *
; CodeGen shouldn't require any lea instructions inside the marked loop.
; It should properly set up post-increment uses and do coalescing for
; the induction variables.

View File

@ -178,8 +178,8 @@ define <4 x float> @test14(<4 x float>* %x, <4 x float>* %y) nounwind {
%tmp27 = shufflevector <4 x float> %tmp9, <4 x float> %tmp21, <4 x i32> < i32 0, i32 1, i32 4, i32 5 > ; <<4 x float>> [#uses=1]
ret <4 x float> %tmp27
; CHECK: test14:
; CHECK: addps [[X1:%xmm[0-9]+]], [[X0:%xmm[0-9]+]]
; CHECK: subps [[X1]], [[X2:%xmm[0-9]+]]
; CHECK: subps [[X1:%xmm[0-9]+]], [[X2:%xmm[0-9]+]]
; CHECK: addps [[X1]], [[X0:%xmm[0-9]+]]
; CHECK: movlhps [[X2]], [[X0]]
}

View File

@ -226,15 +226,16 @@ entry:
}
; FIXME: t15 is worse off from disabling of scheduler 2-address hack.
define <8 x i16> @t15(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
entry:
%tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
ret <8 x i16> %tmp8
; X64: t15:
; X64: pextrw $7, %xmm0, %eax
; X64: movdqa %xmm0, %xmm2
; X64: punpcklqdq %xmm1, %xmm0
; X64: pshuflw $-128, %xmm0, %xmm0
; X64: pextrw $7, %xmm2, %eax
; X64: pinsrw $2, %eax, %xmm0
; X64: ret
}
@ -247,12 +248,12 @@ entry:
%tmp9 = shufflevector <16 x i8> %tmp8, <16 x i8> %T0, <16 x i32> < i32 0, i32 1, i32 2, i32 17, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef , i32 undef >
ret <16 x i8> %tmp9
; X64: t16:
; X64: movdqa %xmm1, %xmm0
; X64: pslldq $2, %xmm0
; X64: pextrw $1, %xmm0, %eax
; X64: movd %xmm0, %ecx
; X64: pinsrw $0, %ecx, %xmm0
; X64: pextrw $8, %xmm1, %ecx
; X64: movdqa %xmm1, %xmm2
; X64: pslldq $2, %xmm2
; X64: movd %xmm2, %eax
; X64: pinsrw $0, %eax, %xmm0
; X64: pextrw $8, %xmm1, %eax
; X64: pextrw $1, %xmm2, %ecx
; X64: ret
}