llvm-6502/test/CodeGen/X86/2006-05-11-InstrSched.ll
Evan Cheng 623a7e146b Use a bigger hammer to fix PR11314 by disabling the "forcing two-address
instruction lower optimization" in the pre-RA scheduler.

The optimization, rather the hack, was done before MI use-list was available.
Now we should be able to implement it in a better way, perhaps in the
two-address pass until a MI scheduler is available.

Now that the scheduler has to backtrack to handle call sequences. Adding
artificial scheduling constraints is just not safe. Furthermore, the hack
is not taking all the other scheduling decisions into consideration so it's just
as likely to pessimize code. So I view disabling this optimization goodness
regardless of PR11314.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144267 91177308-0d34-0410-b5e6-96231b3b80d8
2011-11-10 07:43:16 +00:00

52 lines
2.9 KiB
LLVM

; RUN: llc < %s -march=x86 -mtriple=i386-linux-gnu -mattr=+sse2 -stats -realign-stack=0 |&\
; RUN: grep {asm-printer} | grep 35
target datalayout = "e-p:32:32"
define void @foo(i32* %mc, i32* %bp, i32* %ms, i32* %xmb, i32* %mpp, i32* %tpmm, i32* %ip, i32* %tpim, i32* %dpp, i32* %tpdm, i32* %bpi, i32 %M) nounwind {
entry:
%tmp9 = icmp slt i32 %M, 5 ; <i1> [#uses=1]
br i1 %tmp9, label %return, label %cond_true
cond_true: ; preds = %cond_true, %entry
%indvar = phi i32 [ 0, %entry ], [ %indvar.next, %cond_true ] ; <i32> [#uses=2]
%tmp. = shl i32 %indvar, 2 ; <i32> [#uses=1]
%tmp.10 = add nsw i32 %tmp., 1 ; <i32> [#uses=2]
%tmp31 = add nsw i32 %tmp.10, -1 ; <i32> [#uses=4]
%tmp32 = getelementptr i32* %mpp, i32 %tmp31 ; <i32*> [#uses=1]
%tmp34 = bitcast i32* %tmp32 to <16 x i8>* ; <i8*> [#uses=1]
%tmp = load <16 x i8>* %tmp34, align 1
%tmp42 = getelementptr i32* %tpmm, i32 %tmp31 ; <i32*> [#uses=1]
%tmp42.upgrd.1 = bitcast i32* %tmp42 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
%tmp46 = load <4 x i32>* %tmp42.upgrd.1 ; <<4 x i32>> [#uses=1]
%tmp54 = bitcast <16 x i8> %tmp to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp55 = add <4 x i32> %tmp54, %tmp46 ; <<4 x i32>> [#uses=2]
%tmp55.upgrd.2 = bitcast <4 x i32> %tmp55 to <2 x i64> ; <<2 x i64>> [#uses=1]
%tmp62 = getelementptr i32* %ip, i32 %tmp31 ; <i32*> [#uses=1]
%tmp65 = bitcast i32* %tmp62 to <16 x i8>* ; <i8*> [#uses=1]
%tmp66 = load <16 x i8>* %tmp65, align 1
%tmp73 = getelementptr i32* %tpim, i32 %tmp31 ; <i32*> [#uses=1]
%tmp73.upgrd.3 = bitcast i32* %tmp73 to <4 x i32>* ; <<4 x i32>*> [#uses=1]
%tmp77 = load <4 x i32>* %tmp73.upgrd.3 ; <<4 x i32>> [#uses=1]
%tmp87 = bitcast <16 x i8> %tmp66 to <4 x i32> ; <<4 x i32>> [#uses=1]
%tmp88 = add <4 x i32> %tmp87, %tmp77 ; <<4 x i32>> [#uses=2]
%tmp88.upgrd.4 = bitcast <4 x i32> %tmp88 to <2 x i64> ; <<2 x i64>> [#uses=1]
%tmp99 = tail call <4 x i32> @llvm.x86.sse2.pcmpgt.d( <4 x i32> %tmp88, <4 x i32> %tmp55 ) ; <<4 x i32>> [#uses=1]
%tmp99.upgrd.5 = bitcast <4 x i32> %tmp99 to <2 x i64> ; <<2 x i64>> [#uses=2]
%tmp110 = xor <2 x i64> %tmp99.upgrd.5, < i64 -1, i64 -1 > ; <<2 x i64>> [#uses=1]
%tmp111 = and <2 x i64> %tmp110, %tmp55.upgrd.2 ; <<2 x i64>> [#uses=1]
%tmp121 = and <2 x i64> %tmp99.upgrd.5, %tmp88.upgrd.4 ; <<2 x i64>> [#uses=1]
%tmp131 = or <2 x i64> %tmp121, %tmp111 ; <<2 x i64>> [#uses=1]
%tmp137 = getelementptr i32* %mc, i32 %tmp.10 ; <i32*> [#uses=1]
%tmp137.upgrd.7 = bitcast i32* %tmp137 to <2 x i64>* ; <<2 x i64>*> [#uses=1]
store <2 x i64> %tmp131, <2 x i64>* %tmp137.upgrd.7
%tmp147 = add nsw i32 %tmp.10, 8 ; <i32> [#uses=1]
%tmp.upgrd.8 = icmp ne i32 %tmp147, %M ; <i1> [#uses=1]
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=1]
br i1 %tmp.upgrd.8, label %cond_true, label %return
return: ; preds = %cond_true, %entry
ret void
}
declare <4 x i32> @llvm.x86.sse2.pcmpgt.d(<4 x i32>, <4 x i32>)