llvm-6502/test/CodeGen/X86/machine-cp.ll
Evan Cheng 977679d603 Added a late machine instruction copy propagation pass. This catches
opportunities that only present themselves after late optimizations
such as tail duplication .e.g.
## BB#1:
        movl    %eax, %ecx
        movl    %ecx, %eax
        ret

The register allocator also leaves some of them around (due to false
dep between copies from phi-elimination, etc.)

This required some changes in codegen passes. Post-ra scheduler and the
pseudo-instruction expansion passes have been moved after branch folding
and tail merging. They were before branch folding before because it did
not always update block livein's. That's fixed now. The pass change makes
independently since we want to properly schedule instructions after
branch folding / tail duplication.

rdar://10428165
rdar://10640363



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@147716 91177308-0d34-0410-b5e6-96231b3b80d8
2012-01-07 03:02:36 +00:00

37 lines
1.2 KiB
LLVM

; RUN: llc -mtriple=x86_64-apple-macosx -mcpu=nocona < %s | FileCheck %s
; After tail duplication, two copies in an early exit BB can be cancelled out.
; rdar://10640363
define i32 @t1(i32 %a, i32 %b) nounwind {
entry:
; CHECK: t1:
; CHECK: jne
%cmp1 = icmp eq i32 %b, 0
br i1 %cmp1, label %while.end, label %while.body
; CHECK: BB
; CHECK-NOT: mov
; CHECK: ret
while.body: ; preds = %entry, %while.body
%a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
%b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
%rem = srem i32 %a.addr.03, %b.addr.02
%cmp = icmp eq i32 %rem, 0
br i1 %cmp, label %while.end, label %while.body
while.end: ; preds = %while.body, %entry
%a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
ret i32 %a.addr.0.lcssa
}
; Two movdqa (from phi-elimination) in the entry BB cancels out.
; rdar://10428165
define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
entry:
; CHECK: t2:
; CHECK-NOT: movdqa
%tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
ret <8 x i16> %tmp8
}