upgrade some entries, remove stuff that is done.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47109 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2008-02-14 06:19:02 +00:00
parent ea1cddf546
commit eb05f90c71
2 changed files with 44 additions and 132 deletions

View File

@ -56,22 +56,23 @@ store tmp -> [xslot]
time, not at spiller time). *Note* however that this can only be done
if Y is dead. Here's a testcase:
%.str_3 = external global [15 x sbyte] ; <[15 x sbyte]*> [#uses=0]
implementation ; Functions:
declare void %printf(int, ...)
void %main() {
@.str_3 = external global [15 x i8] ; <[15 x i8]*> [#uses=0]
declare void @printf(i32, ...)
define void @main() {
build_tree.exit:
br label %no_exit.i7
no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit
%tmp.0.1.0.i9 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.34.i18, %no_exit.i7 ] ; <double> [#uses=1]
%tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.28.i16, %no_exit.i7 ] ; <double> [#uses=1]
%tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00
%tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00
br bool false, label %Compute_Tree.exit23, label %no_exit.i7
Compute_Tree.exit23: ; preds = %no_exit.i7
tail call void (int, ...)* %printf( int 0 )
store double %tmp.34.i18, double* null
ret void
br label %no_exit.i7
no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit
%tmp.0.1.0.i9 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.34.i18, %no_exit.i7 ] ; <double> [#uses=1]
%tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.28.i16, %no_exit.i7 ] ; <double> [#uses=1]
%tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00 ; <double> [#uses=1]
%tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00 ; <double> [#uses=2]
br i1 false, label %Compute_Tree.exit23, label %no_exit.i7
Compute_Tree.exit23: ; preds = %no_exit.i7
tail call void (i32, ...)* @printf( i32 0 )
store double %tmp.34.i18, double* null
ret void
}
We currently emit:
@ -125,25 +126,6 @@ more experiments on different x86 machines.
//===---------------------------------------------------------------------===//
Currently the x86 codegen isn't very good at mixing SSE and FPStack
code:
unsigned int foo(double x) { return x; }
foo:
subl $20, %esp
movsd 24(%esp), %xmm0
movsd %xmm0, 8(%esp)
fldl 8(%esp)
fisttpll (%esp)
movl (%esp), %eax
addl $20, %esp
ret
This will be solved when we go to a dynamic programming based isel.
//===---------------------------------------------------------------------===//
Lower memcpy / memset to a series of SSE 128 bit move instructions when it's
feasible.

View File

@ -435,44 +435,6 @@ require a copy to be inserted (in X86InstrInfo::convertToThreeAddress).
//===---------------------------------------------------------------------===//
Consider this:
typedef struct pair { float A, B; } pair;
void pairtest(pair P, float *FP) {
*FP = P.A+P.B;
}
We currently generate this code with llvmgcc4:
_pairtest:
movl 8(%esp), %eax
movl 4(%esp), %ecx
movd %eax, %xmm0
movd %ecx, %xmm1
addss %xmm0, %xmm1
movl 12(%esp), %eax
movss %xmm1, (%eax)
ret
we should be able to generate:
_pairtest:
movss 4(%esp), %xmm0
movl 12(%esp), %eax
addss 8(%esp), %xmm0
movss %xmm0, (%eax)
ret
The issue is that llvmgcc4 is forcing the struct to memory, then passing it as
integer chunks. It does this so that structs like {short,short} are passed in
a single 32-bit integer stack slot. We should handle the safe cases above much
more nicely, while still handling the hard cases.
While true in general, in this specific case we could do better by promoting
load int + bitcast to float -> load float. This basically needs alignment info;
the code is already implemented (but disabled) in dag combine.
//===---------------------------------------------------------------------===//
Another instruction selector deficiency:
void %bar() {
@ -551,25 +513,24 @@ do not make use of.
//===---------------------------------------------------------------------===//
int %foo(int* %a, int %t) {
define i32 @foo(i32* %a, i32 %t) {
entry:
br label %cond_true
br label %cond_true
cond_true: ; preds = %cond_true, %entry
%x.0.0 = phi int [ 0, %entry ], [ %tmp9, %cond_true ]
%t_addr.0.0 = phi int [ %t, %entry ], [ %tmp7, %cond_true ]
%tmp2 = getelementptr int* %a, int %x.0.0
%tmp3 = load int* %tmp2 ; <int> [#uses=1]
%tmp5 = add int %t_addr.0.0, %x.0.0 ; <int> [#uses=1]
%tmp7 = add int %tmp5, %tmp3 ; <int> [#uses=2]
%tmp9 = add int %x.0.0, 1 ; <int> [#uses=2]
%tmp = setgt int %tmp9, 39 ; <bool> [#uses=1]
br bool %tmp, label %bb12, label %cond_true
cond_true: ; preds = %cond_true, %entry
%x.0.0 = phi i32 [ 0, %entry ], [ %tmp9, %cond_true ] ; <i32> [#uses=3]
%t_addr.0.0 = phi i32 [ %t, %entry ], [ %tmp7, %cond_true ] ; <i32> [#uses=1]
%tmp2 = getelementptr i32* %a, i32 %x.0.0 ; <i32*> [#uses=1]
%tmp3 = load i32* %tmp2 ; <i32> [#uses=1]
%tmp5 = add i32 %t_addr.0.0, %x.0.0 ; <i32> [#uses=1]
%tmp7 = add i32 %tmp5, %tmp3 ; <i32> [#uses=2]
%tmp9 = add i32 %x.0.0, 1 ; <i32> [#uses=2]
%tmp = icmp sgt i32 %tmp9, 39 ; <i1> [#uses=1]
br i1 %tmp, label %bb12, label %cond_true
bb12: ; preds = %cond_true
ret int %tmp7
bb12: ; preds = %cond_true
ret i32 %tmp7
}
is pessimized by -loop-reduce and -indvars
//===---------------------------------------------------------------------===//
@ -704,9 +665,9 @@ The add\sub pair is really unneeded here.
Consider the expansion of:
uint %test3(uint %X) {
%tmp1 = rem uint %X, 255
ret uint %tmp1
define i32 @test3(i32 %X) {
%tmp1 = urem i32 %X, 255
ret i32 %tmp1
}
Currently it compiles to:
@ -948,22 +909,22 @@ Another example is:
;; allocator turns the shift into an LEA. This also occurs for ADD.
; Check that the shift gets turned into an LEA.
; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -x86-asm-syntax=intel | \
; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
; RUN: not grep {mov E.X, E.X}
%G = external global int
@G = external global i32 ; <i32*> [#uses=3]
int %test1(int %X, int %Y) {
%Z = add int %X, %Y
volatile store int %Y, int* %G
volatile store int %Z, int* %G
ret int %X
define i32 @test1(i32 %X, i32 %Y) {
%Z = add i32 %X, %Y ; <i32> [#uses=1]
volatile store i32 %Y, i32* @G
volatile store i32 %Z, i32* @G
ret i32 %X
}
int %test2(int %X) {
%Z = add int %X, 1 ;; inc
volatile store int %Z, int* %G
ret int %X
define i32 @test2(i32 %X) {
%Z = add i32 %X, 1 ; <i32> [#uses=1]
volatile store i32 %Z, i32* @G
ret i32 %X
}
//===---------------------------------------------------------------------===//
@ -1238,37 +1199,6 @@ suggests using the 32-bit register (which is what ICC uses).
//===---------------------------------------------------------------------===//
rdar://5506677 - We compile this:
define i32 @foo(double %x) {
%x14 = bitcast double %x to i64 ; <i64> [#uses=1]
%tmp713 = trunc i64 %x14 to i32 ; <i32> [#uses=1]
%tmp8 = and i32 %tmp713, 2147483647 ; <i32> [#uses=1]
ret i32 %tmp8
}
to:
_foo:
subl $12, %esp
fldl 16(%esp)
fstpl (%esp)
movl $2147483647, %eax
andl (%esp), %eax
addl $12, %esp
#FP_REG_KILL
ret
It would be much better to eliminate the fldl/fstpl by folding the bitcast
into the load SDNode. That would give us:
_foo:
movl $2147483647, %eax
andl 4(%esp), %eax
ret
//===---------------------------------------------------------------------===//
We compile this:
void compare (long long foo) {