Update the readme to remove duplicate information and clarify the loop problem.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@29468 91177308-0d34-0410-b5e6-96231b3b80d8
Nate Begeman 2006-08-02 05:31:20 +00:00
parent 8f74680c78
commit 83a6d49102

@@ -198,7 +198,7 @@ on some processors (which ones?), it is more efficient to do this:
 _test:
         movl 8(%esp), %ebx
         xor  %eax, %eax
         cmpl %ebx, 4(%esp)
         setl %al
         ret
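
For context on the hunk above: a plausible C source for the _test routine is a
function returning an integer comparison (the argument names are guesses; the
stack-slot mapping follows the 32-bit cdecl convention):

    /* setl writes only %al, so the earlier "xor %eax, %eax" is what turns the
       byte-sized condition into a clean 32-bit 0/1 result; the xor has to come
       before the cmpl because it clobbers EFLAGS. */
    int test(int a, int b) {        /* a = 4(%esp), b = 8(%esp) */
      return a < b;
    }
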
@@ -340,22 +340,6 @@ Enable X86InstrInfo::convertToThreeAddress().
 //===---------------------------------------------------------------------===//
-Investigate whether it is better to codegen the following
-        %tmp.1 = mul int %x, 9
-to
-        movl    4(%esp), %eax
-        leal    (%eax,%eax,8), %eax
-as opposed to what llc is currently generating:
-        imull $9, 4(%esp), %eax
-Currently the load folding imull has a higher complexity than the LEA32 pattern.
-//===---------------------------------------------------------------------===//
 We are currently lowering large (1MB+) memmove/memcpy to rep/stosl and rep/movsl
 We should leave these as libcalls for everything over a much lower threshold,
 since libc is hand tuned for medium and large mem ops (avoiding RFO for large
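
On the multiply-by-9 entry removed above: the LEA form exploits scaled-index
addressing, computing x + 8*x = 9*x in the address-generation unit instead of
the integer multiplier. A minimal sketch of the source in question (the C
wrapper and its name are illustrative, not from the README):

    /* leal (%eax,%eax,8), %eax  =>  %eax = %eax + %eax*8 = 9*x
       imull $9, 4(%esp), %eax   =>  one multiply, but folds the load.
       The removed note says llc currently picks the imull form because the
       load-folding imull pattern has higher complexity than the LEA32 pattern. */
    int mul9(int x) {
      return x * 9;
    }
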
@@ -671,35 +655,26 @@ We should handle __attribute__ ((__visibility__ ("hidden"))).
 //===---------------------------------------------------------------------===//
-Consider:
-int foo(int *a, int t) {
-  int x;
-  for (x=0; x<40; ++x)
-     t = t + a[x] + x;
-  return t;
-}
-We generate:
-LBB1_1: #cond_true
-        movl %ecx, %esi
-        movl (%edx,%eax,4), %edi
-        movl %esi, %ecx
-        addl %edi, %ecx
-        addl %eax, %ecx
-        incl %eax
-        cmpl $40, %eax
-        jne LBB1_1 #cond_true
-GCC generates:
-L2:
-        addl (%ecx,%edx,4), %eax
-        addl %edx, %eax
-        addl $1, %edx
-        cmpl $40, %edx
-        jne L2
-Smells like a register coallescing/reassociation issue.
+int %foo(int* %a, int %t) {
+entry:
+        br label %cond_true
+cond_true:              ; preds = %cond_true, %entry
+        %x.0.0 = phi int [ 0, %entry ], [ %tmp9, %cond_true ]          ; <int> [#uses=3]
+        %t_addr.0.0 = phi int [ %t, %entry ], [ %tmp7, %cond_true ]    ; <int> [#uses=1]
+        %tmp2 = getelementptr int* %a, int %x.0.0       ; <int*> [#uses=1]
+        %tmp3 = load int* %tmp2         ; <int> [#uses=1]
+        %tmp5 = add int %t_addr.0.0, %x.0.0             ; <int> [#uses=1]
+        %tmp7 = add int %tmp5, %tmp3            ; <int> [#uses=2]
+        %tmp9 = add int %x.0.0, 1               ; <int> [#uses=2]
+        %tmp = setgt int %tmp9, 39              ; <bool> [#uses=1]
+        br bool %tmp, label %bb12, label %cond_true
+bb12:           ; preds = %cond_true
+        ret int %tmp7
+}
+is pessimized by -loop-reduce and -indvars
 //===---------------------------------------------------------------------===//
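
For readers of the new text: the LLVM IR added in the hunk above is roughly the
mem2reg-level form of the C loop that the old text showed; a self-contained
copy, handy for reproducing the -loop-reduce/-indvars pessimization, is:

    /* Same loop as the C source removed above; the old text noted that llc's
       output for it carried extra register copies compared to GCC's. */
    int foo(int *a, int t) {
      int x;
      for (x = 0; x < 40; ++x)
        t = t + a[x] + x;
      return t;
    }
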