git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30245 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2006-09-11 05:35:17 +00:00
parent 0f4aa6ee20
commit f47d167c3b
2 changed files with 0 additions and 154 deletions

View File

@ -147,32 +147,6 @@ and ISD::FMAX node types?
//===---------------------------------------------------------------------===//
The first BB of this code:
declare bool %foo()
int %bar() {
%V = call bool %foo()
br bool %V, label %T, label %F
T:
ret int 1
F:
call bool %foo()
ret int 12
}
compiles to:
_bar:
subl $12, %esp
call L_foo$stub
xorb $1, %al
testb %al, %al
jne LBB_bar_2 # F
It would be better to emit a single "cmpb $1, %al" than an xor followed by a test.
//===---------------------------------------------------------------------===//
Lower memcpy / memset to a series of 128-bit SSE move instructions when it's
feasible.
@ -274,33 +248,6 @@ instead of por and movdqa. Does it matter?
//===---------------------------------------------------------------------===//
Use movddup to splat a v2f64 directly from a memory source. e.g.
#include <emmintrin.h>
void test(__m128d *r, double A) {
*r = _mm_set1_pd(A);
}
llc:
_test:
movsd 8(%esp), %xmm0
unpcklpd %xmm0, %xmm0
movl 4(%esp), %eax
movapd %xmm0, (%eax)
ret
icc:
_test:
movl 4(%esp), %eax
movddup 8(%esp), %xmm0
movapd %xmm0, (%eax)
ret
//===---------------------------------------------------------------------===//
X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible
to choose between movaps, movapd, and movdqa based on types of source and
destination?
@ -311,69 +258,6 @@ shorter.
//===---------------------------------------------------------------------===//
We are emitting bad code for this:
float %test(float* %V, int %I, int %D, float %V) {
entry:
%tmp = seteq int %D, 0
br bool %tmp, label %cond_true, label %cond_false23
cond_true:
%tmp3 = getelementptr float* %V, int %I
%tmp = load float* %tmp3
%tmp5 = setgt float %tmp, %V
%tmp6 = tail call bool %llvm.isunordered.f32( float %tmp, float %V )
%tmp7 = or bool %tmp5, %tmp6
br bool %tmp7, label %UnifiedReturnBlock, label %cond_next
cond_next:
%tmp10 = add int %I, 1
%tmp12 = getelementptr float* %V, int %tmp10
%tmp13 = load float* %tmp12
%tmp15 = setle float %tmp13, %V
%tmp16 = tail call bool %llvm.isunordered.f32( float %tmp13, float %V )
%tmp17 = or bool %tmp15, %tmp16
%retval = select bool %tmp17, float 0.000000e+00, float 1.000000e+00
ret float %retval
cond_false23:
%tmp28 = tail call float %foo( float* %V, int %I, int %D, float %V )
ret float %tmp28
UnifiedReturnBlock: ; preds = %cond_true
ret float 0.000000e+00
}
declare bool %llvm.isunordered.f32(float, float)
declare float %foo(float*, int, int, float)
It exposes a known load folding problem:
movss (%edx,%ecx,4), %xmm1
ucomiss %xmm1, %xmm0
As well as this:
LBB_test_2: # cond_next
movss LCPI1_0, %xmm2
pxor %xmm3, %xmm3
ucomiss %xmm0, %xmm1
jbe LBB_test_6 # cond_next
LBB_test_5: # cond_next
movaps %xmm2, %xmm3
LBB_test_6: # cond_next
movss %xmm3, 40(%esp)
flds 40(%esp)
addl $44, %esp
ret
Clearly it's unnecessary to clear %xmm3. It's also not clear why we are emitting
three moves (movss, movaps, movss).
//===---------------------------------------------------------------------===//
External test Nurbs exposed some problems. Look for
__ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. This is what icc
emits:

View File

@ -390,44 +390,6 @@ require a copy to be inserted (in X86InstrInfo::convertToThreeAddress).
//===---------------------------------------------------------------------===//
This input produces ugly code, probably because the instruction costs are off:
void %test(float* %P, <4 x float>* %P2 ) {
%xFloat0.688 = load float* %P
%loadVector37.712 = load <4 x float>* %P2
%inFloat3.713 = insertelement <4 x float> %loadVector37.712, float 0.000000e+00, uint 3
store <4 x float> %inFloat3.713, <4 x float>* %P2
ret void
}
Generates:
_test:
pxor %xmm0, %xmm0
movd %xmm0, %eax ;; EAX = 0!
movl 8(%esp), %ecx
movaps (%ecx), %xmm0
pinsrw $6, %eax, %xmm0
shrl $16, %eax ;; EAX = 0 again!
pinsrw $7, %eax, %xmm0
movaps %xmm0, (%ecx)
ret
It would be better to generate:
_test:
movl 8(%esp), %ecx
movaps (%ecx), %xmm0
xor %eax, %eax
pinsrw $6, %eax, %xmm0
pinsrw $7, %eax, %xmm0
movaps %xmm0, (%ecx)
ret
or use pxor (to make a zero vector) and shuffle (to insert it).
//===---------------------------------------------------------------------===//
Bad codegen:
char foo(int x) { return x; }