Updates: remove entries from the X86 README-SSE.txt and README.txt notes.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@30245 91177308-0d34-0410-b5e6-96231b3b80d8
2025-11-23 00:20:25 +00:00 · 2006-09-11 05:35:17 +00:00
parent 0f4aa6ee20
commit f47d167c3b
2 changed files with 0 additions and 154 deletions
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -147,32 +147,6 @@ and ISD::FMAX node types?

 //===---------------------------------------------------------------------===//

-The first BB of this code:
-
-declare bool %foo()
-int %bar() {
-        %V = call bool %foo()
-        br bool %V, label %T, label %F
-T:
-        ret int 1
-F:
-        call bool %foo()
-        ret int 12
-}
-
-compiles to:
-
-_bar:
-        subl $12, %esp
-        call L_foo$stub
-        xorb $1, %al
-        testb %al, %al
-        jne LBB_bar_2   # F
-
-It would be better to emit "cmpb $1, %al" than an xor and a test.
-
-//===---------------------------------------------------------------------===//
-
 Lower memcpy / memset to a series of SSE 128 bit move instructions when it's
 feasible.

@@ -274,33 +248,6 @@ instead of por and movdqa. Does it matter?

 //===---------------------------------------------------------------------===//

-Use movddup to splat a v2f64 directly from a memory source, e.g.:
-
-#include <emmintrin.h>
-
-void test(__m128d *r, double A) {
-  *r = _mm_set1_pd(A);
-}
-
-llc:
-
-_test:
-	movsd 8(%esp), %xmm0
-	unpcklpd %xmm0, %xmm0
-	movl 4(%esp), %eax
-	movapd %xmm0, (%eax)
-	ret
-
-icc:
-
-_test:
-	movl 4(%esp), %eax
-	movddup 8(%esp), %xmm0
-	movapd %xmm0, (%eax)
-	ret
-
-//===---------------------------------------------------------------------===//
-
 X86RegisterInfo::copyRegToReg() returns X86::MOVAPSrr for VR128. Is it possible
 to choose between movaps, movapd, and movdqa based on types of source and
 destination?
@@ -311,69 +258,6 @@ shorter.

 //===---------------------------------------------------------------------===//

-We are emitting bad code for this:
-
-float %test(float* %V, int %I, int %D, float %V) {
-entry:
-	%tmp = seteq int %D, 0
-	br bool %tmp, label %cond_true, label %cond_false23
-
-cond_true:
-	%tmp3 = getelementptr float* %V, int %I
-	%tmp = load float* %tmp3
-	%tmp5 = setgt float %tmp, %V
-	%tmp6 = tail call bool %llvm.isunordered.f32( float %tmp, float %V )
-	%tmp7 = or bool %tmp5, %tmp6
-	br bool %tmp7, label %UnifiedReturnBlock, label %cond_next
-
-cond_next:
-	%tmp10 = add int %I, 1
-	%tmp12 = getelementptr float* %V, int %tmp10
-	%tmp13 = load float* %tmp12
-	%tmp15 = setle float %tmp13, %V
-	%tmp16 = tail call bool %llvm.isunordered.f32( float %tmp13, float %V )
-	%tmp17 = or bool %tmp15, %tmp16
-	%retval = select bool %tmp17, float 0.000000e+00, float 1.000000e+00
-	ret float %retval
-
-cond_false23:
-	%tmp28 = tail call float %foo( float* %V, int %I, int %D, float %V )
-	ret float %tmp28
-
-UnifiedReturnBlock:		; preds = %cond_true
-	ret float 0.000000e+00
-}
-
-declare bool %llvm.isunordered.f32(float, float)
-
-declare float %foo(float*, int, int, float)
-
-
-It exposes a known load folding problem (the load is not folded into ucomiss):
-
-	movss (%edx,%ecx,4), %xmm1
-	ucomiss %xmm1, %xmm0
-
-As well as this:
-
-LBB_test_2:	# cond_next
-	movss LCPI1_0, %xmm2
-	pxor %xmm3, %xmm3
-	ucomiss %xmm0, %xmm1
-	jbe LBB_test_6	# cond_next
-LBB_test_5:	# cond_next
-	movaps %xmm2, %xmm3
-LBB_test_6:	# cond_next
-	movss %xmm3, 40(%esp)
-	flds 40(%esp)
-	addl $44, %esp
-	ret
-
-Clearly it's unnecessary to clear %xmm3. It's also not clear why we are emitting
-three moves (movss, movaps, movss).
-
-//===---------------------------------------------------------------------===//
-
 External test Nurbs exposed some problems. Look for
 __ZN15Nurbs_SSE_Cubic17TessellateSurfaceE, bb cond_next140. This is what icc
 emits:
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -390,44 +390,6 @@ require a copy to be inserted (in X86InstrInfo::convertToThreeAddress).

 //===---------------------------------------------------------------------===//

-This code compiles to ugly code, probably because the instruction costs are off:
-
-void %test(float* %P, <4 x float>* %P2 ) {
-        %xFloat0.688 = load float* %P
-        %loadVector37.712 = load <4 x float>* %P2
-        %inFloat3.713 = insertelement <4 x float> %loadVector37.712, float 0.000000e+00, uint 3
-        store <4 x float> %inFloat3.713, <4 x float>* %P2
-        ret void
-}
-
-Generates:
-
-_test:
-        pxor %xmm0, %xmm0
-        movd %xmm0, %eax        ;; EAX = 0!
-        movl 8(%esp), %ecx
-        movaps (%ecx), %xmm0
-        pinsrw $6, %eax, %xmm0
-        shrl $16, %eax          ;; EAX = 0 again!
-        pinsrw $7, %eax, %xmm0
-        movaps %xmm0, (%ecx)
-        ret
-
-It would be better to generate:
-
-_test:
-        movl 8(%esp), %ecx
-        movaps (%ecx), %xmm0
-	xor %eax, %eax
-        pinsrw $6, %eax, %xmm0
-        pinsrw $7, %eax, %xmm0
-        movaps %xmm0, (%ecx)
-        ret
-
-or use pxor (to make a zero vector) and shuffle (to insert it).
-
-//===---------------------------------------------------------------------===//
-
 Bad codegen:

 char foo(int x) { return x; }