From eb05f90c71fc7b42e52d4f363fd8325bace9a0e7 Mon Sep 17 00:00:00 2001
From: Chris Lattner <sabre@nondot.org>
Date: Thu, 14 Feb 2008 06:19:02 +0000
Subject: [PATCH] Upgrade some README entries to current LLVM IR syntax; remove entries that are done.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47109 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/X86/README-SSE.txt |  50 +++++---------
 lib/Target/X86/README.txt     | 126 ++++++++--------------------------
 2 files changed, 44 insertions(+), 132 deletions(-)
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 197dae38c6f..2ccabca2fc7 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -56,22 +56,23 @@ store tmp -> [xslot]
 time, not at spiller time).  *Note* however that this can only be done
 if Y is dead.  Here's a testcase:
 
-%.str_3 = external global [15 x sbyte]          ; <[15 x sbyte]*> [#uses=0]
-implementation   ; Functions:
-declare void %printf(int, ...)
-void %main() {
+@.str_3 = external global [15 x i8]		; <[15 x i8]*> [#uses=0]
+declare void @printf(i32, ...)
+define void @main() {
 build_tree.exit:
-        br label %no_exit.i7
-no_exit.i7:             ; preds = %no_exit.i7, %build_tree.exit
-        %tmp.0.1.0.i9 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.34.i18, %no_exit.i7 ]      ; <double> [#uses=1]
-        %tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.28.i16, %no_exit.i7 ]     ; <double> [#uses=1]
-        %tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00
-        %tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00
-        br bool false, label %Compute_Tree.exit23, label %no_exit.i7
-Compute_Tree.exit23:            ; preds = %no_exit.i7
-        tail call void (int, ...)* %printf( int 0 )
-        store double %tmp.34.i18, double* null
-        ret void
+	br label %no_exit.i7
+
+no_exit.i7:		; preds = %no_exit.i7, %build_tree.exit
+	%tmp.0.1.0.i9 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.34.i18, %no_exit.i7 ]		; <double> [#uses=1]
+	%tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.28.i16, %no_exit.i7 ]		; <double> [#uses=1]
+	%tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00		; <double> [#uses=1]
+	%tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00		; <double> [#uses=2]
+	br i1 false, label %Compute_Tree.exit23, label %no_exit.i7
+
+Compute_Tree.exit23:		; preds = %no_exit.i7
+	tail call void (i32, ...)* @printf( i32 0 )
+	store double %tmp.34.i18, double* null
+	ret void
 }
 
 We currently emit:
@@ -125,25 +126,6 @@ more experiments on different x86 machines.
 
 //===---------------------------------------------------------------------===//
 
-Currently the x86 codegen isn't very good at mixing SSE and FPStack
-code:
-
-unsigned int foo(double x) { return x; }
-
-foo:
-	subl $20, %esp
-	movsd 24(%esp), %xmm0
-	movsd %xmm0, 8(%esp)
-	fldl 8(%esp)
-	fisttpll (%esp)
-	movl (%esp), %eax
-	addl $20, %esp
-	ret
-
-This will be solved when we go to a dynamic programming based isel.
-
-//===---------------------------------------------------------------------===//
-
 Lower memcpy / memset to a series of SSE 128 bit move instructions when it's
 feasible.
 
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index e9f0d7338b3..5a4f7c4e5f1 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -435,44 +435,6 @@ require a copy to be inserted (in X86InstrInfo::convertToThreeAddress).
 
 //===---------------------------------------------------------------------===//
 
-Consider this:
-
-typedef struct pair { float A, B; } pair;
-void pairtest(pair P, float *FP) {
-        *FP = P.A+P.B;
-}
-
-We currently generate this code with llvmgcc4:
-
-_pairtest:
-        movl 8(%esp), %eax
-        movl 4(%esp), %ecx
-        movd %eax, %xmm0
-        movd %ecx, %xmm1
-        addss %xmm0, %xmm1
-        movl 12(%esp), %eax
-        movss %xmm1, (%eax)
-        ret
-
-we should be able to generate:
-_pairtest:
-        movss 4(%esp), %xmm0
-        movl 12(%esp), %eax
-        addss 8(%esp), %xmm0
-        movss %xmm0, (%eax)
-        ret
-
-The issue is that llvmgcc4 is forcing the struct to memory, then passing it as
-integer chunks.  It does this so that structs like {short,short} are passed in
-a single 32-bit integer stack slot.  We should handle the safe cases above much
-nicer, while still handling the hard cases.
-
-While true in general, in this specific case we could do better by promoting
-load int + bitcast to float -> load fload.  This basically needs alignment info,
-the code is already implemented (but disabled) in dag combine).
-
-//===---------------------------------------------------------------------===//
-
 Another instruction selector deficiency:
 
 void %bar() {
@@ -551,25 +513,24 @@ do not make use of.
 
 //===---------------------------------------------------------------------===//
 
-int %foo(int* %a, int %t) {
+define i32 @foo(i32* %a, i32 %t) {
 entry:
-        br label %cond_true
+	br label %cond_true
 
-cond_true:              ; preds = %cond_true, %entry
-        %x.0.0 = phi int [ 0, %entry ], [ %tmp9, %cond_true ]  
-        %t_addr.0.0 = phi int [ %t, %entry ], [ %tmp7, %cond_true ]
-        %tmp2 = getelementptr int* %a, int %x.0.0              
-        %tmp3 = load int* %tmp2         ; <int> [#uses=1]
-        %tmp5 = add int %t_addr.0.0, %x.0.0             ; <int> [#uses=1]
-        %tmp7 = add int %tmp5, %tmp3            ; <int> [#uses=2]
-        %tmp9 = add int %x.0.0, 1               ; <int> [#uses=2]
-        %tmp = setgt int %tmp9, 39              ; <bool> [#uses=1]
-        br bool %tmp, label %bb12, label %cond_true
+cond_true:		; preds = %cond_true, %entry
+	%x.0.0 = phi i32 [ 0, %entry ], [ %tmp9, %cond_true ]		; <i32> [#uses=3]
+	%t_addr.0.0 = phi i32 [ %t, %entry ], [ %tmp7, %cond_true ]		; <i32> [#uses=1]
+	%tmp2 = getelementptr i32* %a, i32 %x.0.0		; <i32*> [#uses=1]
+	%tmp3 = load i32* %tmp2		; <i32> [#uses=1]
+	%tmp5 = add i32 %t_addr.0.0, %x.0.0		; <i32> [#uses=1]
+	%tmp7 = add i32 %tmp5, %tmp3		; <i32> [#uses=2]
+	%tmp9 = add i32 %x.0.0, 1		; <i32> [#uses=2]
+	%tmp = icmp sgt i32 %tmp9, 39		; <i1> [#uses=1]
+	br i1 %tmp, label %bb12, label %cond_true
 
-bb12:           ; preds = %cond_true
-        ret int %tmp7
+bb12:		; preds = %cond_true
+	ret i32 %tmp7
 }
-
 is pessimized by -loop-reduce and -indvars
 
 //===---------------------------------------------------------------------===//
@@ -704,9 +665,9 @@ The add\sub pair is really unneeded here.
 
 Consider the expansion of:
 
-uint %test3(uint %X) {
-        %tmp1 = rem uint %X, 255
-        ret uint %tmp1
+define i32 @test3(i32 %X) {
+        %tmp1 = urem i32 %X, 255
+        ret i32 %tmp1
 }
 
 Currently it compiles to:
@@ -948,22 +909,22 @@ Another example is:
 ;; allocator turns the shift into an LEA.  This also occurs for ADD.
 
 ; Check that the shift gets turned into an LEA.
-; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -x86-asm-syntax=intel | \
+; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
 ; RUN:   not grep {mov E.X, E.X}
 
-%G = external global int
+@G = external global i32		; <i32*> [#uses=3]
 
-int %test1(int %X, int %Y) {
-        %Z = add int %X, %Y
-        volatile store int %Y, int* %G
-        volatile store int %Z, int* %G
-        ret int %X
+define i32 @test1(i32 %X, i32 %Y) {
+	%Z = add i32 %X, %Y		; <i32> [#uses=1]
+	volatile store i32 %Y, i32* @G
+	volatile store i32 %Z, i32* @G
+	ret i32 %X
 }
 
-int %test2(int %X) {
-        %Z = add int %X, 1  ;; inc
-        volatile store int %Z, int* %G
-        ret int %X
+define i32 @test2(i32 %X) {
+	%Z = add i32 %X, 1		;; inc
+	volatile store i32 %Z, i32* @G
+	ret i32 %X
 }
 
 //===---------------------------------------------------------------------===//
@@ -1238,37 +1199,6 @@ suggests using the 32-bit register (which is what ICC uses).
 
 //===---------------------------------------------------------------------===//
 
-rdar://5506677 - We compile this:
-
-define i32 @foo(double %x) {
-        %x14 = bitcast double %x to i64         ; <i64> [#uses=1]
-        %tmp713 = trunc i64 %x14 to i32         ; <i32> [#uses=1]
-        %tmp8 = and i32 %tmp713, 2147483647             ; <i32> [#uses=1]
-        ret i32 %tmp8
-}
-
-to:
-
-_foo:
-        subl    $12, %esp
-        fldl    16(%esp)
-        fstpl   (%esp)
-        movl    $2147483647, %eax
-        andl    (%esp), %eax
-        addl    $12, %esp
-        #FP_REG_KILL
-        ret
-
-It would be much better to eliminate the fldl/fstpl by folding the bitcast 
-into the load SDNode.  That would give us:
-
-_foo:
-        movl    $2147483647, %eax
-        andl    4(%esp), %eax
-        ret
-
-//===---------------------------------------------------------------------===//
-
 We compile this:
 
 void compare (long long foo) {