mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-14 00:32:55 +00:00
upgrade some entries, remove stuff that is done.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@47109 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ea1cddf546
commit
eb05f90c71
@ -56,22 +56,23 @@ store tmp -> [xslot]
|
||||
time, not at spiller time). *Note* however that this can only be done
|
||||
if Y is dead. Here's a testcase:
|
||||
|
||||
%.str_3 = external global [15 x sbyte] ; <[15 x sbyte]*> [#uses=0]
|
||||
implementation ; Functions:
|
||||
declare void %printf(int, ...)
|
||||
void %main() {
|
||||
@.str_3 = external global [15 x i8] ; <[15 x i8]*> [#uses=0]
|
||||
declare void @printf(i32, ...)
|
||||
define void @main() {
|
||||
build_tree.exit:
|
||||
br label %no_exit.i7
|
||||
no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit
|
||||
%tmp.0.1.0.i9 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.34.i18, %no_exit.i7 ] ; <double> [#uses=1]
|
||||
%tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.28.i16, %no_exit.i7 ] ; <double> [#uses=1]
|
||||
%tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00
|
||||
%tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00
|
||||
br bool false, label %Compute_Tree.exit23, label %no_exit.i7
|
||||
Compute_Tree.exit23: ; preds = %no_exit.i7
|
||||
tail call void (int, ...)* %printf( int 0 )
|
||||
store double %tmp.34.i18, double* null
|
||||
ret void
|
||||
br label %no_exit.i7
|
||||
|
||||
no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit
|
||||
%tmp.0.1.0.i9 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.34.i18, %no_exit.i7 ] ; <double> [#uses=1]
|
||||
%tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ], [ %tmp.28.i16, %no_exit.i7 ] ; <double> [#uses=1]
|
||||
%tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00 ; <double> [#uses=1]
|
||||
%tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00 ; <double> [#uses=2]
|
||||
br i1 false, label %Compute_Tree.exit23, label %no_exit.i7
|
||||
|
||||
Compute_Tree.exit23: ; preds = %no_exit.i7
|
||||
tail call void (i32, ...)* @printf( i32 0 )
|
||||
store double %tmp.34.i18, double* null
|
||||
ret void
|
||||
}
|
||||
|
||||
We currently emit:
|
||||
@ -125,25 +126,6 @@ more experiments on different x86 machines.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Currently the x86 codegen isn't very good at mixing SSE and FPStack
|
||||
code:
|
||||
|
||||
unsigned int foo(double x) { return x; }
|
||||
|
||||
foo:
|
||||
subl $20, %esp
|
||||
movsd 24(%esp), %xmm0
|
||||
movsd %xmm0, 8(%esp)
|
||||
fldl 8(%esp)
|
||||
fisttpll (%esp)
|
||||
movl (%esp), %eax
|
||||
addl $20, %esp
|
||||
ret
|
||||
|
||||
This will be solved when we go to a dynamic programming based isel.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Lower memcpy / memset to a series of SSE 128 bit move instructions when it's
|
||||
feasible.
|
||||
|
||||
|
@ -435,44 +435,6 @@ require a copy to be inserted (in X86InstrInfo::convertToThreeAddress).
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Consider this:
|
||||
|
||||
typedef struct pair { float A, B; } pair;
|
||||
void pairtest(pair P, float *FP) {
|
||||
*FP = P.A+P.B;
|
||||
}
|
||||
|
||||
We currently generate this code with llvmgcc4:
|
||||
|
||||
_pairtest:
|
||||
movl 8(%esp), %eax
|
||||
movl 4(%esp), %ecx
|
||||
movd %eax, %xmm0
|
||||
movd %ecx, %xmm1
|
||||
addss %xmm0, %xmm1
|
||||
movl 12(%esp), %eax
|
||||
movss %xmm1, (%eax)
|
||||
ret
|
||||
|
||||
we should be able to generate:
|
||||
_pairtest:
|
||||
movss 4(%esp), %xmm0
|
||||
movl 12(%esp), %eax
|
||||
addss 8(%esp), %xmm0
|
||||
movss %xmm0, (%eax)
|
||||
ret
|
||||
|
||||
The issue is that llvmgcc4 is forcing the struct to memory, then passing it as
|
||||
integer chunks. It does this so that structs like {short,short} are passed in
|
||||
a single 32-bit integer stack slot. We should handle the safe cases above much
|
||||
better, while still handling the hard cases.
|
||||
|
||||
While true in general, in this specific case we could do better by promoting
|
||||
load int + bitcast to float -> load float. This basically needs alignment info;
|
||||
the code is already implemented (but disabled) in dag combine.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
Another instruction selector deficiency:
|
||||
|
||||
void %bar() {
|
||||
@ -551,25 +513,24 @@ do not make use of.
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
int %foo(int* %a, int %t) {
|
||||
define i32 @foo(i32* %a, i32 %t) {
|
||||
entry:
|
||||
br label %cond_true
|
||||
br label %cond_true
|
||||
|
||||
cond_true: ; preds = %cond_true, %entry
|
||||
%x.0.0 = phi int [ 0, %entry ], [ %tmp9, %cond_true ]
|
||||
%t_addr.0.0 = phi int [ %t, %entry ], [ %tmp7, %cond_true ]
|
||||
%tmp2 = getelementptr int* %a, int %x.0.0
|
||||
%tmp3 = load int* %tmp2 ; <int> [#uses=1]
|
||||
%tmp5 = add int %t_addr.0.0, %x.0.0 ; <int> [#uses=1]
|
||||
%tmp7 = add int %tmp5, %tmp3 ; <int> [#uses=2]
|
||||
%tmp9 = add int %x.0.0, 1 ; <int> [#uses=2]
|
||||
%tmp = setgt int %tmp9, 39 ; <bool> [#uses=1]
|
||||
br bool %tmp, label %bb12, label %cond_true
|
||||
cond_true: ; preds = %cond_true, %entry
|
||||
%x.0.0 = phi i32 [ 0, %entry ], [ %tmp9, %cond_true ] ; <i32> [#uses=3]
|
||||
%t_addr.0.0 = phi i32 [ %t, %entry ], [ %tmp7, %cond_true ] ; <i32> [#uses=1]
|
||||
%tmp2 = getelementptr i32* %a, i32 %x.0.0 ; <i32*> [#uses=1]
|
||||
%tmp3 = load i32* %tmp2 ; <i32> [#uses=1]
|
||||
%tmp5 = add i32 %t_addr.0.0, %x.0.0 ; <i32> [#uses=1]
|
||||
%tmp7 = add i32 %tmp5, %tmp3 ; <i32> [#uses=2]
|
||||
%tmp9 = add i32 %x.0.0, 1 ; <i32> [#uses=2]
|
||||
%tmp = icmp sgt i32 %tmp9, 39 ; <i1> [#uses=1]
|
||||
br i1 %tmp, label %bb12, label %cond_true
|
||||
|
||||
bb12: ; preds = %cond_true
|
||||
ret int %tmp7
|
||||
bb12: ; preds = %cond_true
|
||||
ret i32 %tmp7
|
||||
}
|
||||
|
||||
is pessimized by -loop-reduce and -indvars
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
@ -704,9 +665,9 @@ The add\sub pair is really unneeded here.
|
||||
|
||||
Consider the expansion of:
|
||||
|
||||
uint %test3(uint %X) {
|
||||
%tmp1 = rem uint %X, 255
|
||||
ret uint %tmp1
|
||||
define i32 @test3(i32 %X) {
|
||||
%tmp1 = urem i32 %X, 255
|
||||
ret i32 %tmp1
|
||||
}
|
||||
|
||||
Currently it compiles to:
|
||||
@ -948,22 +909,22 @@ Another example is:
|
||||
;; allocator turns the shift into an LEA. This also occurs for ADD.
|
||||
|
||||
; Check that the shift gets turned into an LEA.
|
||||
; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -x86-asm-syntax=intel | \
|
||||
; RUN: llvm-as < %s | llc -march=x86 -x86-asm-syntax=intel | \
|
||||
; RUN: not grep {mov E.X, E.X}
|
||||
|
||||
%G = external global int
|
||||
@G = external global i32 ; <i32*> [#uses=3]
|
||||
|
||||
int %test1(int %X, int %Y) {
|
||||
%Z = add int %X, %Y
|
||||
volatile store int %Y, int* %G
|
||||
volatile store int %Z, int* %G
|
||||
ret int %X
|
||||
define i32 @test1(i32 %X, i32 %Y) {
|
||||
%Z = add i32 %X, %Y ; <i32> [#uses=1]
|
||||
volatile store i32 %Y, i32* @G
|
||||
volatile store i32 %Z, i32* @G
|
||||
ret i32 %X
|
||||
}
|
||||
|
||||
int %test2(int %X) {
|
||||
%Z = add int %X, 1 ;; inc
|
||||
volatile store int %Z, int* %G
|
||||
ret int %X
|
||||
define i32 @test2(i32 %X) {
|
||||
%Z = add i32 %X, 1 ; <i32> [#uses=1]
|
||||
volatile store i32 %Z, i32* @G
|
||||
ret i32 %X
|
||||
}
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
@ -1238,37 +1199,6 @@ suggests using the 32-bit register (which is what ICC uses).
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
rdar://5506677 - We compile this:
|
||||
|
||||
define i32 @foo(double %x) {
|
||||
%x14 = bitcast double %x to i64 ; <i64> [#uses=1]
|
||||
%tmp713 = trunc i64 %x14 to i32 ; <i32> [#uses=1]
|
||||
%tmp8 = and i32 %tmp713, 2147483647 ; <i32> [#uses=1]
|
||||
ret i32 %tmp8
|
||||
}
|
||||
|
||||
to:
|
||||
|
||||
_foo:
|
||||
subl $12, %esp
|
||||
fldl 16(%esp)
|
||||
fstpl (%esp)
|
||||
movl $2147483647, %eax
|
||||
andl (%esp), %eax
|
||||
addl $12, %esp
|
||||
#FP_REG_KILL
|
||||
ret
|
||||
|
||||
It would be much better to eliminate the fldl/fstpl by folding the bitcast
|
||||
into the load SDNode. That would give us:
|
||||
|
||||
_foo:
|
||||
movl $2147483647, %eax
|
||||
andl 4(%esp), %eax
|
||||
ret
|
||||
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
We compile this:
|
||||
|
||||
void compare (long long foo) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user