Significantly simplify and improve handling of FP function results on x86-32.
On x86-32 the callee returns the value in ST(0), and the caller then has to move
it into an SSE register. This causes significant codegen ugliness in some cases.
For example, in the trivial fp-stack-direct-ret.ll testcase we used to generate:
subl $28, %esp
call L_foo$stub
fstpl 16(%esp)
movsd 16(%esp), %xmm0
movsd %xmm0, 8(%esp)
fldl 8(%esp)
addl $28, %esp
because we move the result of foo() into an XMM register, then have to
move it back for the return of bar.
Instead of hacking ever-more special cases into the call result lowering code,
we take a much simpler approach: on x86-32, an FP call result is modeled as always
being returned in an f80 register, which is then truncated to f32 or f64 as needed.
Similarly, when a function returns an FP value, we model it as an extension to f80 followed by the return.
This exposes the truncate and extensions to the dag combiner, allowing target
independent code to hack on them, eliminating them in this case. This gives
us this code for the example above:
subl $12, %esp
call L_foo$stub
addl $12, %esp
The nasty aspect of this is that these conversions are not legal, but we want
the second pass of dag combiner (post-legalize) to be able to hack on them.
To handle this, we lie to legalize and say they are legal, then custom expand
them on entry to the isel pass (PreprocessForFPConvert). This is gross, but
less gross than the code it is replacing :)
This also allows us to generate better code in several other cases. For
example on fp-stack-ret-conv.ll, we now generate:
subl $12, %esp
call L_foo$stub
fstps 8(%esp)
movl 16(%esp), %eax
cvtss2sd 8(%esp), %xmm0
movsd %xmm0, (%eax)
addl $12, %esp
where before we produced (incidentally, the old bad code is identical to what
gcc produces):
subl $12, %esp
call L_foo$stub
fstpl (%esp)
cvtsd2ss (%esp), %xmm0
cvtss2sd %xmm0, %xmm0
movl 16(%esp), %eax
movsd %xmm0, (%eax)
addl $12, %esp
Note that we generate slightly worse code on pr1505b.ll due to a scheduling
deficiency that is unrelated to this patch.
git-svn-id: 91177308-0d34-0410-b5e6-96231b3b80d8
2008-01-24 08:07:48 +00:00
; RUN: llvm-as < %s | llc -mcpu=i486 | grep fstpl | count 4
2007-08-15 13:36:28 +00:00
; RUN: llvm-as < %s | llc -mcpu=i486 | grep fstps | count 3
2008-01-24 06:35:44 +00:00
; PR1505
2007-07-03 00:58:37 +00:00
; Target description: little-endian, 32-bit pointers, i686 on Darwin.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
target triple = "i686-apple-darwin8"
; Named struct types for the C++ standard-library classes that the globals and
; calls in this test reference, directly or transitively.
%"struct.std::basic_ios<char,std::char_traits<char> >" = type { %"struct.std::ios_base", %"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8, i8, %"struct.std::basic_streambuf<char,std::char_traits<char> >"*, %"struct.std::ctype<char>"*, %"struct.std::num_get<char,std::istreambuf_iterator<char, std::char_traits<char> > >"*, %"struct.std::num_get<char,std::istreambuf_iterator<char, std::char_traits<char> > >"* }
%"struct.std::basic_ostream<char,std::char_traits<char> >" = type { i32 (...)**, %"struct.std::basic_ios<char,std::char_traits<char> >" }
%"struct.std::basic_streambuf<char,std::char_traits<char> >" = type { i32 (...)**, i8*, i8*, i8*, i8*, i8*, i8*, %"struct.std::locale" }
%"struct.std::ctype<char>" = type { %"struct.std::locale::facet", i32*, i8, i32*, i32*, i32*, i8, [256 x i8], [256 x i8], i8 }
%"struct.std::ctype_base" = type <{ i8 }>
%"struct.std::ios_base" = type { i32 (...)**, i32, i32, i32, i32, i32, %"struct.std::ios_base::_Callback_list"*, %"struct.std::ios_base::_Words", [8 x %"struct.std::ios_base::_Words"], i32, %"struct.std::ios_base::_Words"*, %"struct.std::locale" }
%"struct.std::ios_base::_Callback_list" = type { %"struct.std::ios_base::_Callback_list"*, void (i32, %"struct.std::ios_base"*, i32)*, i32, i32 }
%"struct.std::ios_base::_Words" = type { i8*, i32 }
%"struct.std::locale" = type { %"struct.std::locale::_Impl"* }
%"struct.std::locale::_Impl" = type { i32, %"struct.std::locale::facet"**, i32, %"struct.std::locale::facet"**, i8** }
%"struct.std::locale::facet" = type { i32 (...)**, i32 }
%"struct.std::num_get<char,std::istreambuf_iterator<char, std::char_traits<char> > >" = type { %"struct.std::locale::facet" }
; Inputs for the tanf/tan calls in @main, which reads them with volatile loads.
; @a is 0.3 as a float (written in LLVM's hexadecimal double form); @b is 0.6.
@a = global float 0x3FD3333340000000 ; <float*> [#uses=1]
@b = global double 6.000000e-01, align 8 ; <double*> [#uses=1]
; NOTE(review): @_ZSt8__ioinit and @__dso_handle are not referenced by any code
; in the portion of the file visible here, so the "#uses" counts on them may be stale.
@_ZSt8__ioinit = internal global %"struct.std::ctype_base" zeroinitializer ; <%"struct.std::ctype_base"*> [#uses=2]
@__dso_handle = external global i8* ; <i8**> [#uses=1]
; std::cout, the stream that @main writes to.
@_ZSt4cout = external global %"struct.std::basic_ostream<char,std::char_traits<char> >" ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=2]
; NUL-terminated labels printed in front of each result.
@.str = internal constant [12 x i8] c"tan float: \00" ; <[12 x i8]*> [#uses=1]
@.str1 = internal constant [13 x i8] c"tan double: \00" ; <[13 x i8]*> [#uses=1]
; External declarations: std::ios_base::Init destructor/constructor and __cxa_atexit.
; NOTE(review): none of the three is called by the code visible here.
declare void @_ZNSt8ios_base4InitD1Ev(%"struct.std::ctype_base"*)
declare void @_ZNSt8ios_base4InitC1Ev(%"struct.std::ctype_base"*)
declare i32 @__cxa_atexit(void (i8*)*, i8*, i8*)
; main: calls tanf() on a float and tan() on a double, each read with a volatile
; load, then prints both results through std::cout and returns 0.
; The two FP-returning calls and the conversions applied to their results are
; what the RUN lines of this test inspect in the generated x86 code.
define i32 @main() {
  ; Volatile loads keep the arguments from being constant-folded into the calls.
  %tmp6 = volatile load float* @a ; <float> [#uses=1]
  %tmp9 = tail call float @tanf( float %tmp6 ) ; <float> [#uses=1]
  %tmp12 = volatile load double* @b ; <double> [#uses=1]
  %tmp13 = tail call double @tan( double %tmp12 ) ; <double> [#uses=1]
  ; The double result of tan() is rounded to float here and extended back to
  ; double further down, just before it is printed.
  %tmp1314 = fptrunc double %tmp13 to float ; <float> [#uses=1]
  ; std::cout << "tan float: " << (double)tanf(a) << std::endl
  %tmp16 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([12 x i8]* @.str, i32 0, i32 0) ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
  %tmp1920 = fpext float %tmp9 to double ; <double> [#uses=1]
  %tmp22 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp16, double %tmp1920 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
  %tmp30 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp22 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0]
  ; std::cout << "tan double: " << (double)(float)tan(b) << std::endl
  %tmp34 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc( %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4cout, i8* getelementptr ([13 x i8]* @.str1, i32 0, i32 0) ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
  %tmp3940 = fpext float %tmp1314 to double ; <double> [#uses=1]
  %tmp42 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp34, double %tmp3940 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=1]
  %tmp51 = tail call %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_( %"struct.std::basic_ostream<char,std::char_traits<char> >"* %tmp42 ) ; <%"struct.std::basic_ostream<char,std::char_traits<char> >"*> [#uses=0]
  ret i32 0
}
; External functions called by @main.
; C math library: single- and double-precision tangent.
declare float @tanf(float)
declare double @tan(double)
; std::operator<<(std::ostream&, const char*)
declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, i8*)
; std::ostream::operator<<(double)
declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZNSolsEd(%"struct.std::basic_ostream<char,std::char_traits<char> >"*, double)
; std::endl(std::ostream&)
declare %"struct.std::basic_ostream<char,std::char_traits<char> >"* @_ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_(%"struct.std::basic_ostream<char,std::char_traits<char> >"*)