; RUN: llvm-as < %s | llc -relocation-model=static -mcpu=yonah | grep {andpd.*4(%esp), %xmm}

; The byval double argument lives at 4(%esp), which is 16-byte aligned, so the
; load of it can be folded directly into the andpd as a memory operand. The
; grep above passes only if an andpd reading from 4(%esp) is emitted.

target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i686-apple-darwin8"

; External double, loaded below with a stated 16-byte alignment.
@G = external global double

; Stores fabs(@G) + fabs(first field of %z) through %P.
;   %z - pair of doubles passed byval
;   %P - destination for the resulting double
define void @test({ double, double }* byval %z, double* %P) {
entry:
  ; Address of field 0 of the byval aggregate.
  %z.first.addr = getelementptr { double, double }* %z, i32 0, i32 0
  ; The load itself only claims 8-byte alignment.
  %z.first = load double* %z.first.addr, align 8
  %z.first.abs = tail call double @fabs( double %z.first )
  ; Second operand: absolute value of the external global.
  %g.val = load double* @G, align 16
  %g.abs = tail call double @fabs( double %g.val )
  ; Operand order is kept as (fabs(@G), fabs(%z.0)).
  %sum = add double %g.abs, %z.first.abs
  store double %sum, double* %P, align 8
  ret void
}

declare double @fabs(double)