[X86] Use 4 byte preferred aggregate alignment on Win32

This helps reduce the frequency of stack realignment prologues in 32-bit X86 Windows code. Before this change and the corresponding clang change, we would take the max of the type's preferred alignment and the explicit alignment on the alloca. If you don't override the aggregate alignment in the datalayout, you get a default of 8 bytes, which exceeds the 4-byte stack alignment on Win32 and forces realignment. That default dates back to 2007 / r34356, and changing it globally seems prohibitively difficult at this point, so this change overrides it with "a:0:32" for 32-bit Windows targets only.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@236270 91177308-0d34-0410-b5e6-96231b3b80d8
2024-09-30 04:56:49 +00:00 · 2015-04-30 22:11:59 +00:00 · 2015-04-30 22:11:59 +00:00 · 7a1b190bcd
commit 7a1b190bcd
parent af2e236c11
5 changed files with 24 additions and 27 deletions
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -81,7 +81,7 @@ static std::string computeDataLayout(const Triple &TT) {

  // The stack is aligned to 32 bits on some ABIs and 128 bits on others.
  if (!TT.isArch64Bit() && TT.isOSWindows())
-    Ret += "-S32";
+    Ret += "-a:0:32-S32";
  else
    Ret += "-S128";

--- a/test/CodeGen/X86/inalloca-invoke.ll
+++ b/test/CodeGen/X86/inalloca-invoke.ll
@@ -21,8 +21,9 @@ blah:
  %end = getelementptr %frame.reverse, %frame.reverse* %rev_args, i32 0, i32 1

 ; CHECK:  calll   __chkstk
-; CHECK:  movl    %[[beg:[^,]*]], %esp
-; CHECK:  leal    12(%[[beg]]), %[[end:[^ ]*]]
+; CHECK:  movl %esp, %[[beg:[^ ]*]]
+; CHECK:  movl %esp, %[[end:[^ ]*]]
+; CHECK:  addl $12, %[[end]]

  call void @begin(%Iter* sret %temp.lvalue)
 ; CHECK:  calll _begin
--- a/test/CodeGen/X86/inalloca-stdcall.ll
+++ b/test/CodeGen/X86/inalloca-stdcall.ll
@@ -10,13 +10,12 @@ define void @g() {
  %b = alloca inalloca %Foo
 ; CHECK: movl    $8, %eax
 ; CHECK: calll   __chkstk
-; CHECK: movl   %[[REG:[^,]*]], %esp
  %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
  %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
  store i32 13, i32* %f1
  store i32 42, i32* %f2
-; CHECK: movl    $13, (%[[REG]])
-; CHECK: movl    $42, 4(%[[REG]])
+; CHECK: movl    $13, (%esp)
+; CHECK: movl    $42, 4(%esp)
  call x86_stdcallcc void @f(%Foo* inalloca %b)
 ; CHECK: calll   _f@8
 ; CHECK-NOT: %esp
--- a/test/CodeGen/X86/inalloca.ll
+++ b/test/CodeGen/X86/inalloca.ll
@@ -10,13 +10,12 @@ entry:
  %b = alloca inalloca %Foo
 ; CHECK: movl    $8, %eax
 ; CHECK: calll   __chkstk
-; CHECK: movl   %[[REG:[^,]*]], %esp
  %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
  %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
  store i32 13, i32* %f1
  store i32 42, i32* %f2
-; CHECK: movl    $13, (%[[REG]])
-; CHECK: movl    $42, 4(%[[REG]])
+; CHECK: movl    $13, (%esp)
+; CHECK: movl    $42, 4(%esp)
  call void @f(%Foo* inalloca %b)
 ; CHECK: calll   _f
  ret void
@@ -30,13 +29,12 @@ entry:
  %b = alloca inalloca %Foo
 ; CHECK: movl    $8, %eax
 ; CHECK: calll   __chkstk
-; CHECK: movl   %[[REG:[^,]*]], %esp
  %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
  %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
  store i32 13, i32* %f1
  store i32 42, i32* %f2
-; CHECK: movl    $13, (%[[REG]])
-; CHECK: movl    $42, 4(%[[REG]])
+; CHECK: movl    $13, (%esp)
+; CHECK: movl    $42, 4(%esp)
  call void @inreg_with_inalloca(i32 inreg 1, %Foo* inalloca %b)
 ; CHECK: movl    $1, %eax
 ; CHECK: calll   _inreg_with_inalloca
@@ -51,13 +49,12 @@ entry:
  %b = alloca inalloca %Foo
 ; CHECK: movl    $8, %eax
 ; CHECK: calll   __chkstk
-; CHECK: movl   %[[REG:[^,]*]], %esp
  %f1 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
  %f2 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
  store i32 13, i32* %f1
  store i32 42, i32* %f2
-; CHECK-DAG: movl    $13, (%[[REG]])
-; CHECK-DAG: movl    $42, 4(%[[REG]])
+; CHECK-DAG: movl    $13, (%esp)
+; CHECK-DAG: movl    $42, 4(%esp)
  call x86_thiscallcc void @thiscall_with_inalloca(i8* null, %Foo* inalloca %b)
 ; CHECK-DAG: xorl    %ecx, %ecx
 ; CHECK: calll   _thiscall_with_inalloca
--- a/test/CodeGen/X86/win32_sret.ll
+++ b/test/CodeGen/X86/win32_sret.ll
@@ -155,25 +155,25 @@ define void @test6_f(%struct.test6* %x) nounwind {
 ; LINUX-LABEL: test6_f:

 ; The %x argument is moved to %ecx. It will be the this pointer.
-; WIN32: movl    8(%ebp), %ecx
+; WIN32: movl    20(%esp), %ecx

 ; The %x argument is moved to (%esp). It will be the this pointer. With -O0
 ; we copy esp to ecx and use (ecx) instead of (esp).
-; MINGW_X86: movl    8(%ebp), %eax
+; MINGW_X86: movl    20(%esp), %eax
 ; MINGW_X86: movl    %eax, (%e{{([a-d]x)|(sp)}})

-; CYGWIN: movl    8(%ebp), %eax
+; CYGWIN: movl    20(%esp), %eax
 ; CYGWIN: movl    %eax, (%e{{([a-d]x)|(sp)}})

 ; The sret pointer is (%esp)
-; WIN32:          leal    8(%esp), %[[REG:e[a-d]x]]
+; WIN32:          leal    4(%esp), %[[REG:e[a-d]x]]
 ; WIN32-NEXT:     movl    %[[REG]], (%e{{([a-d]x)|(sp)}})

 ; The sret pointer is %ecx
-; MINGW_X86-NEXT: leal    8(%esp), %ecx
+; MINGW_X86-NEXT: leal    4(%esp), %ecx
 ; MINGW_X86-NEXT: calll   _test6_g

-; CYGWIN-NEXT: leal    8(%esp), %ecx
+; CYGWIN-NEXT: leal    4(%esp), %ecx
 ; CYGWIN-NEXT: calll   _test6_g

  %tmp = alloca %struct.test6, align 4
@@ -191,16 +191,16 @@ define void @test7_f(%struct.test7* %x) nounwind {
 ; LINUX-LABEL: test7_f:

 ; The %x argument is moved to %ecx on all OSs. It will be the this pointer.
-; WIN32:      movl    8(%ebp), %ecx
-; MINGW_X86:  movl    8(%ebp), %ecx
-; CYGWIN:     movl    8(%ebp), %ecx
+; WIN32:      movl    20(%esp), %ecx
+; MINGW_X86:  movl    20(%esp), %ecx
+; CYGWIN:     movl    20(%esp), %ecx

 ; The sret pointer is (%esp)
-; WIN32:          leal    8(%esp), %[[REG:e[a-d]x]]
+; WIN32:          leal    4(%esp), %[[REG:e[a-d]x]]
 ; WIN32-NEXT:     movl    %[[REG]], (%e{{([a-d]x)|(sp)}})
-; MINGW_X86:      leal    8(%esp), %[[REG:e[a-d]x]]
+; MINGW_X86:      leal    4(%esp), %[[REG:e[a-d]x]]
 ; MINGW_X86-NEXT: movl    %[[REG]], (%e{{([a-d]x)|(sp)}})
-; CYGWIN:         leal    8(%esp), %[[REG:e[a-d]x]]
+; CYGWIN:         leal    4(%esp), %[[REG:e[a-d]x]]
 ; CYGWIN-NEXT:    movl    %[[REG]], (%e{{([a-d]x)|(sp)}})

  %tmp = alloca %struct.test7, align 4