[x32] Fix segmented stacks support

Summary:
Add an x32 target case to the segmented-stacks*.ll tests and make the
corresponding code changes so that they pass.

Test Plan: tests updated with x32 target

Reviewers: nadav, rafael, dschuff

Subscribers: llvm-commits, zinovy.nis

Differential Revision: http://reviews.llvm.org/D5245

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@218247 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Pavel Chupin
Date:   2014-09-22 13:11:35 +00:00
parent 21e5bf8461
commit 25c57d5cfe
9 changed files with 143 additions and 42 deletions
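
For context: segmented ("split") stacks are requested per function through the
"split-stack" attribute, and this commit teaches the x86-64 lowering to respect
the ILP32 pointer size of the x32 ABI. A minimal way to exercise the change (a
sketch; f.ll and the empty function body are hypothetical, only the triple and
flags come from the RUN lines in the updated tests below):

    ; f.ll -- smallest input treated as a split-stack function
    define void @f() #0 {
      ret void
    }
    attributes #0 = { "split-stack" }

    ; llc < f.ll -mcpu=generic -mtriple=x86_64-linux-gnux32 -verify-machineinstrs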

File: lib/Target/X86/X86CallingConv.td

@@ -226,6 +226,7 @@ def CC_X86_64_C : CallingConv<[
   CCIfType<[i8, i16], CCPromoteToType<i32>>,
 
   // The 'nest' parameter, if any, is passed in R10.
+  CCIfNest<CCIfSubtarget<"isTarget64BitILP32()", CCAssignToReg<[R10D]>>>,
   CCIfNest<CCAssignToReg<[R10]>>,
 
   // The first 6 integer arguments are passed in integer registers.

File: lib/Target/X86/X86FrameLowering.cpp

@@ -1315,7 +1315,7 @@ HasNestArgument(const MachineFunction *MF) {
 /// and the properties of the function either one or two registers will be
 /// needed. Set primary to true for the first register, false for the second.
 static unsigned
-GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
+GetScratchRegister(bool Is64Bit, bool IsLP64, const MachineFunction &MF, bool Primary) {
   CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
 
   // Erlang stuff.
@@ -1326,8 +1326,12 @@ GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
     return Primary ? X86::EBX : X86::EDI;
   }
 
-  if (Is64Bit)
-    return Primary ? X86::R11 : X86::R12;
+  if (Is64Bit) {
+    if (IsLP64)
+      return Primary ? X86::R11 : X86::R12;
+    else
+      return Primary ? X86::R11D : X86::R12D;
+  }
 
   bool IsNested = HasNestArgument(&MF);
@@ -1355,10 +1359,11 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
   uint64_t StackSize;
   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
   bool Is64Bit = STI.is64Bit();
+  const bool IsLP64 = STI.isTarget64BitLP64();
   unsigned TlsReg, TlsOffset;
   DebugLoc DL;
 
-  unsigned ScratchReg = GetScratchRegister(Is64Bit, MF, true);
+  unsigned ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
   assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
          "Scratch register is live-in");
@@ -1396,7 +1401,7 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
   }
 
   if (IsNested)
-    allocMBB->addLiveIn(X86::R10);
+    allocMBB->addLiveIn(IsLP64 ? X86::R10 : X86::R10D);
 
   MF.push_front(allocMBB);
   MF.push_front(checkMBB);
@@ -1409,7 +1414,7 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
   if (Is64Bit) {
     if (STI.isTargetLinux()) {
       TlsReg = X86::FS;
-      TlsOffset = 0x70;
+      TlsOffset = IsLP64 ? 0x70 : 0x40;
     } else if (STI.isTargetDarwin()) {
       TlsReg = X86::GS;
       TlsOffset = 0x60 + 90*8; // See pthread_machdep.h. Steal TLS slot 90.
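
These offsets select the per-thread stack-boundary slot that the split-stack
runtime keeps in TLS: 0x70 under LP64, 0x40 under x32 (0x40 == 64, matching the
"cmpl %fs:64, %esp" checks added to the tests below). A sketch of the two
resulting boundary checks; the LP64 form is inferred from the pre-existing
code path:

    cmpq %fs:0x70, %rsp    # LP64: 64-bit compare against the LP64 slot
    cmpl %fs:0x40, %esp    # x32: 32-bit compare against the ILP32 slot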
@@ -1424,12 +1429,12 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
     }
 
     if (CompareStackPointer)
-      ScratchReg = X86::RSP;
+      ScratchReg = IsLP64 ? X86::RSP : X86::ESP;
     else
-      BuildMI(checkMBB, DL, TII.get(X86::LEA64r), ScratchReg).addReg(X86::RSP)
+      BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::LEA64r : X86::LEA64_32r), ScratchReg).addReg(X86::RSP)
         .addImm(1).addReg(0).addImm(-StackSize).addReg(0);
 
-    BuildMI(checkMBB, DL, TII.get(X86::CMP64rm)).addReg(ScratchReg)
+    BuildMI(checkMBB, DL, TII.get(IsLP64 ? X86::CMP64rm : X86::CMP32rm)).addReg(ScratchReg)
       .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg);
   } else {
     if (STI.isTargetLinux()) {
@@ -1463,11 +1468,11 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
     bool SaveScratch2;
     if (CompareStackPointer) {
       // The primary scratch register is available for holding the TLS offset.
-      ScratchReg2 = GetScratchRegister(Is64Bit, MF, true);
+      ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, true);
       SaveScratch2 = false;
     } else {
       // Need to use a second register to hold the TLS offset
-      ScratchReg2 = GetScratchRegister(Is64Bit, MF, false);
+      ScratchReg2 = GetScratchRegister(Is64Bit, IsLP64, MF, false);
 
       // Unfortunately, with fastcc the second scratch register may hold an
       // argument.
@@ -1505,15 +1510,21 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
     // Functions with nested arguments use R10, so it needs to be saved across
     // the call to _morestack
-    if (IsNested)
-      BuildMI(allocMBB, DL, TII.get(X86::MOV64rr), X86::RAX).addReg(X86::R10);
+    const unsigned RegAX = IsLP64 ? X86::RAX : X86::EAX;
+    const unsigned Reg10 = IsLP64 ? X86::R10 : X86::R10D;
+    const unsigned Reg11 = IsLP64 ? X86::R11 : X86::R11D;
+    const unsigned MOVrr = IsLP64 ? X86::MOV64rr : X86::MOV32rr;
+    const unsigned MOVri = IsLP64 ? X86::MOV64ri : X86::MOV32ri;
 
-    BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R10)
+    if (IsNested)
+      BuildMI(allocMBB, DL, TII.get(MOVrr), RegAX).addReg(Reg10);
+
+    BuildMI(allocMBB, DL, TII.get(MOVri), Reg10)
       .addImm(StackSize);
-    BuildMI(allocMBB, DL, TII.get(X86::MOV64ri), X86::R11)
+    BuildMI(allocMBB, DL, TII.get(MOVri), Reg11)
       .addImm(X86FI->getArgumentStackSize());
-    MF.getRegInfo().setPhysRegUsed(X86::R10);
-    MF.getRegInfo().setPhysRegUsed(X86::R11);
+    MF.getRegInfo().setPhysRegUsed(Reg10);
+    MF.getRegInfo().setPhysRegUsed(Reg11);
   } else {
     BuildMI(allocMBB, DL, TII.get(X86::PUSHi32))
       .addImm(X86FI->getArgumentStackSize());
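
The __morestack register protocol is unchanged here, only the operand width
shrinks: the requested frame size travels in R10 and the incoming-argument size
in R11, so x32 writes the 32-bit aliases R10D/R11D instead. The resulting call
sequence, as checked by the X32ABI prefix in the updated
segmented-stacks-dynamic.ll test:

    movl $24, %r10d        # StackSize
    movl $0, %r11d         # X86FI->getArgumentStackSize()
    callq __morestack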
@@ -1567,6 +1578,7 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
       ->getSlotSize();
   const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
   const bool Is64Bit = STI.is64Bit();
+  const bool IsLP64 = STI.isTarget64BitLP64();
   DebugLoc DL;
   // HiPE-specific values
   const unsigned HipeLeafWords = 24;
@@ -1660,7 +1672,7 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
     SPLimitOffset = 0x4c;
   }
 
-  ScratchReg = GetScratchRegister(Is64Bit, MF, true);
+  ScratchReg = GetScratchRegister(Is64Bit, IsLP64, MF, true);
   assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
          "HiPE prologue scratch register is live-in");

File: lib/Target/X86/X86ISelLowering.cpp

@@ -14933,7 +14933,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
   EVT VT = Op.getNode()->getValueType(0);
 
   bool Is64Bit = Subtarget->is64Bit();
-  EVT SPTy = Is64Bit ? MVT::i64 : MVT::i32;
+  EVT SPTy = getPointerTy();
 
   if (SplitStack) {
     MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -14951,7 +14951,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
     }
 
     const TargetRegisterClass *AddrRegClass =
-      getRegClassFor(Subtarget->is64Bit() ? MVT::i64:MVT::i32);
+      getRegClassFor(getPointerTy());
     unsigned Vreg = MRI.createVirtualRegister(AddrRegClass);
     Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size);
     SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain,
@@ -14960,7 +14960,7 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
     return DAG.getMergeValues(Ops1, dl);
   } else {
     SDValue Flag;
-    unsigned Reg = (Subtarget->is64Bit() ? X86::RAX : X86::EAX);
+    const unsigned Reg = (Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX);
 
     Chain = DAG.getCopyToReg(Chain, dl, Reg, Size, Flag);
     Flag = Chain.getValue(1);
@@ -18966,8 +18966,8 @@ X86TargetLowering::EmitLoweredSelect(MachineInstr *MI,
 }
 
 MachineBasicBlock *
-X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
-                                        bool Is64Bit) const {
+X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI,
+                                        MachineBasicBlock *BB) const {
   MachineFunction *MF = BB->getParent();
   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
   DebugLoc DL = MI->getDebugLoc();
@@ -18975,8 +18975,11 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
   assert(MF->shouldSplitStack());
 
-  unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
-  unsigned TlsOffset = Is64Bit ? 0x70 : 0x30;
+  const bool Is64Bit = Subtarget->is64Bit();
+  const bool IsLP64 = Subtarget->isTarget64BitLP64();
+
+  const unsigned TlsReg = Is64Bit ? X86::FS : X86::GS;
+  const unsigned TlsOffset = IsLP64 ? 0x70 : Is64Bit ? 0x40 : 0x30;
 
   // BB:
   // ... [Till the alloca]
@@ -19000,14 +19003,14 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
   MachineRegisterInfo &MRI = MF->getRegInfo();
   const TargetRegisterClass *AddrRegClass =
-    getRegClassFor(Is64Bit ? MVT::i64:MVT::i32);
+    getRegClassFor(getPointerTy());
 
   unsigned mallocPtrVReg = MRI.createVirtualRegister(AddrRegClass),
     bumpSPPtrVReg = MRI.createVirtualRegister(AddrRegClass),
     tmpSPVReg = MRI.createVirtualRegister(AddrRegClass),
     SPLimitVReg = MRI.createVirtualRegister(AddrRegClass),
     sizeVReg = MI->getOperand(1).getReg(),
-    physSPReg = Is64Bit ? X86::RSP : X86::ESP;
+    physSPReg = IsLP64 || Subtarget->isTargetNaCl64() ? X86::RSP : X86::ESP;
 
   MachineFunction::iterator MBBIter = BB;
   ++MBBIter;
@@ -19023,9 +19026,9 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
   // Add code to the main basic block to check if the stack limit has been hit,
   // and if so, jump to mallocMBB otherwise to bumpMBB.
   BuildMI(BB, DL, TII->get(TargetOpcode::COPY), tmpSPVReg).addReg(physSPReg);
-  BuildMI(BB, DL, TII->get(Is64Bit ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
+  BuildMI(BB, DL, TII->get(IsLP64 ? X86::SUB64rr:X86::SUB32rr), SPLimitVReg)
     .addReg(tmpSPVReg).addReg(sizeVReg);
-  BuildMI(BB, DL, TII->get(Is64Bit ? X86::CMP64mr:X86::CMP32mr))
+  BuildMI(BB, DL, TII->get(IsLP64 ? X86::CMP64mr:X86::CMP32mr))
     .addReg(0).addImm(1).addReg(0).addImm(TlsOffset).addReg(TlsReg)
     .addReg(SPLimitVReg);
   BuildMI(BB, DL, TII->get(X86::JG_4)).addMBB(mallocMBB);
@@ -19043,7 +19046,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
       .getSubtargetImpl()
       ->getRegisterInfo()
       ->getCallPreservedMask(CallingConv::C);
-  if (Is64Bit) {
+  if (IsLP64) {
     BuildMI(mallocMBB, DL, TII->get(X86::MOV64rr), X86::RDI)
       .addReg(sizeVReg);
     BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
@@ -19051,6 +19054,14 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
       .addRegMask(RegMask)
       .addReg(X86::RDI, RegState::Implicit)
       .addReg(X86::RAX, RegState::ImplicitDefine);
+  } else if (Is64Bit) {
+    BuildMI(mallocMBB, DL, TII->get(X86::MOV32rr), X86::EDI)
+      .addReg(sizeVReg);
+    BuildMI(mallocMBB, DL, TII->get(X86::CALL64pcrel32))
+      .addExternalSymbol("__morestack_allocate_stack_space")
+      .addRegMask(RegMask)
+      .addReg(X86::EDI, RegState::Implicit)
+      .addReg(X86::EAX, RegState::ImplicitDefine);
   } else {
     BuildMI(mallocMBB, DL, TII->get(X86::SUB32ri), physSPReg).addReg(physSPReg)
       .addImm(12);
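
With the new branch, a dynamic alloca that overflows the stack limit on x32
still reaches the 64-bit call instruction but passes the size and result
through the 32-bit register halves. The expected sequence, per the X32ABI
checks in segmented-stacks-dynamic.ll below (the source register of the first
move is allocator-chosen; %ecx is illustrative):

    movl %ecx, %edi        # allocation size
    callq __morestack_allocate_stack_space
    movl %eax, %edi        # returned pointer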
@@ -19066,7 +19077,7 @@ X86TargetLowering::EmitLoweredSegAlloca(MachineInstr *MI, MachineBasicBlock *BB,
       .addImm(16);
 
   BuildMI(mallocMBB, DL, TII->get(TargetOpcode::COPY), mallocPtrVReg)
-    .addReg(Is64Bit ? X86::RAX : X86::EAX);
+    .addReg(IsLP64 ? X86::RAX : X86::EAX);
   BuildMI(mallocMBB, DL, TII->get(X86::JMP_4)).addMBB(continueMBB);
 
   // Set up the CFG correctly.
@@ -19500,9 +19511,8 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
   case X86::WIN_ALLOCA:
     return EmitLoweredWinAlloca(MI, BB);
   case X86::SEG_ALLOCA_32:
-    return EmitLoweredSegAlloca(MI, BB, false);
   case X86::SEG_ALLOCA_64:
-    return EmitLoweredSegAlloca(MI, BB, true);
+    return EmitLoweredSegAlloca(MI, BB);
   case X86::TLSCall_32:
   case X86::TLSCall_64:
     return EmitLoweredTLSCall(MI, BB);

File: lib/Target/X86/X86ISelLowering.h

@@ -998,8 +998,7 @@ namespace llvm {
                                            MachineBasicBlock *BB) const;
 
     MachineBasicBlock *EmitLoweredSegAlloca(MachineInstr *MI,
-                                            MachineBasicBlock *BB,
-                                            bool Is64Bit) const;
+                                            MachineBasicBlock *BB) const;
 
     MachineBasicBlock *EmitLoweredTLSCall(MachineInstr *MI,
                                           MachineBasicBlock *BB) const;

File: lib/Target/X86/X86InstrCompiler.td

@@ -46,11 +46,11 @@ let Defs = [ESP, EFLAGS], Uses = [ESP] in {
 def ADJCALLSTACKDOWN32 : I<0, Pseudo, (outs), (ins i32imm:$amt),
                            "#ADJCALLSTACKDOWN",
                            [(X86callseq_start timm:$amt)]>,
-                          Requires<[Not64BitMode]>;
+                          Requires<[NotLP64]>;
 def ADJCALLSTACKUP32   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
                            "#ADJCALLSTACKUP",
                            [(X86callseq_end timm:$amt1, timm:$amt2)]>,
-                          Requires<[Not64BitMode]>;
+                          Requires<[NotLP64]>;
 }
 
 // ADJCALLSTACKDOWN/UP implicitly use/def RSP because they may be expanded into
@@ -62,11 +62,11 @@ let Defs = [RSP, EFLAGS], Uses = [RSP] in {
 def ADJCALLSTACKDOWN64 : I<0, Pseudo, (outs), (ins i32imm:$amt),
                            "#ADJCALLSTACKDOWN",
                            [(X86callseq_start timm:$amt)]>,
-                          Requires<[In64BitMode]>;
+                          Requires<[IsLP64]>;
 def ADJCALLSTACKUP64   : I<0, Pseudo, (outs), (ins i32imm:$amt1, i32imm:$amt2),
                            "#ADJCALLSTACKUP",
                            [(X86callseq_end timm:$amt1, timm:$amt2)]>,
-                          Requires<[In64BitMode]>;
+                          Requires<[IsLP64]>;
 }
@@ -118,7 +118,7 @@ def SEG_ALLOCA_32 : I<0, Pseudo, (outs GR32:$dst), (ins GR32:$size),
                       "# variable sized alloca for segmented stacks",
                       [(set GR32:$dst,
                          (X86SegAlloca GR32:$size))]>,
-                    Requires<[Not64BitMode]>;
+                    Requires<[NotLP64]>;
 
 let Defs = [RAX, RSP, EFLAGS], Uses = [RSP] in
 def SEG_ALLOCA_64 : I<0, Pseudo, (outs GR64:$dst), (ins GR64:$size),

File: lib/Target/X86/X86InstrInfo.cpp

@@ -101,8 +101,8 @@ void X86InstrInfo::anchor() {}
 
 X86InstrInfo::X86InstrInfo(X86Subtarget &STI)
     : X86GenInstrInfo(
-          (STI.is64Bit() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
-          (STI.is64Bit() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
+          (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKDOWN64 : X86::ADJCALLSTACKDOWN32),
+          (STI.isTarget64BitLP64() ? X86::ADJCALLSTACKUP64 : X86::ADJCALLSTACKUP32)),
       Subtarget(STI), RI(STI) {
 
   static const X86OpTblEntry OpTbl2Addr[] = {

File: lib/Target/X86/X86InstrInfo.td

@@ -746,6 +746,8 @@ def Not64BitMode : Predicate<"!Subtarget->is64Bit()">,
                              AssemblerPredicate<"!Mode64Bit", "Not 64-bit mode">;
 def In64BitMode  : Predicate<"Subtarget->is64Bit()">,
                              AssemblerPredicate<"Mode64Bit", "64-bit mode">;
+def IsLP64       : Predicate<"Subtarget->isTarget64BitLP64()">;
+def NotLP64      : Predicate<"!Subtarget->isTarget64BitLP64()">;
 def In16BitMode  : Predicate<"Subtarget->is16Bit()">,
                              AssemblerPredicate<"Mode16Bit", "16-bit mode">;
 def Not16BitMode : Predicate<"!Subtarget->is16Bit()">,

View File

@@ -1,7 +1,9 @@
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -verify-machineinstrs | FileCheck %s -check-prefix=X32ABI
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -filetype=obj
 
 ; Just to prevent the alloca from being optimized away
 declare void @dummy_use(i32*, i32)
@@ -61,6 +63,26 @@ false:
 ; X64-NEXT: callq __morestack_allocate_stack_space
 ; X64: movq %rax, %rdi
 
+; X32ABI-LABEL: test_basic:
+
+; X32ABI: cmpl %fs:64, %esp
+; X32ABI-NEXT: ja .LBB0_2
+
+; X32ABI: movl $24, %r10d
+; X32ABI-NEXT: movl $0, %r11d
+; X32ABI-NEXT: callq __morestack
+; X32ABI-NEXT: ret
+
+; X32ABI: movl %esp, %[[EDI:edi|eax]]
+; X32ABI: subl %{{.*}}, %[[EDI]]
+; X32ABI-NEXT: cmpl %[[EDI]], %fs:64
+
+; X32ABI: movl %[[EDI]], %esp
+
+; X32ABI: movl %{{.*}}, %edi
+; X32ABI-NEXT: callq __morestack_allocate_stack_space
+; X32ABI: movl %eax, %edi
+
 }
 
 attributes #0 = { "split-stack" }
attributes #0 = { "split-stack" }

File: test/CodeGen/X86/segmented-stacks.ll

@@ -1,5 +1,6 @@
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -verify-machineinstrs | FileCheck %s -check-prefix=X32ABI
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -verify-machineinstrs | FileCheck %s -check-prefix=X32-Darwin
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -verify-machineinstrs | FileCheck %s -check-prefix=X64-Darwin
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -verify-machineinstrs | FileCheck %s -check-prefix=X32-MinGW
@@ -9,6 +10,7 @@
 ; We used to crash with filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux -filetype=obj
+; RUN: llc < %s -mcpu=generic -mtriple=x86_64-linux-gnux32 -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-darwin -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=x86_64-darwin -filetype=obj
 ; RUN: llc < %s -mcpu=generic -mtriple=i686-mingw32 -filetype=obj
@@ -51,6 +53,16 @@ define void @test_basic() #0 {
 ; X64-Linux-NEXT: callq __morestack
 ; X64-Linux-NEXT: ret
 
+; X32ABI-LABEL: test_basic:
+
+; X32ABI: cmpl %fs:64, %esp
+; X32ABI-NEXT: ja .LBB0_2
+
+; X32ABI: movl $40, %r10d
+; X32ABI-NEXT: movl $0, %r11d
+; X32ABI-NEXT: callq __morestack
+; X32ABI-NEXT: ret
+
 ; X32-Darwin-LABEL: test_basic:
 ; X32-Darwin: movl $432, %ecx
@@ -129,6 +141,16 @@ define i32 @test_nested(i32 * nest %closure, i32 %other) #0 {
 ; X64-Linux-NEXT: ret
 ; X64-Linux-NEXT: movq %rax, %r10
 
+; X32ABI: cmpl %fs:64, %esp
+; X32ABI-NEXT: ja .LBB1_2
+
+; X32ABI: movl %r10d, %eax
+; X32ABI-NEXT: movl $56, %r10d
+; X32ABI-NEXT: movl $0, %r11d
+; X32ABI-NEXT: callq __morestack
+; X32ABI-NEXT: ret
+; X32ABI-NEXT: movq %rax, %r10
+
 ; X32-Darwin: movl $432, %edx
 ; X32-Darwin-NEXT: cmpl %gs:(%edx), %esp
 ; X32-Darwin-NEXT: ja LBB1_2
@@ -202,6 +224,15 @@ define void @test_large() #0 {
 ; X64-Linux-NEXT: callq __morestack
 ; X64-Linux-NEXT: ret
 
+; X32ABI: leal -40008(%rsp), %r11d
+; X32ABI-NEXT: cmpl %fs:64, %r11d
+; X32ABI-NEXT: ja .LBB2_2
+
+; X32ABI: movl $40008, %r10d
+; X32ABI-NEXT: movl $0, %r11d
+; X32ABI-NEXT: callq __morestack
+; X32ABI-NEXT: ret
+
 ; X32-Darwin: leal -40012(%esp), %ecx
 ; X32-Darwin-NEXT: movl $432, %eax
 ; X32-Darwin-NEXT: cmpl %gs:(%eax), %ecx
@@ -276,6 +307,16 @@ define fastcc void @test_fastcc() #0 {
 ; X64-Linux-NEXT: callq __morestack
 ; X64-Linux-NEXT: ret
 
+; X32ABI-LABEL: test_fastcc:
+
+; X32ABI: cmpl %fs:64, %esp
+; X32ABI-NEXT: ja .LBB3_2
+
+; X32ABI: movl $40, %r10d
+; X32ABI-NEXT: movl $0, %r11d
+; X32ABI-NEXT: callq __morestack
+; X32ABI-NEXT: ret
+
 ; X32-Darwin-LABEL: test_fastcc:
 ; X32-Darwin: movl $432, %eax
@@ -356,6 +397,17 @@ define fastcc void @test_fastcc_large() #0 {
 ; X64-Linux-NEXT: callq __morestack
 ; X64-Linux-NEXT: ret
 
+; X32ABI-LABEL: test_fastcc_large:
+
+; X32ABI: leal -40008(%rsp), %r11d
+; X32ABI-NEXT: cmpl %fs:64, %r11d
+; X32ABI-NEXT: ja .LBB4_2
+
+; X32ABI: movl $40008, %r10d
+; X32ABI-NEXT: movl $0, %r11d
+; X32ABI-NEXT: callq __morestack
+; X32ABI-NEXT: ret
+
 ; X32-Darwin-LABEL: test_fastcc_large:
 ; X32-Darwin: leal -40012(%esp), %eax
@@ -446,6 +498,9 @@ define void @test_nostack() #0 {
 ; X64-Linux-LABEL: test_nostack:
 ; X32-Linux-NOT: callq __morestack
 
+; X32ABI-LABEL: test_nostack:
+; X32ABI-NOT: callq __morestack
+
 ; X32-Darwin-LABEL: test_nostack:
 ; X32-Darwin-NOT: calll __morestack