[X86] Convert esp-relative movs of function arguments into pushes, step 1

This handles the simplest case for mov -> push conversion:
1. x86-32 calling convention, everything is passed through the stack.
2. There is no reserved call frame.
3. Only registers or immediates are pushed, no attempt to combine a mem-reg-mem sequence into a single PUSHmm.

Differential Revision: http://reviews.llvm.org/D6503

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223757 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Michael Kuperstein 2014-12-09 06:10:44 +00:00
parent 2959baffd2
commit 77c1b73211
8 changed files with 231 additions and 13 deletions

View File

@ -93,6 +93,15 @@ static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
return X86::AND32ri;
}
/// Select the opcode used to push the immediate \p Imm: PUSH32i8 when the
/// value fits in a signed 8-bit integer, PUSHi32 otherwise.
/// \p IsLP64 must be false; the function asserts on it.
static unsigned getPUSHiOpcode(bool IsLP64, int64_t Imm) {
  // We don't support LP64 for now.
  assert(!IsLP64);
  return isInt<8>(Imm) ? X86::PUSH32i8 : X86::PUSHi32;
}
/// Select the LEA opcode: LEA64r when \p IsLP64 is non-zero, LEA32r otherwise.
static unsigned getLEArOpcode(unsigned IsLP64) {
  if (IsLP64)
    return X86::LEA64r;
  return X86::LEA32r;
}
@ -1802,6 +1811,103 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
#endif
}
/// Try to replace the argument setup of a call - a run of 32-bit stores to
/// consecutive stack-pointer-relative slots - with a sequence of pushes.
///
/// \param I       First instruction to inspect. The caller passes the
///                instruction that followed the call-frame-setup pseudo it
///                has just erased.
/// \param Amount  Stack adjustment the setup pseudo asked for. Whatever the
///                pushes do not cover is emitted as a single SUB.
/// \returns true if the block was rewritten (the caller must then not emit
///          its own SUB), false if the pattern did not match, in which case
///          nothing has been modified.
bool X86FrameLowering::
convertArgMovsToPushes(MachineFunction &MF, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator I, uint64_t Amount) const {
  const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
  const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  unsigned StackPtr = RegInfo.getStackRegister();

  // Scan the call setup sequence for the pattern we're looking for.
  // We only handle a simple case now - a sequence of MOV32mi or MOV32mr
  // instructions, that push a sequence of 32-bit values onto the stack, with
  // no gaps.
  // The map is keyed by stack displacement, so it also orders the stores.
  std::map<int64_t, MachineBasicBlock::iterator> MovMap;

  // The end check comes first so that an iterator already at MBB.end() is
  // never dereferenced.
  for (; I != MBB.end(); ++I) {
    int Opcode = I->getOpcode();
    if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
      break;

    // We only want movs of the form:
    // movl imm/r32, k(%esp)
    // If we run into something else, bail
    // Note that AddrBaseReg may, counterintuitively, not be a register...
    if (!I->getOperand(X86::AddrBaseReg).isReg() ||
        (I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
        !I->getOperand(X86::AddrScaleAmt).isImm() ||
        (I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
        (I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
        (I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
        !I->getOperand(X86::AddrDisp).isImm())
      return false;

    int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();

    // We don't want to consider the unaligned case.
    if (StackDisp % 4)
      return false;

    // Storing the stack pointer itself cannot be turned into a push: the
    // pushes emitted before it move the stack pointer, so a push of it would
    // store a different value than the original mov did.
    if (Opcode == X86::MOV32mr &&
        I->getOperand(X86::AddrNumOperands).getReg() == StackPtr)
      return false;

    // If the same stack slot is being filled twice, something's fishy.
    if (!MovMap.insert(std::pair<int64_t, MachineBasicBlock::iterator>(
                           StackDisp, I)).second)
      return false;
  }

  // We now expect the end of the sequence - a call and a stack adjust.
  if (I == MBB.end())
    return false;
  if (!I->isCall())
    return false;
  MachineBasicBlock::iterator Call = I;

  // Step past the call; make sure there is an instruction to look at before
  // checking that it is the call-frame-destroy pseudo.
  ++I;
  if (I == MBB.end() || I->getOpcode() != TII.getCallFrameDestroyOpcode())
    return false;

  // Now, go through the map, and see that we don't have any gaps,
  // but only a series of 32-bit MOVs.
  // Since std::map provides ordered iteration, the original order
  // of the MOVs doesn't matter.
  int64_t ExpectedDist = 0;
  for (auto MMI = MovMap.begin(), MME = MovMap.end(); MMI != MME;
       ++MMI, ExpectedDist += 4)
    if (MMI->first != ExpectedDist)
      return false;

  // ExpectedDist is now the number of bytes the pushes will cover. If that
  // exceeds the requested adjustment, the subtraction below would wrap the
  // unsigned Amount, so refuse to transform.
  if (static_cast<uint64_t>(ExpectedDist) > Amount)
    return false;

  // Ok, everything looks fine. Do the transformation.
  // Nothing above this point has modified the block.
  // DL is taken from the call-frame-destroy pseudo that I points at.
  DebugLoc DL = I->getDebugLoc();

  // It's possible the original stack adjustment amount was larger than
  // that done by the pushes. If so, we still need a SUB.
  Amount -= ExpectedDist;
  if (Amount) {
    MachineInstr *Sub = BuildMI(MBB, Call, DL,
                                TII.get(getSUBriOpcode(false, Amount)),
                                StackPtr)
                            .addReg(StackPtr).addImm(Amount);
    // Presumably operand 3 is the implicit flags def of the SUB, which
    // nothing reads here - TODO confirm the operand index.
    Sub->getOperand(3).setIsDead();
  }

  // Now, iterate through the map in reverse order, and replace the movs
  // with pushes. MOVmi/MOVmr doesn't have any defs, so need to replace uses.
  // Highest displacement first: each push lands below the previous one, so
  // the value originally stored at displacement 0 must be pushed last.
  for (auto MMI = MovMap.rbegin(), MME = MovMap.rend(); MMI != MME; ++MMI) {
    MachineBasicBlock::iterator MOV = MMI->second;
    // The stored value is the operand right after the memory operands.
    // The reference stays valid because MOV is only erased after use.
    const MachineOperand &PushOp = MOV->getOperand(X86::AddrNumOperands);
    if (MOV->getOpcode() == X86::MOV32mi) {
      int64_t Val = PushOp.getImm();
      BuildMI(MBB, Call, DL, TII.get(getPUSHiOpcode(false, Val)))
          .addImm(Val);
    } else {
      BuildMI(MBB, Call, DL, TII.get(X86::PUSH32r))
          .addReg(PushOp.getReg());
    }
    MBB.erase(MOV);
  }

  return true;
}
void X86FrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineBasicBlock::iterator I) const {
@ -1809,21 +1915,20 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
MF.getSubtarget().getRegisterInfo());
unsigned StackPtr = RegInfo.getStackRegister();
bool reseveCallFrame = hasReservedCallFrame(MF);
bool reserveCallFrame = hasReservedCallFrame(MF);
int Opcode = I->getOpcode();
bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
bool IsLP64 = STI.isTarget64BitLP64();
DebugLoc DL = I->getDebugLoc();
uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0;
uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
I = MBB.erase(I);
if (!reseveCallFrame) {
if (!reserveCallFrame) {
// If the stack pointer can be changed after prologue, turn the
// adjcallstackup instruction into a 'sub ESP, <amt>' and the
// adjcallstackdown instruction into 'add ESP, <amt>'
// TODO: consider using push / pop instead of sub + store / add
if (Amount == 0)
return;
@ -1838,6 +1943,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MachineInstr *New = nullptr;
if (Opcode == TII.getCallFrameSetupOpcode()) {
// Try to convert movs to the stack into pushes.
// We currently only look for a pattern that appears in 32-bit
// calling conventions.
if (!IsLP64 && convertArgMovsToPushes(MF, MBB, I, Amount))
return;
New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
StackPtr)
.addReg(StackPtr)

View File

@ -76,6 +76,16 @@ public:
void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const override;
private:
/// convertArgMovsToPushes - This method tries to convert a call sequence
/// that uses sub and mov instructions to put the arguments onto the stack
/// into a series of pushes.
/// \param MF      Function being lowered; used to reach the subtarget's
///                instruction and register info.
/// \param MBB     Basic block containing the call sequence.
/// \param I       Position at which the scan for argument movs starts.
/// \param Amount  Stack adjustment requested for the call frame; any part
///                not covered by the pushes is still emitted as a sub.
/// Returns true if the transformation succeeded, false if not.
bool convertArgMovsToPushes(MachineFunction &MF,
MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
uint64_t Amount) const;
};
} // End llvm namespace

View File

@ -33,14 +33,14 @@ define i64 @g(i32 %i) nounwind {
; CHECK-NOT: {{[^ ,]*}}, %esp
;
; Next we set up the memset call, and then undo it.
; CHECK: subl $32, %esp
; CHECK: subl $20, %esp
; CHECK-NOT: {{[^ ,]*}}, %esp
; CHECK: calll memset
; CHECK-NEXT: addl $32, %esp
; CHECK-NOT: {{[^ ,]*}}, %esp
;
; Next we set up the call to 'f'.
; CHECK: subl $32, %esp
; CHECK: subl $28, %esp
; CHECK-NOT: {{[^ ,]*}}, %esp
; CHECK: calll f
; CHECK-NEXT: addl $32, %esp

View File

@ -17,16 +17,16 @@ entry:
; CHECK: movl %esp,
call void @Foo_ctor(%Foo* %c)
; CHECK: leal 12(%{{.*}}),
; CHECK: subl $4, %esp
; CHECK: calll _Foo_ctor
; CHECK-NEXT: pushl
; CHECK-NEXT: calll _Foo_ctor
; CHECK: addl $4, %esp
%b = getelementptr %frame* %args, i32 0, i32 1
store i32 42, i32* %b
; CHECK: movl $42,
%a = getelementptr %frame* %args, i32 0, i32 0
call void @Foo_ctor(%Foo* %a)
; CHECK: subl $4, %esp
; CHECK: calll _Foo_ctor
; CHECK-NEXT: pushl
; CHECK-NEXT: calll _Foo_ctor
; CHECK: addl $4, %esp
call void @f(%frame* inalloca %args)
; CHECK: calll _f

View File

@ -37,7 +37,7 @@ blah:
invoke.cont:
call void @begin(%Iter* sret %beg)
; CHECK: movl %[[beg]],
; CHECK: pushl %[[beg]]
; CHECK: calll _begin
invoke void @reverse(%frame.reverse* inalloca align 4 %rev_args)

View File

@ -19,7 +19,7 @@ define void @g() {
call x86_stdcallcc void @f(%Foo* inalloca %b)
; CHECK: calll _f@8
; CHECK-NOT: %esp
; CHECK: subl $4, %esp
; CHECK: pushl
; CHECK: calll _i@4
call x86_stdcallcc void @i(i32 0)
ret void

View File

@ -63,7 +63,7 @@ spill_vectors:
; CHECK-LABEL: _memcpy_vla_vector:
; CHECK: andl $-16, %esp
; CHECK: movl %esp, %esi
; CHECK: movl $128, {{.*}}(%esp)
; CHECK: pushl $128
; CHECK: calll _memcpy
; CHECK: calll __chkstk

View File

@ -0,0 +1,97 @@
; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
; Here, we should have a reserved frame, so we don't expect pushes
; Unlike the functions below, this one has no variable-sized alloca;
; presumably that is what keeps the call frame reserved - TODO confirm.
; The four arguments are expected to be stored with movl into a 16-byte
; area, highest offset first.
; NORMAL-LABEL: test1
; NORMAL: subl $16, %esp
; NORMAL-NEXT: movl $4, 12(%esp)
; NORMAL-NEXT: movl $3, 8(%esp)
; NORMAL-NEXT: movl $2, 4(%esp)
; NORMAL-NEXT: movl $1, (%esp)
; NORMAL-NEXT: call
define void @test1() {
entry:
call void @good(i32 1, i32 2, i32 3, i32 4)
ret void
}
; Here, we expect a sequence of 4 immediate pushes
; The arguments are pushed last-to-first (4, 3, 2, 1) and no stack-pointer
; subtraction may appear before the first push.
; The alloca sized by %k is otherwise unused; presumably it is only here to
; prevent a reserved call frame - TODO confirm.
; NORMAL-LABEL: test2
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl $1
; NORMAL-NEXT: call
define void @test2(i32 %k) {
entry:
%a = alloca i32, i32 %k
call void @good(i32 1, i32 2, i32 3, i32 4)
ret void
}
; Again, we expect a sequence of 4 immediate pushes
; Checks that we generate the right pushes for >8bit immediates
; All four values (1024, 2048, 3072, 4096) are too large for a signed 8-bit
; immediate, in contrast to the small constants used in test2.
; NORMAL-LABEL: test2b
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4096
; NORMAL-NEXT: pushl $3072
; NORMAL-NEXT: pushl $2048
; NORMAL-NEXT: pushl $1024
; NORMAL-NEXT: call
define void @test2b(i32 %k) {
entry:
%a = alloca i32, i32 %k
call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
ret void
}
; The first push should push a register
; More precisely: the first argument (%k) is passed from a register, and its
; push is the last one emitted, right before the call. Which register holds
; %k is not fixed, so the pattern accepts any three-letter %e.. name.
; NORMAL-LABEL: test3
; NORMAL-NOT: subl {{.*}} %esp
; NORMAL: pushl $4
; NORMAL-NEXT: pushl $3
; NORMAL-NEXT: pushl $2
; NORMAL-NEXT: pushl %e{{..}}
; NORMAL-NEXT: call
define void @test3(i32 %k) {
entry:
%a = alloca i32, i32 %k
call void @good(i32 %k, i32 2, i32 3, i32 4)
ret void
}
; We don't support weird calling conventions
; The second parameter of @inreg is marked inreg, and the expected output
; loads its value (2) into %eax instead of storing it to the stack. Only
; 1, 3 and 4 are stored, into a 12-byte area, and the sub + movl form is
; expected to be kept rather than converted to pushes.
; NORMAL-LABEL: test4
; NORMAL: subl $12, %esp
; NORMAL-NEXT: movl $4, 8(%esp)
; NORMAL-NEXT: movl $3, 4(%esp)
; NORMAL-NEXT: movl $1, (%esp)
; NORMAL-NEXT: movl $2, %eax
; NORMAL-NEXT: call
define void @test4(i32 %k) {
entry:
%a = alloca i32, i32 %k
call void @inreg(i32 1, i32 2, i32 3, i32 4)
ret void
}
; Check that additional alignment is added when the pushes
; don't add up to the required alignment.
; These checks apply to the invocation that forces a stack alignment of 32:
; the four pushes cover 16 bytes, and the expected subl $16 in front of them
; brings the total adjustment to 32.
; ALIGNED-LABEL: test5
; ALIGNED: subl $16, %esp
; ALIGNED-NEXT: pushl $4
; ALIGNED-NEXT: pushl $3
; ALIGNED-NEXT: pushl $2
; ALIGNED-NEXT: pushl $1
; ALIGNED-NEXT: call
define void @test5(i32 %k) {
entry:
%a = alloca i32, i32 %k
call void @good(i32 1, i32 2, i32 3, i32 4)
ret void
}