mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-01 15:11:24 +00:00
[X86] Convert esp-relative movs of function arguments into pushes, step 1
This handles the simplest case for mov -> push conversion: 1. x86-32 calling convention, everything is passed through the stack. 2. There is no reserved call frame. 3. Only registers or immediates are pushed, no attempt to combine a mem-reg-mem sequence into a single PUSHmm. Differential Revision: http://reviews.llvm.org/D6503 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@223757 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2959baffd2
commit
77c1b73211
@ -93,6 +93,15 @@ static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
|
||||
return X86::AND32ri;
|
||||
}
|
||||
|
||||
static unsigned getPUSHiOpcode(bool IsLP64, int64_t Imm) {
|
||||
// We don't support LP64 for now.
|
||||
assert(!IsLP64);
|
||||
|
||||
if (isInt<8>(Imm))
|
||||
return X86::PUSH32i8;
|
||||
return X86::PUSHi32;
|
||||
}
|
||||
|
||||
static unsigned getLEArOpcode(unsigned IsLP64) {
|
||||
return IsLP64 ? X86::LEA64r : X86::LEA32r;
|
||||
}
|
||||
@ -1802,6 +1811,103 @@ void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
|
||||
#endif
|
||||
}
|
||||
|
||||
bool X86FrameLowering::
|
||||
convertArgMovsToPushes(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I, uint64_t Amount) const {
|
||||
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
|
||||
const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
|
||||
MF.getSubtarget().getRegisterInfo());
|
||||
unsigned StackPtr = RegInfo.getStackRegister();
|
||||
|
||||
// Scan the call setup sequence for the pattern we're looking for.
|
||||
// We only handle a simple case now - a sequence of MOV32mi or MOV32mr
|
||||
// instructions, that push a sequence of 32-bit values onto the stack, with
|
||||
// no gaps.
|
||||
std::map<int64_t, MachineBasicBlock::iterator> MovMap;
|
||||
do {
|
||||
int Opcode = I->getOpcode();
|
||||
if (Opcode != X86::MOV32mi && Opcode != X86::MOV32mr)
|
||||
break;
|
||||
|
||||
// We only want movs of the form:
|
||||
// movl imm/r32, k(%ecx)
|
||||
// If we run into something else, bail
|
||||
// Note that AddrBaseReg may, counterintuitively, not be a register...
|
||||
if (!I->getOperand(X86::AddrBaseReg).isReg() ||
|
||||
(I->getOperand(X86::AddrBaseReg).getReg() != StackPtr) ||
|
||||
!I->getOperand(X86::AddrScaleAmt).isImm() ||
|
||||
(I->getOperand(X86::AddrScaleAmt).getImm() != 1) ||
|
||||
(I->getOperand(X86::AddrIndexReg).getReg() != X86::NoRegister) ||
|
||||
(I->getOperand(X86::AddrSegmentReg).getReg() != X86::NoRegister) ||
|
||||
!I->getOperand(X86::AddrDisp).isImm())
|
||||
return false;
|
||||
|
||||
int64_t StackDisp = I->getOperand(X86::AddrDisp).getImm();
|
||||
|
||||
// We don't want to consider the unaligned case.
|
||||
if (StackDisp % 4)
|
||||
return false;
|
||||
|
||||
// If the same stack slot is being filled twice, something's fishy.
|
||||
if (!MovMap.insert(std::pair<int64_t, MachineInstr*>(StackDisp, I)).second)
|
||||
return false;
|
||||
|
||||
++I;
|
||||
} while (I != MBB.end());
|
||||
|
||||
// We now expect the end of the sequence - a call and a stack adjust.
|
||||
if (I == MBB.end())
|
||||
return false;
|
||||
if (!I->isCall())
|
||||
return false;
|
||||
MachineBasicBlock::iterator Call = I;
|
||||
if ((++I)->getOpcode() != TII.getCallFrameDestroyOpcode())
|
||||
return false;
|
||||
|
||||
// Now, go through the map, and see that we don't have any gaps,
|
||||
// but only a series of 32-bit MOVs.
|
||||
// Since std::map provides ordered iteration, the original order
|
||||
// of the MOVs doesn't matter.
|
||||
int64_t ExpectedDist = 0;
|
||||
for (auto MMI = MovMap.begin(), MME = MovMap.end(); MMI != MME;
|
||||
++MMI, ExpectedDist += 4)
|
||||
if (MMI->first != ExpectedDist)
|
||||
return false;
|
||||
|
||||
// Ok, everything looks fine. Do the transformation.
|
||||
DebugLoc DL = I->getDebugLoc();
|
||||
|
||||
// It's possible the original stack adjustment amount was larger than
|
||||
// that done by the pushes. If so, we still need a SUB.
|
||||
Amount -= ExpectedDist;
|
||||
if (Amount) {
|
||||
MachineInstr* Sub = BuildMI(MBB, Call, DL,
|
||||
TII.get(getSUBriOpcode(false, Amount)), StackPtr)
|
||||
.addReg(StackPtr).addImm(Amount);
|
||||
Sub->getOperand(3).setIsDead();
|
||||
}
|
||||
|
||||
// Now, iterate through the map in reverse order, and replace the movs
|
||||
// with pushes. MOVmi/MOVmr doesn't have any defs, so need to replace uses.
|
||||
for (auto MMI = MovMap.rbegin(), MME = MovMap.rend(); MMI != MME; ++MMI) {
|
||||
MachineBasicBlock::iterator MOV = MMI->second;
|
||||
MachineOperand PushOp = MOV->getOperand(X86::AddrNumOperands);
|
||||
int PushOpcode;
|
||||
if (MOV->getOpcode() == X86::MOV32mi) {
|
||||
int64_t Val = PushOp.getImm();
|
||||
BuildMI(MBB, Call, DL, TII.get(getPUSHiOpcode(false, Val)))
|
||||
.addImm(Val);
|
||||
} else {
|
||||
PushOpcode = X86::PUSH32r;
|
||||
BuildMI(MBB, Call, DL, TII.get(X86::PUSH32r))
|
||||
.addReg(PushOp.getReg());
|
||||
}
|
||||
MBB.erase(MOV);
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void X86FrameLowering::
|
||||
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I) const {
|
||||
@ -1809,21 +1915,20 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
const X86RegisterInfo &RegInfo = *static_cast<const X86RegisterInfo *>(
|
||||
MF.getSubtarget().getRegisterInfo());
|
||||
unsigned StackPtr = RegInfo.getStackRegister();
|
||||
bool reseveCallFrame = hasReservedCallFrame(MF);
|
||||
bool reserveCallFrame = hasReservedCallFrame(MF);
|
||||
int Opcode = I->getOpcode();
|
||||
bool isDestroy = Opcode == TII.getCallFrameDestroyOpcode();
|
||||
const X86Subtarget &STI = MF.getTarget().getSubtarget<X86Subtarget>();
|
||||
bool IsLP64 = STI.isTarget64BitLP64();
|
||||
DebugLoc DL = I->getDebugLoc();
|
||||
uint64_t Amount = !reseveCallFrame ? I->getOperand(0).getImm() : 0;
|
||||
uint64_t Amount = !reserveCallFrame ? I->getOperand(0).getImm() : 0;
|
||||
uint64_t CalleeAmt = isDestroy ? I->getOperand(1).getImm() : 0;
|
||||
I = MBB.erase(I);
|
||||
|
||||
if (!reseveCallFrame) {
|
||||
if (!reserveCallFrame) {
|
||||
// If the stack pointer can be changed after prologue, turn the
|
||||
// adjcallstackup instruction into a 'sub ESP, <amt>' and the
|
||||
// adjcallstackdown instruction into 'add ESP, <amt>'
|
||||
// TODO: consider using push / pop instead of sub + store / add
|
||||
if (Amount == 0)
|
||||
return;
|
||||
|
||||
@ -1838,6 +1943,12 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
|
||||
MachineInstr *New = nullptr;
|
||||
if (Opcode == TII.getCallFrameSetupOpcode()) {
|
||||
// Try to convert movs to the stack into pushes.
|
||||
// We currently only look for a pattern that appears in 32-bit
|
||||
// calling conventions.
|
||||
if (!IsLP64 && convertArgMovsToPushes(MF, MBB, I, Amount))
|
||||
return;
|
||||
|
||||
New = BuildMI(MF, DL, TII.get(getSUBriOpcode(IsLP64, Amount)),
|
||||
StackPtr)
|
||||
.addReg(StackPtr)
|
||||
|
@ -76,6 +76,16 @@ public:
|
||||
void eliminateCallFramePseudoInstr(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI) const override;
|
||||
|
||||
private:
|
||||
/// convertArgMovsToPushes - This method tries to convert a call sequence
|
||||
/// that uses sub and mov instructions to put the arguments onto the stack
|
||||
/// into a series of pushes.
|
||||
/// \p I is the instruction at which the scan for argument stores starts, and
/// \p Amount is the stack adjustment the caller would otherwise emit as a SUB.
/// Returns true if the transformation succeeded, false if not.
|
||||
bool convertArgMovsToPushes(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
uint64_t Amount) const;
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
@ -33,14 +33,14 @@ define i64 @g(i32 %i) nounwind {
|
||||
; CHECK-NOT: {{[^ ,]*}}, %esp
|
||||
;
|
||||
; Next we set up the memset call, and then undo it.
|
||||
; CHECK: subl $32, %esp
|
||||
; CHECK: subl $20, %esp
|
||||
; CHECK-NOT: {{[^ ,]*}}, %esp
|
||||
; CHECK: calll memset
|
||||
; CHECK-NEXT: addl $32, %esp
|
||||
; CHECK-NOT: {{[^ ,]*}}, %esp
|
||||
;
|
||||
; Next we set up the call to 'f'.
|
||||
; CHECK: subl $32, %esp
|
||||
; CHECK: subl $28, %esp
|
||||
; CHECK-NOT: {{[^ ,]*}}, %esp
|
||||
; CHECK: calll f
|
||||
; CHECK-NEXT: addl $32, %esp
|
||||
|
@ -17,16 +17,16 @@ entry:
|
||||
; CHECK: movl %esp,
|
||||
call void @Foo_ctor(%Foo* %c)
|
||||
; CHECK: leal 12(%{{.*}}),
|
||||
; CHECK: subl $4, %esp
|
||||
; CHECK: calll _Foo_ctor
|
||||
; CHECK-NEXT: pushl
|
||||
; CHECK-NEXT: calll _Foo_ctor
|
||||
; CHECK: addl $4, %esp
|
||||
%b = getelementptr %frame* %args, i32 0, i32 1
|
||||
store i32 42, i32* %b
|
||||
; CHECK: movl $42,
|
||||
%a = getelementptr %frame* %args, i32 0, i32 0
|
||||
call void @Foo_ctor(%Foo* %a)
|
||||
; CHECK: subl $4, %esp
|
||||
; CHECK: calll _Foo_ctor
|
||||
; CHECK-NEXT: pushl
|
||||
; CHECK-NEXT: calll _Foo_ctor
|
||||
; CHECK: addl $4, %esp
|
||||
call void @f(%frame* inalloca %args)
|
||||
; CHECK: calll _f
|
||||
|
@ -37,7 +37,7 @@ blah:
|
||||
invoke.cont:
|
||||
call void @begin(%Iter* sret %beg)
|
||||
|
||||
; CHECK: movl %[[beg]],
|
||||
; CHECK: pushl %[[beg]]
|
||||
; CHECK: calll _begin
|
||||
|
||||
invoke void @reverse(%frame.reverse* inalloca align 4 %rev_args)
|
||||
|
@ -19,7 +19,7 @@ define void @g() {
|
||||
call x86_stdcallcc void @f(%Foo* inalloca %b)
|
||||
; CHECK: calll _f@8
|
||||
; CHECK-NOT: %esp
|
||||
; CHECK: subl $4, %esp
|
||||
; CHECK: pushl
|
||||
; CHECK: calll _i@4
|
||||
call x86_stdcallcc void @i(i32 0)
|
||||
ret void
|
||||
|
@ -63,7 +63,7 @@ spill_vectors:
|
||||
; CHECK-LABEL: _memcpy_vla_vector:
|
||||
; CHECK: andl $-16, %esp
|
||||
; CHECK: movl %esp, %esi
|
||||
; CHECK: movl $128, {{.*}}(%esp)
|
||||
; CHECK: pushl $128
|
||||
; CHECK: calll _memcpy
|
||||
; CHECK: calll __chkstk
|
||||
|
||||
|
97
test/CodeGen/X86/movtopush.ll
Normal file
97
test/CodeGen/X86/movtopush.ll
Normal file
@ -0,0 +1,97 @@
|
||||
; RUN: llc < %s -mtriple=i686-windows | FileCheck %s -check-prefix=NORMAL
|
||||
; RUN: llc < %s -mtriple=i686-windows -force-align-stack -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
|
||||
declare void @good(i32 %a, i32 %b, i32 %c, i32 %d)
|
||||
declare void @inreg(i32 %a, i32 inreg %b, i32 %c, i32 %d)
|
||||
|
||||
; Here, we should have a reserved frame, so we don't expect pushes
|
||||
; NORMAL-LABEL: test1
|
||||
; NORMAL: subl $16, %esp
|
||||
; NORMAL-NEXT: movl $4, 12(%esp)
|
||||
; NORMAL-NEXT: movl $3, 8(%esp)
|
||||
; NORMAL-NEXT: movl $2, 4(%esp)
|
||||
; NORMAL-NEXT: movl $1, (%esp)
|
||||
; NORMAL-NEXT: call
|
||||
define void @test1() {
|
||||
entry:
|
||||
call void @good(i32 1, i32 2, i32 3, i32 4)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Here, we expect a sequence of 4 immediate pushes
|
||||
; NORMAL-LABEL: test2
|
||||
; NORMAL-NOT: subl {{.*}} %esp
|
||||
; NORMAL: pushl $4
|
||||
; NORMAL-NEXT: pushl $3
|
||||
; NORMAL-NEXT: pushl $2
|
||||
; NORMAL-NEXT: pushl $1
|
||||
; NORMAL-NEXT: call
|
||||
define void @test2(i32 %k) {
|
||||
entry:
|
||||
%a = alloca i32, i32 %k
|
||||
call void @good(i32 1, i32 2, i32 3, i32 4)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Again, we expect a sequence of 4 immediate pushes
|
||||
; Checks that we generate the right pushes for >8bit immediates
|
||||
; NORMAL-LABEL: test2b
|
||||
; NORMAL-NOT: subl {{.*}} %esp
|
||||
; NORMAL: pushl $4096
|
||||
; NORMAL-NEXT: pushl $3072
|
||||
; NORMAL-NEXT: pushl $2048
|
||||
; NORMAL-NEXT: pushl $1024
|
||||
; NORMAL-NEXT: call
|
||||
define void @test2b(i32 %k) {
|
||||
entry:
|
||||
%a = alloca i32, i32 %k
|
||||
call void @good(i32 1024, i32 2048, i32 3072, i32 4096)
|
||||
ret void
|
||||
}
|
||||
|
||||
; The first push should push a register
|
||||
; NORMAL-LABEL: test3
|
||||
; NORMAL-NOT: subl {{.*}} %esp
|
||||
; NORMAL: pushl $4
|
||||
; NORMAL-NEXT: pushl $3
|
||||
; NORMAL-NEXT: pushl $2
|
||||
; NORMAL-NEXT: pushl %e{{..}}
|
||||
; NORMAL-NEXT: call
|
||||
define void @test3(i32 %k) {
|
||||
entry:
|
||||
%a = alloca i32, i32 %k
|
||||
call void @good(i32 %k, i32 2, i32 3, i32 4)
|
||||
ret void
|
||||
}
|
||||
|
||||
; We don't support weird calling conventions
|
||||
; NORMAL-LABEL: test4
|
||||
; NORMAL: subl $12, %esp
|
||||
; NORMAL-NEXT: movl $4, 8(%esp)
|
||||
; NORMAL-NEXT: movl $3, 4(%esp)
|
||||
; NORMAL-NEXT: movl $1, (%esp)
|
||||
; NORMAL-NEXT: movl $2, %eax
|
||||
; NORMAL-NEXT: call
|
||||
define void @test4(i32 %k) {
|
||||
entry:
|
||||
%a = alloca i32, i32 %k
|
||||
call void @inreg(i32 1, i32 2, i32 3, i32 4)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Check that additional alignment is added when the pushes
|
||||
; don't add up to the required alignment.
|
||||
; ALIGNED-LABEL: test5
|
||||
; ALIGNED: subl $16, %esp
|
||||
; ALIGNED-NEXT: pushl $4
|
||||
; ALIGNED-NEXT: pushl $3
|
||||
; ALIGNED-NEXT: pushl $2
|
||||
; ALIGNED-NEXT: pushl $1
|
||||
; ALIGNED-NEXT: call
|
||||
define void @test5(i32 %k) {
|
||||
entry:
|
||||
%a = alloca i32, i32 %k
|
||||
call void @good(i32 1, i32 2, i32 3, i32 4)
|
||||
ret void
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user