mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-11-02 22:04:55 +00:00
[X86] Use single add/sub for large stack offsets
For large stack offsets the compiler generates multiple immediate mode sub/add instructions in the prologue/epilogue. This patch makes the compiler place the final amount to be added/subtracted into a register, which is then added/subtracted with a single operation. Differential Revision: http://reviews.llvm.org/D7226 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@227458 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c7260e2ffa
commit
1031549bec
@ -82,6 +82,14 @@ static unsigned getADDriOpcode(unsigned IsLP64, int64_t Imm) {
|
||||
}
|
||||
}
|
||||
|
||||
static unsigned getSUBrrOpcode(unsigned isLP64) {
|
||||
return isLP64 ? X86::SUB64rr : X86::SUB32rr;
|
||||
}
|
||||
|
||||
static unsigned getADDrrOpcode(unsigned isLP64) {
|
||||
return isLP64 ? X86::ADD64rr : X86::ADD32rr;
|
||||
}
|
||||
|
||||
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
|
||||
if (IsLP64) {
|
||||
if (isInt<8>(Imm))
|
||||
@ -165,6 +173,18 @@ static unsigned findDeadCallerSavedReg(MachineBasicBlock &MBB,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// Report whether RAX or any of the narrower registers checked here (EAX, AX,
/// AH, AL) is recorded as a live-in of \p MF.
///
/// \param MF function whose register-info live-in list is scanned.
/// \returns true as soon as one matching live-in is found, false otherwise.
static bool isEAXLiveIn(MachineFunction &MF) {
  MachineRegisterInfo &MRI = MF.getRegInfo();

  for (MachineRegisterInfo::livein_iterator LI = MRI.livein_begin(),
                                            LE = MRI.livein_end();
       LI != LE; ++LI) {
    // Each live-in entry is a pair; only its first member (the register
    // number) is inspected.
    switch (LI->first) {
    case X86::RAX:
    case X86::EAX:
    case X86::AX:
    case X86::AH:
    case X86::AL:
      return true;
    default:
      break;
    }
  }

  return false;
}
|
||||
|
||||
/// emitSPUpdate - Emit a series of instructions to increment / decrement the
|
||||
/// stack pointer by a constant value.
|
||||
@ -187,6 +207,32 @@ void emitSPUpdate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
|
||||
DebugLoc DL = MBB.findDebugLoc(MBBI);
|
||||
|
||||
while (Offset) {
|
||||
if (Offset > Chunk) {
|
||||
// Rather than emit a long series of instructions for large offsets,
|
||||
// load the offset into a register and do one sub/add
|
||||
unsigned Reg = 0;
|
||||
|
||||
if (isSub && !isEAXLiveIn(*MBB.getParent()))
|
||||
Reg = (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX);
|
||||
else
|
||||
Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
|
||||
|
||||
if (Reg) {
|
||||
Opc = Is64BitTarget ? X86::MOV64ri : X86::MOV32ri;
|
||||
BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg)
|
||||
.addImm(Offset);
|
||||
Opc = isSub
|
||||
? getSUBrrOpcode(Is64BitTarget)
|
||||
: getADDrrOpcode(Is64BitTarget);
|
||||
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
|
||||
.addReg(StackPtr)
|
||||
.addReg(Reg);
|
||||
MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
|
||||
Offset = 0;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
|
||||
if (ThisVal == (Is64BitTarget ? 8 : 4)) {
|
||||
// Use push / pop instead.
|
||||
@ -316,19 +362,6 @@ static int mergeSPUpdates(MachineBasicBlock &MBB,
|
||||
return Offset;
|
||||
}
|
||||
|
||||
/// Report whether RAX or any of the narrower registers checked here (EAX, AX,
/// AH, AL) is recorded as a live-in of \p MF.
///
/// X86::RAX is part of the check so that a 64-bit live-in is not missed; this
/// keeps the predicate in agreement with the other isEAXLiveIn definition that
/// accompanies emitSPUpdate.
///
/// \param MF function whose register-info live-in list is scanned.
/// \returns true as soon as one matching live-in is found, false otherwise.
static bool isEAXLiveIn(MachineFunction &MF) {
  for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
       EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
    // Only the register number (first member of the live-in pair) matters.
    unsigned Reg = II->first;

    if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
        Reg == X86::AH || Reg == X86::AL)
      return true;
  }

  return false;
}
|
||||
|
||||
void
|
||||
X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MBBI,
|
||||
|
59
test/CodeGen/X86/huge-stack-offset.ll
Normal file
59
test/CodeGen/X86/huge-stack-offset.ll
Normal file
@ -0,0 +1,59 @@
|
||||
; RUN: llc < %s -mtriple=x86_64-linux-unknown | FileCheck %s --check-prefix=CHECK-64
|
||||
; RUN: llc < %s -mtriple=i386-linux-unknown | FileCheck %s --check-prefix=CHECK-32
|
||||
|
||||
; Test that a large stack offset uses a single add/sub instruction to
|
||||
; adjust the stack pointer.
|
||||
|
||||
; @foo allocates a 5000000000-byte array on the stack and hands its address to
; @bar. The checks below expect the stack pointer to be adjusted by moving the
; offset into a register and then doing one register-register sub (first pair
; of checks) and later one register-register add (second pair), for both the
; 64-bit and the 32-bit RUN lines.
define void @foo() nounwind {
|
||||
; CHECK-64-LABEL: foo:
|
||||
; CHECK-64: movabsq $50000000{{..}}, %rax
|
||||
; CHECK-64-NEXT: subq %rax, %rsp
|
||||
; CHECK-64-NOT: subq $2147483647, %rsp
|
||||
; CHECK-64: movabsq $50000000{{..}}, [[RAX:%r..]]
|
||||
; CHECK-64-NEXT: addq [[RAX]], %rsp
|
||||
|
||||
; CHECK-32-LABEL: foo:
|
||||
; CHECK-32: movl $50000000{{..}}, %eax
|
||||
; CHECK-32-NEXT: subl %eax, %esp
|
||||
; CHECK-32-NOT: subl $2147483647, %esp
|
||||
; CHECK-32: movl $50000000{{..}}, [[EAX:%e..]]
|
||||
; CHECK-32-NEXT: addl [[EAX]], %esp
|
||||
; The oversized alloca is what produces the large stack offset under test.
%1 = alloca [5000000000 x i8], align 16
|
||||
%2 = getelementptr inbounds [5000000000 x i8]* %1, i32 0, i32 0
|
||||
call void @bar(i8* %2)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Verify that we do not clobber the return value.
|
||||
|
||||
; @foo2 has the same 5000000000-byte stack array as @foo but returns the
; constant 10. The checks expect the return value to be materialised with
; "movl $10, %eax" and forbid any later immediate move into %rax / %eax after
; that point.
define i32 @foo2() nounwind {
|
||||
; CHECK-64-LABEL: foo2:
|
||||
; CHECK-64: movl $10, %eax
|
||||
; CHECK-64-NOT: movabsq ${{.*}}, %rax
|
||||
|
||||
; CHECK-32-LABEL: foo2:
|
||||
; CHECK-32: movl $10, %eax
|
||||
; CHECK-32-NOT: movl ${{.*}}, %eax
|
||||
; The oversized alloca is what produces the large stack offset under test.
%1 = alloca [5000000000 x i8], align 16
|
||||
%2 = getelementptr inbounds [5000000000 x i8]* %1, i32 0, i32 0
|
||||
call void @bar(i8* %2)
|
||||
ret i32 10
|
||||
}
|
||||
|
||||
; Verify that we do not clobber EAX when using inreg attribute
|
||||
|
||||
; @foo3 takes an inreg i32 argument, allocates the same 5000000000-byte stack
; array, calls @bar and returns the argument unchanged.
; For the 64-bit RUN line the checks still expect the offset to be moved into
; %rax and subtracted with one subq. For the 32-bit RUN line they expect an
; immediate "subl $2147483647, %esp" and no immediate move into %eax
; afterwards (presumably because the inreg argument occupies EAX on i386 --
; confirm against the calling convention).
define i32 @foo3(i32 inreg %x) nounwind {
|
||||
; CHECK-64-LABEL: foo3:
|
||||
; CHECK-64: movabsq $50000000{{..}}, %rax
|
||||
; CHECK-64-NEXT: subq %rax, %rsp
|
||||
|
||||
; CHECK-32-LABEL: foo3:
|
||||
; CHECK-32: subl $2147483647, %esp
|
||||
; CHECK-32-NOT: movl ${{.*}}, %eax
|
||||
; The oversized alloca is what produces the large stack offset under test.
%1 = alloca [5000000000 x i8], align 16
|
||||
%2 = getelementptr inbounds [5000000000 x i8]* %1, i32 0, i32 0
|
||||
call void @bar(i8* %2)
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
declare void @bar(i8*)
|
Loading…
Reference in New Issue
Block a user