mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-06 23:32:27 +00:00
Support for HiPE-compatible code emission, patch by Yiannis Tsiouris.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@175457 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b875acda98
commit
98fbe27ac8
@ -120,6 +120,10 @@ public:
|
|||||||
/// by adding a check even before the "normal" function prologue.
|
/// by adding a check even before the "normal" function prologue.
|
||||||
virtual void adjustForSegmentedStacks(MachineFunction &MF) const { }
|
virtual void adjustForSegmentedStacks(MachineFunction &MF) const { }
|
||||||
|
|
||||||
|
/// Insert Erlang Run-Time System (ERTS) specific code into
|
||||||
|
/// the assembly prologue to explicitly handle the stack.
|
||||||
|
virtual void adjustForHiPEPrologue(MachineFunction &MF) const { }
|
||||||
|
|
||||||
/// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee
|
/// spillCalleeSavedRegisters - Issues instruction(s) to spill all callee
|
||||||
/// saved registers and returns true if it isn't possible / profitable to do
|
/// saved registers and returns true if it isn't possible / profitable to do
|
||||||
/// so by issuing a series of store instructions via
|
/// so by issuing a series of store instructions via
|
||||||
|
@ -693,6 +693,14 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) {
|
|||||||
// space in small chunks instead of one large contiguous block.
|
// space in small chunks instead of one large contiguous block.
|
||||||
if (Fn.getTarget().Options.EnableSegmentedStacks)
|
if (Fn.getTarget().Options.EnableSegmentedStacks)
|
||||||
TFI.adjustForSegmentedStacks(Fn);
|
TFI.adjustForSegmentedStacks(Fn);
|
||||||
|
|
||||||
|
// Emit additional code that is required to explicitly handle the stack in
|
||||||
|
// HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The
|
||||||
|
// approach is rather similar to that of Segmented Stacks, but it uses a
|
||||||
|
// different conditional check and another BIF for allocating more stack
|
||||||
|
// space.
|
||||||
|
if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE)
|
||||||
|
TFI.adjustForHiPEPrologue(Fn);
|
||||||
}
|
}
|
||||||
|
|
||||||
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
|
/// replaceFrameIndices - Replace all MO_FrameIndex operands with physical
|
||||||
|
@ -1387,16 +1387,25 @@ HasNestArgument(const MachineFunction *MF) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/// GetScratchRegister - Get a register for performing work in the segmented
|
/// GetScratchRegister - Get a temp register for performing work in the
|
||||||
/// stack prologue. Depending on platform and the properties of the function
|
/// segmented stack and the Erlang/HiPE stack prologue. Depending on platform
|
||||||
/// either one or two registers will be needed. Set primary to true for
|
/// and the properties of the function either one or two registers will be
|
||||||
/// the first register, false for the second.
|
/// needed. Set primary to true for the first register, false for the second.
|
||||||
static unsigned
|
static unsigned
|
||||||
GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
|
GetScratchRegister(bool Is64Bit, const MachineFunction &MF, bool Primary) {
|
||||||
|
CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
|
||||||
|
|
||||||
|
// The HiPE (Erlang) calling convention uses its own scratch registers.
|
||||||
|
if (CallingConvention == CallingConv::HiPE) {
|
||||||
|
if (Is64Bit)
|
||||||
|
return Primary ? X86::R14 : X86::R13;
|
||||||
|
else
|
||||||
|
return Primary ? X86::EBX : X86::EDI;
|
||||||
|
}
|
||||||
|
|
||||||
if (Is64Bit)
|
if (Is64Bit)
|
||||||
return Primary ? X86::R11 : X86::R12;
|
return Primary ? X86::R11 : X86::R12;
|
||||||
|
|
||||||
CallingConv::ID CallingConvention = MF.getFunction()->getCallingConv();
|
|
||||||
bool IsNested = HasNestArgument(&MF);
|
bool IsNested = HasNestArgument(&MF);
|
||||||
|
|
||||||
if (CallingConvention == CallingConv::X86_FastCall ||
|
if (CallingConvention == CallingConv::X86_FastCall ||
|
||||||
@ -1603,3 +1612,144 @@ X86FrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
|
|||||||
MF.verify();
|
MF.verify();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Erlang programs may need a special prologue to handle the stack size they
|
||||||
|
// might need at runtime. That is because Erlang/OTP does not implement a C
|
||||||
|
// stack but uses a custom implementation of a hybrid stack/heap
|
||||||
|
// architecture. (For more information see Erik Stenman's Ph.D. thesis:
|
||||||
|
// http://publications.uu.se/uu/fulltext/nbn_se_uu_diva-2688.pdf)
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// CheckStack:
|
||||||
|
// temp0 = sp - MaxStack
|
||||||
|
// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
|
||||||
|
// OldStart:
|
||||||
|
// ...
|
||||||
|
// IncStack:
|
||||||
|
// call inc_stack # doubles the stack space
|
||||||
|
// temp0 = sp - MaxStack
|
||||||
|
// if( temp0 < SP_LIMIT(P) ) goto IncStack else goto OldStart
|
||||||
|
void X86FrameLowering::adjustForHiPEPrologue(MachineFunction &MF) const {
|
||||||
|
const X86InstrInfo &TII = *TM.getInstrInfo();
|
||||||
|
const X86Subtarget *ST = &MF.getTarget().getSubtarget<X86Subtarget>();
|
||||||
|
MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||||
|
const uint64_t SlotSize = TM.getRegisterInfo()->getSlotSize();
|
||||||
|
const bool Is64Bit = STI.is64Bit();
|
||||||
|
DebugLoc DL;
|
||||||
|
// HiPE-specific values
|
||||||
|
const unsigned HipeLeafWords = 24;
|
||||||
|
const unsigned CCRegisteredArgs = Is64Bit ? 6 : 5;
|
||||||
|
const unsigned Guaranteed = HipeLeafWords * SlotSize;
|
||||||
|
const unsigned CallerStkArity =
|
||||||
|
std::max<int>(0, MF.getFunction()->arg_size() - CCRegisteredArgs);
|
||||||
|
unsigned MaxStack =
|
||||||
|
MFI->getStackSize() + CallerStkArity * SlotSize + SlotSize;
|
||||||
|
|
||||||
|
assert(ST->isTargetLinux() &&
|
||||||
|
"HiPE prologue is only supported on Linux operating systems.");
|
||||||
|
|
||||||
|
// Compute the largest caller's frame that is needed to fit the callees'
|
||||||
|
// frames. This 'MaxStack' is computed from:
|
||||||
|
//
|
||||||
|
// a) the fixed frame size, which is the space needed for all spilled temps,
|
||||||
|
// b) outgoing on-stack parameter areas, and
|
||||||
|
// c) the minimum stack space this function needs to make available for the
|
||||||
|
// functions it calls (a tunable ABI property).
|
||||||
|
if (MFI->hasCalls()) {
|
||||||
|
unsigned MoreStackForCalls = 0;
|
||||||
|
|
||||||
|
for (MachineFunction::iterator MBBI = MF.begin(), MBBE = MF.end();
|
||||||
|
MBBI != MBBE; ++MBBI)
|
||||||
|
for (MachineBasicBlock::iterator MI = MBBI->begin(), ME = MBBI->end();
|
||||||
|
MI != ME; ++MI)
|
||||||
|
if (MI->isCall()) {
|
||||||
|
// Get callee operand.
|
||||||
|
const MachineOperand &MO = MI->getOperand(0);
|
||||||
|
const Function *F;
|
||||||
|
|
||||||
|
// Only take account of global function calls (no closures etc.).
|
||||||
|
if (!MO.isGlobal()) continue;
|
||||||
|
if (!(F = dyn_cast<Function>(MO.getGlobal()))) continue;
|
||||||
|
|
||||||
|
// Do not update 'MaxStack' for primitive and built-in functions, as they
|
||||||
|
// are executed on another stack. Such callees are recognized by name:
|
||||||
|
// the name either contains "erlang." or "bif_", or contains neither a "."
|
||||||
|
// (unlike a simple <Module>.<Function>.<Arity>) nor an "_" (unlike the
|
||||||
|
// BIF "suspend_0").
|
||||||
|
if ((F->getName().find("erlang.") != std::string::npos) ||
|
||||||
|
(F->getName().find("bif_") != std::string::npos)) continue;
|
||||||
|
if (F->getName().find_first_of("._") == std::string::npos)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
const uint64_t CalleeStkArity =
|
||||||
|
std::max<int64_t>(0, F->arg_size() - CCRegisteredArgs);
|
||||||
|
MoreStackForCalls = std::max<int64_t>(
|
||||||
|
MoreStackForCalls, (HipeLeafWords - 1 - CalleeStkArity) * SlotSize);
|
||||||
|
}
|
||||||
|
MaxStack += MoreStackForCalls;
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the stack frame needed is larger than the guaranteed size, runtime checks
|
||||||
|
// and calls to "inc_stack_0" BIF should be inserted in the assembly prologue.
|
||||||
|
if (MaxStack > Guaranteed) {
|
||||||
|
MachineBasicBlock &prologueMBB = MF.front();
|
||||||
|
MachineBasicBlock *stackCheckMBB = MF.CreateMachineBasicBlock();
|
||||||
|
MachineBasicBlock *incStackMBB = MF.CreateMachineBasicBlock();
|
||||||
|
|
||||||
|
for (MachineBasicBlock::livein_iterator I = prologueMBB.livein_begin(),
|
||||||
|
E = prologueMBB.livein_end(); I != E; I++) {
|
||||||
|
stackCheckMBB->addLiveIn(*I);
|
||||||
|
incStackMBB->addLiveIn(*I);
|
||||||
|
}
|
||||||
|
|
||||||
|
MF.push_front(incStackMBB);
|
||||||
|
MF.push_front(stackCheckMBB);
|
||||||
|
|
||||||
|
unsigned ScratchReg, SPReg, PReg, SPLimitOffset;
|
||||||
|
unsigned LEAop, CMPop, CALLop;
|
||||||
|
if (Is64Bit) {
|
||||||
|
SPReg = X86::RSP;
|
||||||
|
PReg = X86::RBP;
|
||||||
|
LEAop = X86::LEA64r;
|
||||||
|
CMPop = X86::CMP64rm;
|
||||||
|
CALLop = X86::CALL64pcrel32;
|
||||||
|
SPLimitOffset = 0x90;
|
||||||
|
} else {
|
||||||
|
SPReg = X86::ESP;
|
||||||
|
PReg = X86::EBP;
|
||||||
|
LEAop = X86::LEA32r;
|
||||||
|
CMPop = X86::CMP32rm;
|
||||||
|
CALLop = X86::CALLpcrel32;
|
||||||
|
SPLimitOffset = 0x4c;
|
||||||
|
}
|
||||||
|
|
||||||
|
ScratchReg = GetScratchRegister(Is64Bit, MF, true);
|
||||||
|
assert(!MF.getRegInfo().isLiveIn(ScratchReg) &&
|
||||||
|
"HiPE prologue scratch register is live-in");
|
||||||
|
|
||||||
|
// Create new MBB for StackCheck:
|
||||||
|
addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(LEAop), ScratchReg),
|
||||||
|
SPReg, false, -MaxStack);
|
||||||
|
// SP_LIMIT is read from a fixed offset (SPLimitOffset) off the pointer in BP.
|
||||||
|
addRegOffset(BuildMI(stackCheckMBB, DL, TII.get(CMPop))
|
||||||
|
.addReg(ScratchReg), PReg, false, SPLimitOffset);
|
||||||
|
BuildMI(stackCheckMBB, DL, TII.get(X86::JAE_4)).addMBB(&prologueMBB);
|
||||||
|
|
||||||
|
// Create new MBB for IncStack:
|
||||||
|
BuildMI(incStackMBB, DL, TII.get(CALLop)).
|
||||||
|
addExternalSymbol("inc_stack_0");
|
||||||
|
addRegOffset(BuildMI(incStackMBB, DL, TII.get(LEAop), ScratchReg),
|
||||||
|
SPReg, false, -MaxStack);
|
||||||
|
addRegOffset(BuildMI(incStackMBB, DL, TII.get(CMPop))
|
||||||
|
.addReg(ScratchReg), PReg, false, SPLimitOffset);
|
||||||
|
BuildMI(incStackMBB, DL, TII.get(X86::JLE_4)).addMBB(incStackMBB);
|
||||||
|
|
||||||
|
stackCheckMBB->addSuccessor(&prologueMBB, 99);
|
||||||
|
stackCheckMBB->addSuccessor(incStackMBB, 1);
|
||||||
|
incStackMBB->addSuccessor(&prologueMBB, 99);
|
||||||
|
incStackMBB->addSuccessor(incStackMBB, 1);
|
||||||
|
}
|
||||||
|
#ifdef XDEBUG
|
||||||
|
MF.verify();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
@ -43,6 +43,8 @@ public:
|
|||||||
|
|
||||||
void adjustForSegmentedStacks(MachineFunction &MF) const;
|
void adjustForSegmentedStacks(MachineFunction &MF) const;
|
||||||
|
|
||||||
|
void adjustForHiPEPrologue(MachineFunction &MF) const;
|
||||||
|
|
||||||
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
|
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
|
||||||
RegScavenger *RS = NULL) const;
|
RegScavenger *RS = NULL) const;
|
||||||
|
|
||||||
|
67
test/CodeGen/X86/hipe-prologue.ll
Normal file
67
test/CodeGen/X86/hipe-prologue.ll
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
; RUN: llc < %s -mcpu=generic -mtriple=i686-linux -verify-machineinstrs | FileCheck %s -check-prefix=X32-Linux
|
||||||
|
; RUN: llc < %s -mtriple=x86_64-linux-gnu -verify-machineinstrs | FileCheck %s -check-prefix=X64-Linux
|
||||||
|
|
||||||
|
; The HiPE compiler (i.e., the native code compiler of the Erlang/OTP system)
|
||||||
|
; adds a custom assembly prologue in order to efficiently manipulate the stack
|
||||||
|
; at runtime.
|
||||||
|
|
||||||
|
; Just to prevent the alloca from being optimized away.
|
||||||
|
declare void @dummy_use(i32*, i32)
|
||||||
|
|
||||||
|
define {i32, i32} @test_basic(i32 %hp, i32 %p) {
|
||||||
|
; X32-Linux: test_basic:
|
||||||
|
; X32-Linux-NOT: calll inc_stack_0
|
||||||
|
|
||||||
|
; X64-Linux: test_basic:
|
||||||
|
; X64-Linux-NOT: callq inc_stack_0
|
||||||
|
|
||||||
|
%mem = alloca i32, i32 10
|
||||||
|
call void @dummy_use (i32* %mem, i32 10)
|
||||||
|
%1 = insertvalue {i32, i32} undef, i32 %hp, 0
|
||||||
|
%2 = insertvalue {i32, i32} %1, i32 %p, 1
|
||||||
|
ret {i32, i32} %1
|
||||||
|
}
|
||||||
|
|
||||||
|
define cc 11 {i32, i32} @test_basic_hipecc(i32 %hp, i32 %p) {
|
||||||
|
; X32-Linux: test_basic_hipecc:
|
||||||
|
; X32-Linux: leal -156(%esp), %ebx
|
||||||
|
; X32-Linux-NEXT: cmpl 76(%ebp), %ebx
|
||||||
|
; X32-Linux-NEXT: jb .LBB1_1
|
||||||
|
|
||||||
|
; X32-Linux: ret
|
||||||
|
|
||||||
|
; X32-Linux: .LBB1_1:
|
||||||
|
; X32-Linux-NEXT: calll inc_stack_0
|
||||||
|
|
||||||
|
; X64-Linux: test_basic_hipecc:
|
||||||
|
; X64-Linux: leaq -232(%rsp), %r14
|
||||||
|
; X64-Linux-NEXT: cmpq 144(%rbp), %r14
|
||||||
|
; X64-Linux-NEXT: jb .LBB1_1
|
||||||
|
|
||||||
|
; X64-Linux: ret
|
||||||
|
|
||||||
|
; X64-Linux: .LBB1_1:
|
||||||
|
; X64-Linux-NEXT: callq inc_stack_0
|
||||||
|
|
||||||
|
%mem = alloca i32, i32 10
|
||||||
|
call void @dummy_use (i32* %mem, i32 10)
|
||||||
|
%1 = insertvalue {i32, i32} undef, i32 %hp, 0
|
||||||
|
%2 = insertvalue {i32, i32} %1, i32 %p, 1
|
||||||
|
ret {i32, i32} %2
|
||||||
|
}
|
||||||
|
|
||||||
|
define cc 11 {i32,i32,i32} @test_nocall_hipecc(i32 %hp,i32 %p,i32 %x,i32 %y) {
|
||||||
|
; X32-Linux: test_nocall_hipecc:
|
||||||
|
; X32-Linux-NOT: calll inc_stack_0
|
||||||
|
|
||||||
|
; X64-Linux: test_nocall_hipecc:
|
||||||
|
; X64-Linux-NOT: callq inc_stack_0
|
||||||
|
|
||||||
|
%1 = add i32 %x, %y
|
||||||
|
%2 = mul i32 42, %1
|
||||||
|
%3 = sub i32 24, %2
|
||||||
|
%4 = insertvalue {i32, i32, i32} undef, i32 %hp, 0
|
||||||
|
%5 = insertvalue {i32, i32, i32} %4, i32 %p, 1
|
||||||
|
%6 = insertvalue {i32, i32, i32} %5, i32 %p, 2
|
||||||
|
ret {i32, i32, i32} %6
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user