Unbreak Win64 CC. Step one: honour the register save area, fix some alignment issues and provide a different set of call-clobbered registers.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77962 91177308-0d34-0410-b5e6-96231b3b80d8
parent 3e4c41a84a
commit cf6b739d3d
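
For readers unfamiliar with the Win64 convention the commit message refers to: the caller always reserves a 32-byte register save area ("home area") directly above its outgoing stack arguments, into which the callee may spill RCX, RDX, R8 and R9. A minimal C++ sketch of that caller-side layout, using invented names and assuming the standard 8-byte slot size (this is background, not code from the commit):

    #include <cstdio>

    int main() {
      const unsigned SlotSize = 8;
      const unsigned HomeArea = 4 * SlotSize;  // spill slots for RCX, RDX, R8, R9
      // From the caller's point of view, just before the CALL instruction:
      for (unsigned i = 0; i != 4; ++i)
        std::printf("home slot %u at [rsp+%u]\n", i, i * SlotSize);
      std::printf("first stack-passed argument at [rsp+%u]\n", HomeArea);
      return 0;
    }

That 32-byte figure is what shows up in the hunks below as FirstStackArgOffset, as the extra 32 bytes in the JIT callback and the prologue, and inside the -40 local-area offset.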
@@ -201,8 +201,8 @@ def CC_X86_Win64_C : CallingConv<[
                                          [XMM0, XMM1, XMM2, XMM3]>>,

   // Integer/FP values get stored in stack slots that are 8 bytes in size and
-  // 16-byte aligned if there are no more registers to hold them.
-  CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 16>>,
+  // 8-byte aligned if there are no more registers to hold them.
+  CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,

   // Long doubles get stack slots whose size and alignment depends on the
   // subtarget.
@@ -211,8 +211,8 @@ def CC_X86_Win64_C : CallingConv<[
   // Vectors get 16-byte stack slots that are 16-byte aligned.
   CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,

-  // __m64 vectors get 8-byte stack slots that are 16-byte aligned.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>>
+  // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
+  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
 ]>;

 //===----------------------------------------------------------------------===//
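
The two hunks above drop the stack-slot alignment for scalars and __m64 vectors from 16 to 8 bytes, matching the Win64 rule that stack-passed values occupy packed 8-byte slots. A hedged sketch of what CCAssignToStack<Size, Align> boils down to (simplified; the real logic lives in LLVM's CCState, and the helper name here is invented):

    // Returns the byte offset of the new slot and advances the running offset.
    unsigned assignToStack(unsigned &StackOffset, unsigned Size, unsigned Align) {
      StackOffset = (StackOffset + Align - 1) & ~(Align - 1);  // round up to Align
      unsigned SlotOffset = StackOffset;
      StackOffset += Size;
      return SlotOffset;
    }

With Align = 16, every other 8-byte argument would be preceded by 8 bytes of padding, which disagrees with where a Win64 callee expects to find its fifth and later arguments.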
@@ -15,52 +15,52 @@ extrn X86CompilationCallback2: PROC

 .code
 X86CompilationCallback proc
+    ; Save all int arg registers into register spill area.
+    mov     [rsp+ 8], rcx
+    mov     [rsp+16], rdx
+    mov     [rsp+24], r8
+    mov     [rsp+32], r9
+
     push    rbp

-    ; Save RSP
+    ; Save RSP.
     mov     rbp, rsp

-    ; Save all int arg registers
-    push    rcx
-    push    rdx
-    push    r8
-    push    r9
-
     ; Align stack on 16-byte boundary.
     and     rsp, -16

-    ; Save all XMM arg registers
-    sub     rsp, 64
-    movaps  [rsp], xmm0
-    movaps  [rsp+16], xmm1
-    movaps  [rsp+32], xmm2
-    movaps  [rsp+48], xmm3
+    ; Save all XMM arg registers. Also allocate reg spill area.
+    sub     rsp, 96
+    movaps  [rsp   +32], xmm0
+    movaps  [rsp+16+32], xmm1
+    movaps  [rsp+32+32], xmm2
+    movaps  [rsp+48+32], xmm3

     ; JIT callee

-    ; Pass prev frame and return address
+    ; Pass prev frame and return address.
     mov     rcx, rbp
     mov     rdx, qword ptr [rbp+8]
     call    X86CompilationCallback2

-    ; Restore all XMM arg registers
-    movaps  xmm3, [rsp+48]
-    movaps  xmm2, [rsp+32]
-    movaps  xmm1, [rsp+16]
-    movaps  xmm0, [rsp]
+    ; Restore all XMM arg registers.
+    movaps  xmm3, [rsp+48+32]
+    movaps  xmm2, [rsp+32+32]
+    movaps  xmm1, [rsp+16+32]
+    movaps  xmm0, [rsp   +32]

-    ; Restore RSP
+    ; Restore RSP.
     mov     rsp, rbp

-    ; Restore all int arg registers
-    sub     rsp, 32
-    pop     r9
-    pop     r8
-    pop     rdx
-    pop     rcx
-
-    ; Restore RBP
+    ; Restore RBP.
     pop     rbp

+    ; Restore all int arg registers.
+    mov     r9,  [rsp+32]
+    mov     r8,  [rsp+24]
+    mov     rdx, [rsp+16]
+    mov     rcx, [rsp+ 8]
+
     ret
 X86CompilationCallback endp
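
After the rewrite, the callback no longer pushes the integer argument registers; it stores them into the caller-provided home area at [rsp+8] through [rsp+32] on entry and reloads them from there at the end. The "sub rsp, 96" then carves out both the XMM save slots and a fresh 32-byte spill area for the call to X86CompilationCallback2. A small compile-time sketch of that layout, with offsets read off the assembly above (assumed, not authoritative):

    // Layout below the aligned RSP inside X86CompilationCallback:
    constexpr unsigned HomeAreaForCallee = 32;                               // [rsp+0, rsp+32)
    constexpr unsigned XmmSlot(unsigned I) { return HomeAreaForCallee + 16 * I; }
    static_assert(XmmSlot(0) == 32 && XmmSlot(3) == 80, "xmm0..xmm3 save slots");
    static_assert(XmmSlot(3) + 16 == 96, "matches the 'sub rsp, 96' above");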
@@ -1228,7 +1228,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
                                  MVT::v2i64, InFlag).getValue(1);
       Val = Chain.getValue(0);
       Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
-                        Val, DAG.getConstant(0, MVT::i64));
+                          Val, DAG.getConstant(0, MVT::i64));
     } else {
       Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
                                  MVT::i64, InFlag).getValue(1);
@@ -1628,8 +1628,9 @@ X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
                                     const CCValAssign &VA,
                                     SDValue Chain,
                                     SDValue Arg, ISD::ArgFlagsTy Flags) {
+  const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
   DebugLoc dl = TheCall->getDebugLoc();
-  unsigned LocMemOffset = VA.getLocMemOffset();
+  unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
   SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
   PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
   if (Flags.isByVal()) {
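
The hunk above is the caller-side half of honouring the register save area: every stack-passed argument is placed 32 bytes further up so the area stays untouched. A minimal restatement with invented names (the real code is the LowerMemOpCallTo change above):

    unsigned stackArgOffset(bool IsTargetWin64, unsigned LocMemOffset) {
      const unsigned FirstStackArgOffset = IsTargetWin64 ? 32 : 0;  // skip the spill area
      return FirstStackArgOffset + LocMemOffset;
    }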
@@ -128,13 +128,37 @@ let isCall = 1 in
     def CALL64pcrel32 : Ii32<0xE8, RawFrm,
                              (outs), (ins i64i32imm_pcrel:$dst, variable_ops),
                              "call\t$dst", []>,
-                        Requires<[In64BitMode]>;
+                        Requires<[In64BitMode, NotWin64]>;
     def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
-                    "call\t{*}$dst", [(X86call GR64:$dst)]>;
+                    "call\t{*}$dst", [(X86call GR64:$dst)]>,
+                    Requires<[NotWin64]>;
     def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
-                    "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>;
+                    "call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
+                    Requires<[NotWin64]>;
   }

+  // FIXME: We need to teach codegen about single list of call-clobbered registers.
+let isCall = 1 in
+  // All calls clobber the non-callee saved registers. RSP is marked as
+  // a use to prevent stack-pointer assignments that appear immediately
+  // before calls from potentially appearing dead. Uses for argument
+  // registers are added manually.
+  let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
+              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
+      Uses = [RSP] in {
+    def WINCALL64pcrel32 : I<0xE8, RawFrm,
+                             (outs), (ins i64i32imm:$dst, variable_ops),
+                             "call\t${dst:call}", [(X86call imm:$dst)]>,
+                           Requires<[IsWin64]>;
+    def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
+                       "call\t{*}$dst",
+                       [(X86call GR64:$dst)]>, Requires<[IsWin64]>;
+    def WINCALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
+                       "call\t{*}$dst",
+                       [(X86call (loadi64 addr:$dst))]>, Requires<[IsWin64]>;
+  }

 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
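
The new WINCALL64* instructions exist because Win64 preserves more registers across calls than the SysV AMD64 ABI: RSI and RDI (and XMM6-XMM15) are callee-saved there, so they must not appear in the Defs list. A small illustrative comparison of the general-purpose sets, hand-written here rather than taken from the .td file above:

    #include <iostream>
    #include <set>
    #include <string>

    int main() {
      std::set<std::string> SysVClobbered{"RAX", "RCX", "RDX", "RSI", "RDI",
                                          "R8", "R9", "R10", "R11"};
      std::set<std::string> Win64Clobbered{"RAX", "RCX", "RDX",
                                           "R8", "R9", "R10", "R11"};
      for (const std::string &R : SysVClobbered)
        if (!Win64Clobbered.count(R))
          std::cout << R << " is call-clobbered on SysV but preserved on Win64\n";
      return 0;
    }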
@@ -1495,9 +1519,14 @@ def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
 // Direct PC relative function call for small code model. 32-bit displacement
 // sign extended to 64-bit.
 def : Pat<(X86call (i64 tglobaladdr:$dst)),
-          (CALL64pcrel32 tglobaladdr:$dst)>;
+          (CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
 def : Pat<(X86call (i64 texternalsym:$dst)),
-          (CALL64pcrel32 texternalsym:$dst)>;
+          (CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;

+def : Pat<(X86call (i64 tglobaladdr:$dst)),
+          (WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
+def : Pat<(X86call (i64 texternalsym:$dst)),
+          (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
+
 def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
           (CALL64pcrel32 tglobaladdr:$dst)>;
@@ -254,6 +254,8 @@ def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
 def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
 def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
 def In64BitMode : Predicate<"Subtarget->is64Bit()">;
+def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
+def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
 def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
 def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
 def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;
@@ -484,15 +484,18 @@ bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,

 int
 X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
-  int Offset = MF.getFrameInfo()->getObjectOffset(FI) + SlotSize;
-  uint64_t StackSize = MF.getFrameInfo()->getStackSize();
+  const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
+  MachineFrameInfo *MFI = MF.getFrameInfo();
+
+  int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea();
+  uint64_t StackSize = MFI->getStackSize();

   if (needsStackRealignment(MF)) {
     if (FI < 0)
       // Skip the saved EBP
       Offset += SlotSize;
     else {
-      unsigned Align = MF.getFrameInfo()->getObjectAlignment(FI);
+      unsigned Align = MFI->getObjectAlignment(FI);
       assert( (-(Offset + StackSize)) % Align == 0);
       Align = 0;
       return Offset + StackSize;
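
The visible part of the getFrameIndexOffset change replaces the hard-coded "+ SlotSize" with the target's offset-of-local-area, so the Win64 value set in X86TargetMachine below (-40) automatically folds the 32-byte spill area into every fixed-object offset. A one-line restatement under that assumption, with invented names:

    // e.g. on Win64: ObjectOffset - (-40) == ObjectOffset + 8 + 32
    int frameObjectOffset(int ObjectOffset, int OffsetOfLocalArea) {
      return ObjectOffset - OffsetOfLocalArea;
    }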
@@ -622,14 +625,14 @@ void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
 void
 X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
                                                       RegScavenger *RS) const {
-  MachineFrameInfo *FFI = MF.getFrameInfo();
+  MachineFrameInfo *MFI = MF.getFrameInfo();

   // Calculate and set max stack object alignment early, so we can decide
   // whether we will need stack realignment (and thus FP).
-  unsigned MaxAlign = std::max(FFI->getMaxAlignment(),
-                               calculateMaxStackAlignment(FFI));
+  unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
+                               calculateMaxStackAlignment(MFI));

-  FFI->setMaxAlignment(MaxAlign);
+  MFI->setMaxAlignment(MaxAlign);

   X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
   int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@@ -643,18 +646,18 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
     //   ...
     // }
     // [EBP]
-    MF.getFrameInfo()->
-      CreateFixedObject(-TailCallReturnAddrDelta,
-                        (-1*SlotSize)+TailCallReturnAddrDelta);
+    MFI->CreateFixedObject(-TailCallReturnAddrDelta,
+                           (-1*SlotSize)+TailCallReturnAddrDelta);
   }

   if (hasFP(MF)) {
     assert((TailCallReturnAddrDelta <= 0) &&
            "The Delta should always be zero or negative");
     // Create a frame entry for the EBP register that must be saved.
-    int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,
-                                                        (int)SlotSize * -2+
-                                                        TailCallReturnAddrDelta);
-    assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
+    int FrameIdx = MFI->CreateFixedObject(SlotSize,
+                                          (int)SlotSize * -2+
+                                          TailCallReturnAddrDelta);
+    assert(FrameIdx == MFI->getObjectIndexBegin() &&
            "Slot for EBP register must be last in order to be found!");
     FrameIdx = 0;
   }
@@ -887,6 +890,11 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
     StackSize = std::max(MinSize,
                          StackSize > 128 ? StackSize - 128 : 0);
     MFI->setStackSize(StackSize);
+  } else if (Subtarget->isTargetWin64()) {
+    // We need to always allocate 32 bytes as register spill area.
+    // FIXME: we might reuse these 32 bytes for leaf functions.
+    StackSize += 32;
+    MFI->setStackSize(StackSize);
   }

   // Insert stack pointer adjustment for later moving of return addr. Only
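
This prologue hunk is the callee-side counterpart: every Win64 function grows its own frame by 32 bytes so it can in turn provide a register save area to anything it calls. As a sketch (helper name invented):

    #include <cstdint>

    std::uint64_t win64FrameSize(std::uint64_t StackSize, bool IsTargetWin64) {
      if (IsTargetWin64)
        StackSize += 32;  // always reserve the register spill (home) area
      return StackSize;
    }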
@@ -64,7 +64,9 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
     Subtarget(TT, FS, is64Bit),
     DataLayout(Subtarget.getDataLayout()),
     FrameInfo(TargetFrameInfo::StackGrowsDown,
-              Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
+              Subtarget.getStackAlignment(),
+              (Subtarget.isTargetWin64() ? -40 :
+               (Subtarget.is64Bit() ? -8 : -4))),
     InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) {
   DefRelocModel = getRelocationModel();
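
The new offset-of-local-area value for Win64 is simply the return-address slot plus the 32-byte register save area, both expressed as negative offsets from the incoming stack pointer. A compile-time sanity check of that arithmetic (background only, not part of the commit):

    constexpr int SlotSize     = 8;   // return-address slot
    constexpr int RegSpillArea = 32;  // home slots for RCX, RDX, R8, R9
    static_assert(-(SlotSize + RegSpillArea) == -40, "Win64 offset of local area");
    static_assert(-SlotSize == -8, "non-Windows 64-bit targets keep the old value");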
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc | grep "subq.*\\\$8, \\\%rsp"
+; RUN: llvm-as < %s | llc | grep "subq.*\\\$40, \\\%rsp"
 target triple = "x86_64-mingw64"

 define x86_fp80 @a(i64 %x) nounwind readnone {
@@ -1,7 +1,7 @@
 ; RUN: llvm-as < %s | llc -o %t1 -f
-; RUN: grep "subq.*\\\$40, \\\%rsp" %t1
-; RUN: grep "movaps \\\%xmm8, \\\(\\\%rsp\\\)" %t1
-; RUN: grep "movaps \\\%xmm7, 16\\\(\\\%rsp\\\)" %t1
+; RUN: grep "subq.*\\\$72, \\\%rsp" %t1
+; RUN: grep "movaps \\\%xmm8, 32\\\(\\\%rsp\\\)" %t1
+; RUN: grep "movaps \\\%xmm7, 48\\\(\\\%rsp\\\)" %t1
 target triple = "x86_64-mingw64"

 define i32 @a() nounwind {