Unbreak Win64 CC. Step one: honour register save area, fix some alignment and provide a different set of call-clobbered registers.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@77962 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Anton Korobeynikov 2009-08-03 08:12:53 +00:00
parent 3e4c41a84a
commit cf6b739d3d
9 changed files with 100 additions and 58 deletions

View File

@ -201,8 +201,8 @@ def CC_X86_Win64_C : CallingConv<[
[XMM0, XMM1, XMM2, XMM3]>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 16-byte aligned if there are no more registers to hold them.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 16>>,
// 8-byte aligned if there are no more registers to hold them.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
// Long doubles get stack slots whose size and alignment depends on the
// subtarget.
@ -211,8 +211,8 @@ def CC_X86_Win64_C : CallingConv<[
// Vectors get 16-byte stack slots that are 16-byte aligned.
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
// __m64 vectors get 8-byte stack slots that are 16-byte aligned.
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 16>>
// __m64 vectors get 8-byte stack slots that are 8-byte aligned.
CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
]>;
//===----------------------------------------------------------------------===//

View File

@ -15,52 +15,52 @@ extrn X86CompilationCallback2: PROC
.code
X86CompilationCallback proc
; Save all int arg registers into register spill area.
mov [rsp+ 8], rcx
mov [rsp+16], rdx
mov [rsp+24], r8
mov [rsp+32], r9
push rbp
; Save RSP
; Save RSP.
mov rbp, rsp
; Save all int arg registers
push rcx
push rdx
push r8
push r9
; Align stack on 16-byte boundary.
and rsp, -16
; Save all XMM arg registers
sub rsp, 64
movaps [rsp], xmm0
movaps [rsp+16], xmm1
movaps [rsp+32], xmm2
movaps [rsp+48], xmm3
; Save all XMM arg registers. Also allocate reg spill area.
sub rsp, 96
movaps [rsp +32], xmm0
movaps [rsp+16+32], xmm1
movaps [rsp+32+32], xmm2
movaps [rsp+48+32], xmm3
; JIT callee
; Pass prev frame and return address
; Pass prev frame and return address.
mov rcx, rbp
mov rdx, qword ptr [rbp+8]
call X86CompilationCallback2
; Restore all XMM arg registers
movaps xmm3, [rsp+48]
movaps xmm2, [rsp+32]
movaps xmm1, [rsp+16]
movaps xmm0, [rsp]
; Restore all XMM arg registers.
movaps xmm3, [rsp+48+32]
movaps xmm2, [rsp+32+32]
movaps xmm1, [rsp+16+32]
movaps xmm0, [rsp +32]
; Restore RSP
; Restore RSP.
mov rsp, rbp
; Restore all int arg registers
sub rsp, 32
pop r9
pop r8
pop rdx
pop rcx
; Restore RBP
; Restore RBP.
pop rbp
; Restore all int arg registers.
mov r9, [rsp+32]
mov r8, [rsp+24]
mov rdx, [rsp+16]
mov rcx, [rsp+ 8]
ret
X86CompilationCallback endp

View File

@ -1228,7 +1228,7 @@ LowerCallResult(SDValue Chain, SDValue InFlag, CallSDNode *TheCall,
MVT::v2i64, InFlag).getValue(1);
Val = Chain.getValue(0);
Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
Val, DAG.getConstant(0, MVT::i64));
Val, DAG.getConstant(0, MVT::i64));
} else {
Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(),
MVT::i64, InFlag).getValue(1);
@ -1628,8 +1628,9 @@ X86TargetLowering::LowerMemOpCallTo(CallSDNode *TheCall, SelectionDAG &DAG,
const CCValAssign &VA,
SDValue Chain,
SDValue Arg, ISD::ArgFlagsTy Flags) {
const unsigned FirstStackArgOffset = (Subtarget->isTargetWin64() ? 32 : 0);
DebugLoc dl = TheCall->getDebugLoc();
unsigned LocMemOffset = VA.getLocMemOffset();
unsigned LocMemOffset = FirstStackArgOffset + VA.getLocMemOffset();
SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset);
PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr, PtrOff);
if (Flags.isByVal()) {

View File

@ -128,13 +128,37 @@ let isCall = 1 in
def CALL64pcrel32 : Ii32<0xE8, RawFrm,
(outs), (ins i64i32imm_pcrel:$dst, variable_ops),
"call\t$dst", []>,
Requires<[In64BitMode]>;
Requires<[In64BitMode, NotWin64]>;
def CALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
"call\t{*}$dst", [(X86call GR64:$dst)]>;
"call\t{*}$dst", [(X86call GR64:$dst)]>,
Requires<[NotWin64]>;
def CALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
"call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>;
"call\t{*}$dst", [(X86call (loadi64 addr:$dst))]>,
Requires<[NotWin64]>;
}
// FIXME: We need to teach codegen about a single list of call-clobbered registers.
// Win64 forms of the 64-bit call instructions: pc-relative immediate (0xE8),
// register-indirect (0xFF /2) and memory-indirect (0xFF /2). Every def below
// is guarded by Requires<[IsWin64]> and carries the Defs/Uses lists declared
// here.
let isCall = 1 in
// All calls clobber the non-callee saved registers. RSP is marked as
// a use to prevent stack-pointer assignments that appear immediately
// before calls from potentially appearing dead. Uses for argument
// registers are added manually.
// Of the XMM registers only XMM0-XMM5 are listed as clobbered here; among
// the integer registers only RAX, RCX, RDX and R8-R11 are listed.
let Defs = [RAX, RCX, RDX, R8, R9, R10, R11,
FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, EFLAGS],
Uses = [RSP] in {
// Direct call with an immediate target.
// NOTE(review): CALL64pcrel32 is declared with the Ii32 class, an
// i64i32imm_pcrel operand and an empty pattern, whereas this def uses the
// plain I class, an i64i32imm operand and an (X86call imm:$dst) pattern.
// Confirm the difference is intended.
def WINCALL64pcrel32 : I<0xE8, RawFrm,
(outs), (ins i64i32imm:$dst, variable_ops),
"call\t${dst:call}", [(X86call imm:$dst)]>,
Requires<[IsWin64]>;
// Indirect call through a 64-bit general-purpose register.
def WINCALL64r : I<0xFF, MRM2r, (outs), (ins GR64:$dst, variable_ops),
"call\t{*}$dst",
[(X86call GR64:$dst)]>, Requires<[IsWin64]>;
// Indirect call through a 64-bit value loaded from memory.
def WINCALL64m : I<0xFF, MRM2m, (outs), (ins i64mem:$dst, variable_ops),
"call\t{*}$dst",
[(X86call (loadi64 addr:$dst))]>, Requires<[IsWin64]>;
}
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
@ -1495,9 +1519,14 @@ def : Pat<(store (i64 (X86Wrapper texternalsym:$src)), addr:$dst),
// Direct PC relative function call for small code model. 32-bit displacement
// sign extended to 64-bit.
def : Pat<(X86call (i64 tglobaladdr:$dst)),
(CALL64pcrel32 tglobaladdr:$dst)>;
(CALL64pcrel32 tglobaladdr:$dst)>, Requires<[NotWin64]>;
def : Pat<(X86call (i64 texternalsym:$dst)),
(CALL64pcrel32 texternalsym:$dst)>;
(CALL64pcrel32 texternalsym:$dst)>, Requires<[NotWin64]>;
def : Pat<(X86call (i64 tglobaladdr:$dst)),
(WINCALL64pcrel32 tglobaladdr:$dst)>, Requires<[IsWin64]>;
def : Pat<(X86call (i64 texternalsym:$dst)),
(WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
def : Pat<(X86tailcall (i64 tglobaladdr:$dst)),
(CALL64pcrel32 tglobaladdr:$dst)>;

View File

@ -254,6 +254,8 @@ def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">;
def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">;
def In32BitMode : Predicate<"!Subtarget->is64Bit()">;
def In64BitMode : Predicate<"Subtarget->is64Bit()">;
def IsWin64 : Predicate<"Subtarget->isTargetWin64()">;
def NotWin64 : Predicate<"!Subtarget->isTargetWin64()">;
def SmallCode : Predicate<"TM.getCodeModel() == CodeModel::Small">;
def NotSmallCode : Predicate<"TM.getCodeModel() != CodeModel::Small">;
def IsStatic : Predicate<"TM.getRelocationModel() == Reloc::Static">;

View File

@ -484,15 +484,18 @@ bool X86RegisterInfo::hasReservedSpillSlot(MachineFunction &MF, unsigned Reg,
int
X86RegisterInfo::getFrameIndexOffset(MachineFunction &MF, int FI) const {
int Offset = MF.getFrameInfo()->getObjectOffset(FI) + SlotSize;
uint64_t StackSize = MF.getFrameInfo()->getStackSize();
const TargetFrameInfo &TFI = *MF.getTarget().getFrameInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
int Offset = MFI->getObjectOffset(FI) - TFI.getOffsetOfLocalArea();
uint64_t StackSize = MFI->getStackSize();
if (needsStackRealignment(MF)) {
if (FI < 0)
// Skip the saved EBP
Offset += SlotSize;
else {
unsigned Align = MF.getFrameInfo()->getObjectAlignment(FI);
unsigned Align = MFI->getObjectAlignment(FI);
assert( (-(Offset + StackSize)) % Align == 0);
Align = 0;
return Offset + StackSize;
@ -622,14 +625,14 @@ void X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
void
X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const {
MachineFrameInfo *FFI = MF.getFrameInfo();
MachineFrameInfo *MFI = MF.getFrameInfo();
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
unsigned MaxAlign = std::max(FFI->getMaxAlignment(),
calculateMaxStackAlignment(FFI));
unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
calculateMaxStackAlignment(MFI));
FFI->setMaxAlignment(MaxAlign);
MFI->setMaxAlignment(MaxAlign);
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@ -643,18 +646,18 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// ...
// }
// [EBP]
MF.getFrameInfo()->
CreateFixedObject(-TailCallReturnAddrDelta,
(-1*SlotSize)+TailCallReturnAddrDelta);
MFI->CreateFixedObject(-TailCallReturnAddrDelta,
(-1*SlotSize)+TailCallReturnAddrDelta);
}
if (hasFP(MF)) {
assert((TailCallReturnAddrDelta <= 0) &&
"The Delta should always be zero or negative");
// Create a frame entry for the EBP register that must be saved.
int FrameIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize,
(int)SlotSize * -2+
TailCallReturnAddrDelta);
assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
int FrameIdx = MFI->CreateFixedObject(SlotSize,
(int)SlotSize * -2+
TailCallReturnAddrDelta);
assert(FrameIdx == MFI->getObjectIndexBegin() &&
"Slot for EBP register must be last in order to be found!");
FrameIdx = 0;
}
@ -887,6 +890,11 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
StackSize = std::max(MinSize,
StackSize > 128 ? StackSize - 128 : 0);
MFI->setStackSize(StackSize);
} else if (Subtarget->isTargetWin64()) {
// We need to always allocate 32 bytes as register spill area.
// FIXME: we might reuse these 32 bytes for leaf functions.
StackSize += 32;
MFI->setStackSize(StackSize);
}
// Insert stack pointer adjustment for later moving of return addr. Only

View File

@ -64,7 +64,9 @@ X86TargetMachine::X86TargetMachine(const Target &T, const std::string &TT,
Subtarget(TT, FS, is64Bit),
DataLayout(Subtarget.getDataLayout()),
FrameInfo(TargetFrameInfo::StackGrowsDown,
Subtarget.getStackAlignment(), Subtarget.is64Bit() ? -8 : -4),
Subtarget.getStackAlignment(),
(Subtarget.isTargetWin64() ? -40 :
(Subtarget.is64Bit() ? -8 : -4))),
InstrInfo(*this), JITInfo(*this), TLInfo(*this), ELFWriterInfo(*this) {
DefRelocModel = getRelocationModel();

View File

@ -1,4 +1,4 @@
; RUN: llvm-as < %s | llc | grep "subq.*\\\$8, \\\%rsp"
; RUN: llvm-as < %s | llc | grep "subq.*\\\$40, \\\%rsp"
target triple = "x86_64-mingw64"
define x86_fp80 @a(i64 %x) nounwind readnone {

View File

@ -1,7 +1,7 @@
; RUN: llvm-as < %s | llc -o %t1 -f
; RUN: grep "subq.*\\\$40, \\\%rsp" %t1
; RUN: grep "movaps \\\%xmm8, \\\(\\\%rsp\\\)" %t1
; RUN: grep "movaps \\\%xmm7, 16\\\(\\\%rsp\\\)" %t1
; RUN: grep "subq.*\\\$72, \\\%rsp" %t1
; RUN: grep "movaps \\\%xmm8, 32\\\(\\\%rsp\\\)" %t1
; RUN: grep "movaps \\\%xmm7, 48\\\(\\\%rsp\\\)" %t1
target triple = "x86_64-mingw64"
define i32 @a() nounwind {