mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-07-05 13:29:22 +00:00
Add support for dynamic stack realignment in the presence of dynamic allocas on
X86. rdar://11496434 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158087 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8b421c8eb2
commit
a97b180fc4
@ -650,6 +650,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
|
|||||||
unsigned SlotSize = RegInfo->getSlotSize();
|
unsigned SlotSize = RegInfo->getSlotSize();
|
||||||
unsigned FramePtr = RegInfo->getFrameRegister(MF);
|
unsigned FramePtr = RegInfo->getFrameRegister(MF);
|
||||||
unsigned StackPtr = RegInfo->getStackRegister();
|
unsigned StackPtr = RegInfo->getStackRegister();
|
||||||
|
unsigned BasePtr = RegInfo->getBaseRegister();
|
||||||
DebugLoc DL;
|
DebugLoc DL;
|
||||||
|
|
||||||
// If we're forcing a stack realignment we can't rely on just the frame
|
// If we're forcing a stack realignment we can't rely on just the frame
|
||||||
@ -913,6 +914,18 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF) const {
|
|||||||
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
|
emitSPUpdate(MBB, MBBI, StackPtr, -(int64_t)NumBytes, Is64Bit,
|
||||||
UseLEA, TII, *RegInfo);
|
UseLEA, TII, *RegInfo);
|
||||||
|
|
||||||
|
// If we need a base pointer, set it up here. It's whatever the value
|
||||||
|
// of the stack pointer is at this point. Any variable size objects
|
||||||
|
// will be allocated after this, so we can still use the base pointer
|
||||||
|
// to reference locals.
|
||||||
|
if (RegInfo->hasBasePointer(MF)) {
|
||||||
|
// Update the base pointer with the current stack pointer.
|
||||||
|
unsigned Opc = Is64Bit ? X86::MOV64rr : X86::MOV32rr;
|
||||||
|
BuildMI(MBB, MBBI, DL, TII.get(Opc), BasePtr)
|
||||||
|
.addReg(StackPtr)
|
||||||
|
.setMIFlag(MachineInstr::FrameSetup);
|
||||||
|
}
|
||||||
|
|
||||||
if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
|
if (( (!HasFP && NumBytes) || PushedRegs) && needsFrameMoves) {
|
||||||
// Mark end of stack pointer adjustment.
|
// Mark end of stack pointer adjustment.
|
||||||
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
|
MCSymbol *Label = MMI.getContext().CreateTempSymbol();
|
||||||
@ -1148,7 +1161,16 @@ int X86FrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) con
|
|||||||
int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
|
int Offset = MFI->getObjectOffset(FI) - getOffsetOfLocalArea();
|
||||||
uint64_t StackSize = MFI->getStackSize();
|
uint64_t StackSize = MFI->getStackSize();
|
||||||
|
|
||||||
if (RegInfo->needsStackRealignment(MF)) {
|
if (RegInfo->hasBasePointer(MF)) {
|
||||||
|
assert (hasFP(MF) && "VLAs and dynamic stack realign, but no FP?!");
|
||||||
|
if (FI < 0) {
|
||||||
|
// Skip the saved EBP.
|
||||||
|
return Offset + RegInfo->getSlotSize();
|
||||||
|
} else {
|
||||||
|
assert((-(Offset + StackSize)) % MFI->getObjectAlignment(FI) == 0);
|
||||||
|
return Offset + StackSize;
|
||||||
|
}
|
||||||
|
} else if (RegInfo->needsStackRealignment(MF)) {
|
||||||
if (FI < 0) {
|
if (FI < 0) {
|
||||||
// Skip the saved EBP.
|
// Skip the saved EBP.
|
||||||
return Offset + RegInfo->getSlotSize();
|
return Offset + RegInfo->getSlotSize();
|
||||||
@ -1179,9 +1201,14 @@ int X86FrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
|
|||||||
const X86RegisterInfo *RegInfo =
|
const X86RegisterInfo *RegInfo =
|
||||||
static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
|
static_cast<const X86RegisterInfo*>(MF.getTarget().getRegisterInfo());
|
||||||
// We can't calculate offset from frame pointer if the stack is realigned,
|
// We can't calculate offset from frame pointer if the stack is realigned,
|
||||||
// so enforce usage of stack pointer.
|
// so enforce usage of stack/base pointer. The base pointer is used when we
|
||||||
FrameReg = (RegInfo->needsStackRealignment(MF)) ?
|
// have dynamic allocas in addition to dynamic realignment.
|
||||||
RegInfo->getStackRegister() : RegInfo->getFrameRegister(MF);
|
if (RegInfo->hasBasePointer(MF))
|
||||||
|
FrameReg = RegInfo->getBaseRegister();
|
||||||
|
else if (RegInfo->needsStackRealignment(MF))
|
||||||
|
FrameReg = RegInfo->getStackRegister();
|
||||||
|
else
|
||||||
|
FrameReg = RegInfo->getFrameRegister(MF);
|
||||||
return getFrameIndexOffset(MF, FI);
|
return getFrameIndexOffset(MF, FI);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1318,6 +1345,10 @@ X86FrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
|
|||||||
"Slot for EBP register must be last in order to be found!");
|
"Slot for EBP register must be last in order to be found!");
|
||||||
(void)FrameIdx;
|
(void)FrameIdx;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Spill the BasePtr if it's used.
|
||||||
|
if (RegInfo->hasBasePointer(MF))
|
||||||
|
MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister());
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool
|
static bool
|
||||||
|
@ -50,6 +50,10 @@ ForceStackAlign("force-align-stack",
|
|||||||
" needed for the function."),
|
" needed for the function."),
|
||||||
cl::init(false), cl::Hidden);
|
cl::init(false), cl::Hidden);
|
||||||
|
|
||||||
|
cl::opt<bool>
|
||||||
|
EnableBasePointer("x86-use-base-pointer", cl::Hidden, cl::init(true),
|
||||||
|
cl::desc("Enable use of a base pointer for complex stack frames"));
|
||||||
|
|
||||||
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
|
X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
|
||||||
const TargetInstrInfo &tii)
|
const TargetInstrInfo &tii)
|
||||||
: X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit()
|
: X86GenRegisterInfo(tm.getSubtarget<X86Subtarget>().is64Bit()
|
||||||
@ -68,10 +72,12 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
|
|||||||
SlotSize = 8;
|
SlotSize = 8;
|
||||||
StackPtr = X86::RSP;
|
StackPtr = X86::RSP;
|
||||||
FramePtr = X86::RBP;
|
FramePtr = X86::RBP;
|
||||||
|
BasePtr = X86::RBX;
|
||||||
} else {
|
} else {
|
||||||
SlotSize = 4;
|
SlotSize = 4;
|
||||||
StackPtr = X86::ESP;
|
StackPtr = X86::ESP;
|
||||||
FramePtr = X86::EBP;
|
FramePtr = X86::EBP;
|
||||||
|
BasePtr = X86::EBX;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -290,6 +296,20 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
|||||||
Reserved.set(*I);
|
Reserved.set(*I);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set the base-pointer register and its aliases as reserved if needed.
|
||||||
|
if (hasBasePointer(MF)) {
|
||||||
|
CallingConv::ID CC = MF.getFunction()->getCallingConv();
|
||||||
|
const uint32_t* RegMask = getCallPreservedMask(CC);
|
||||||
|
if (MachineOperand::clobbersPhysReg(RegMask, getBaseRegister()))
|
||||||
|
report_fatal_error(
|
||||||
|
"Stack realignment in presence of dynamic allocas is not supported with"
|
||||||
|
"this calling convention.");
|
||||||
|
|
||||||
|
Reserved.set(getBaseRegister());
|
||||||
|
for (MCSubRegIterator I(getBaseRegister(), this); I.isValid(); ++I)
|
||||||
|
Reserved.set(*I);
|
||||||
|
}
|
||||||
|
|
||||||
// Mark the segment registers as reserved.
|
// Mark the segment registers as reserved.
|
||||||
Reserved.set(X86::CS);
|
Reserved.set(X86::CS);
|
||||||
Reserved.set(X86::SS);
|
Reserved.set(X86::SS);
|
||||||
@ -340,10 +360,35 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
|||||||
// Stack Frame Processing methods
|
// Stack Frame Processing methods
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
|
||||||
|
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||||
|
|
||||||
|
if (!EnableBasePointer)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// When we need stack realignment and there are dynamic allocas, we can't
|
||||||
|
// reference off of the stack pointer, so we reserve a base pointer.
|
||||||
|
if (needsStackRealignment(MF) && MFI->hasVarSizedObjects())
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
|
bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
|
||||||
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
const MachineFrameInfo *MFI = MF.getFrameInfo();
|
||||||
return (MF.getTarget().Options.RealignStack &&
|
const MachineRegisterInfo *MRI = &MF.getRegInfo();
|
||||||
!MFI->hasVarSizedObjects());
|
if (!MF.getTarget().Options.RealignStack)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// Stack realignment requires a frame pointer. If we already started
|
||||||
|
// register allocation with frame pointer elimination, it is too late now.
|
||||||
|
if (!MRI->canReserveReg(FramePtr))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
// If a base pointer is necessary, check that it isn't too late to reserve it.
|
||||||
|
if (MFI->hasVarSizedObjects())
|
||||||
|
return MRI->canReserveReg(BasePtr);
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
|
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
|
||||||
@ -353,13 +398,6 @@ bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
|
|||||||
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
|
bool requiresRealignment = ((MFI->getMaxAlignment() > StackAlign) ||
|
||||||
F->hasFnAttr(Attribute::StackAlignment));
|
F->hasFnAttr(Attribute::StackAlignment));
|
||||||
|
|
||||||
// FIXME: Currently we don't support stack realignment for functions with
|
|
||||||
// variable-sized allocas.
|
|
||||||
// FIXME: It's more complicated than this...
|
|
||||||
if (0 && requiresRealignment && MFI->hasVarSizedObjects())
|
|
||||||
report_fatal_error(
|
|
||||||
"Stack realignment in presence of dynamic allocas is not supported");
|
|
||||||
|
|
||||||
// If we've requested that we force align the stack do so now.
|
// If we've requested that we force align the stack do so now.
|
||||||
if (ForceStackAlign)
|
if (ForceStackAlign)
|
||||||
return canRealignStack(MF);
|
return canRealignStack(MF);
|
||||||
@ -499,7 +537,9 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
|||||||
|
|
||||||
unsigned Opc = MI.getOpcode();
|
unsigned Opc = MI.getOpcode();
|
||||||
bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
|
bool AfterFPPop = Opc == X86::TAILJMPm64 || Opc == X86::TAILJMPm;
|
||||||
if (needsStackRealignment(MF))
|
if (hasBasePointer(MF))
|
||||||
|
BasePtr = getBaseRegister();
|
||||||
|
else if (needsStackRealignment(MF))
|
||||||
BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
|
BasePtr = (FrameIndex < 0 ? FramePtr : StackPtr);
|
||||||
else if (AfterFPPop)
|
else if (AfterFPPop)
|
||||||
BasePtr = StackPtr;
|
BasePtr = StackPtr;
|
||||||
|
@ -50,6 +50,11 @@ private:
|
|||||||
///
|
///
|
||||||
unsigned FramePtr;
|
unsigned FramePtr;
|
||||||
|
|
||||||
|
/// BasePtr - X86 physical register used as a base ptr in complex stack
|
||||||
|
/// frames. I.e., when we need a 3rd base, not just SP and FP, due to
|
||||||
|
/// variable size stack objects.
|
||||||
|
unsigned BasePtr;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
|
X86RegisterInfo(X86TargetMachine &tm, const TargetInstrInfo &tii);
|
||||||
|
|
||||||
@ -106,6 +111,8 @@ public:
|
|||||||
/// register scavenger to determine what registers are free.
|
/// register scavenger to determine what registers are free.
|
||||||
BitVector getReservedRegs(const MachineFunction &MF) const;
|
BitVector getReservedRegs(const MachineFunction &MF) const;
|
||||||
|
|
||||||
|
bool hasBasePointer(const MachineFunction &MF) const;
|
||||||
|
|
||||||
bool canRealignStack(const MachineFunction &MF) const;
|
bool canRealignStack(const MachineFunction &MF) const;
|
||||||
|
|
||||||
bool needsStackRealignment(const MachineFunction &MF) const;
|
bool needsStackRealignment(const MachineFunction &MF) const;
|
||||||
@ -123,6 +130,7 @@ public:
|
|||||||
// Debug information queries.
|
// Debug information queries.
|
||||||
unsigned getFrameRegister(const MachineFunction &MF) const;
|
unsigned getFrameRegister(const MachineFunction &MF) const;
|
||||||
unsigned getStackRegister() const { return StackPtr; }
|
unsigned getStackRegister() const { return StackPtr; }
|
||||||
|
unsigned getBaseRegister() const { return BasePtr; }
|
||||||
// FIXME: Move to FrameInfo
|
// FIXME: Move to FrameInfo
|
||||||
unsigned getSlotSize() const { return SlotSize; }
|
unsigned getSlotSize() const { return SlotSize; }
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | grep and | count 1
|
; RUN: llc < %s -march=x86 -mtriple=i686-apple-darwin | FileCheck %s
|
||||||
|
|
||||||
declare void @bar(<2 x i64>* %n)
|
declare void @bar(<2 x i64>* %n)
|
||||||
|
|
||||||
@ -6,10 +6,15 @@ define void @foo(i32 %h) {
|
|||||||
%p = alloca <2 x i64>, i32 %h
|
%p = alloca <2 x i64>, i32 %h
|
||||||
call void @bar(<2 x i64>* %p)
|
call void @bar(<2 x i64>* %p)
|
||||||
ret void
|
ret void
|
||||||
|
; CHECK: foo
|
||||||
|
; CHECK-NOT: andl $-32, %eax
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @foo2(i32 %h) {
|
define void @foo2(i32 %h) {
|
||||||
%p = alloca <2 x i64>, i32 %h, align 32
|
%p = alloca <2 x i64>, i32 %h, align 32
|
||||||
call void @bar(<2 x i64>* %p)
|
call void @bar(<2 x i64>* %p)
|
||||||
ret void
|
ret void
|
||||||
|
; CHECK: foo2
|
||||||
|
; CHECK: andl $-32, %esp
|
||||||
|
; CHECK: andl $-32, %eax
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | grep and | count 1
|
; RUN: llc < %s -march=x86-64 -mtriple=i686-pc-linux | FileCheck %s
|
||||||
|
|
||||||
declare void @bar(<2 x i64>* %n)
|
declare void @bar(<2 x i64>* %n)
|
||||||
|
|
||||||
@ -6,10 +6,15 @@ define void @foo(i64 %h) {
|
|||||||
%p = alloca <2 x i64>, i64 %h
|
%p = alloca <2 x i64>, i64 %h
|
||||||
call void @bar(<2 x i64>* %p)
|
call void @bar(<2 x i64>* %p)
|
||||||
ret void
|
ret void
|
||||||
|
; CHECK: foo
|
||||||
|
; CHECK-NOT: andq $-32, %rax
|
||||||
}
|
}
|
||||||
|
|
||||||
define void @foo2(i64 %h) {
|
define void @foo2(i64 %h) {
|
||||||
%p = alloca <2 x i64>, i64 %h, align 32
|
%p = alloca <2 x i64>, i64 %h, align 32
|
||||||
call void @bar(<2 x i64>* %p)
|
call void @bar(<2 x i64>* %p)
|
||||||
ret void
|
ret void
|
||||||
|
; CHECK: foo2
|
||||||
|
; CHECK: andq $-32, %rsp
|
||||||
|
; CHECK: andq $-32, %rax
|
||||||
}
|
}
|
||||||
|
158
test/CodeGen/X86/dynamic-allocas-VLAs.ll
Normal file
158
test/CodeGen/X86/dynamic-allocas-VLAs.ll
Normal file
@ -0,0 +1,158 @@
|
|||||||
|
; RUN: llc < %s -march=x86-64 -mattr=+avx -mtriple=i686-apple-darwin10 | FileCheck %s
|
||||||
|
; rdar://11496434
|
||||||
|
|
||||||
|
; no VLAs or dynamic alignment
|
||||||
|
define i32 @t1() nounwind uwtable ssp {
|
||||||
|
entry:
|
||||||
|
%a = alloca i32, align 4
|
||||||
|
call void @t1_helper(i32* %a) nounwind
|
||||||
|
%0 = load i32* %a, align 4
|
||||||
|
%add = add nsw i32 %0, 13
|
||||||
|
ret i32 %add
|
||||||
|
|
||||||
|
; CHECK: _t1
|
||||||
|
; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
|
||||||
|
; CHECK: leaq [[OFFSET:[0-9]*]](%rsp), %rdi
|
||||||
|
; CHECK: callq _t1_helper
|
||||||
|
; CHECK: movl [[OFFSET]](%rsp), %eax
|
||||||
|
; CHECK: addl $13, %eax
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @t1_helper(i32*)
|
||||||
|
|
||||||
|
; dynamic realignment
|
||||||
|
define i32 @t2() nounwind uwtable ssp {
|
||||||
|
entry:
|
||||||
|
%a = alloca i32, align 4
|
||||||
|
%v = alloca <8 x float>, align 32
|
||||||
|
call void @t2_helper(i32* %a, <8 x float>* %v) nounwind
|
||||||
|
%0 = load i32* %a, align 4
|
||||||
|
%add = add nsw i32 %0, 13
|
||||||
|
ret i32 %add
|
||||||
|
|
||||||
|
; CHECK: _t2
|
||||||
|
; CHECK: pushq %rbp
|
||||||
|
; CHECK: movq %rsp, %rbp
|
||||||
|
; CHECK: andq $-32, %rsp
|
||||||
|
; CHECK: subq ${{[0-9]+}}, %rsp
|
||||||
|
;
|
||||||
|
; CHECK: leaq {{[0-9]*}}(%rsp), %rdi
|
||||||
|
; CHECK: leaq {{[0-9]*}}(%rsp), %rsi
|
||||||
|
; CHECK: callq _t2_helper
|
||||||
|
;
|
||||||
|
; CHECK: movq %rbp, %rsp
|
||||||
|
; CHECK: popq %rbp
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @t2_helper(i32*, <8 x float>*)
|
||||||
|
|
||||||
|
; VLAs
|
||||||
|
define i32 @t3(i64 %sz) nounwind uwtable ssp {
|
||||||
|
entry:
|
||||||
|
%a = alloca i32, align 4
|
||||||
|
%vla = alloca i32, i64 %sz, align 16
|
||||||
|
call void @t3_helper(i32* %a, i32* %vla) nounwind
|
||||||
|
%0 = load i32* %a, align 4
|
||||||
|
%add = add nsw i32 %0, 13
|
||||||
|
ret i32 %add
|
||||||
|
|
||||||
|
; CHECK: _t3
|
||||||
|
; CHECK: pushq %rbp
|
||||||
|
; CHECK: movq %rsp, %rbp
|
||||||
|
; CHECK: pushq %rbx
|
||||||
|
; CHECK-NOT: andq $-{{[0-9]+}}, %rsp
|
||||||
|
; CHECK: subq ${{[0-9]+}}, %rsp
|
||||||
|
;
|
||||||
|
; CHECK: leaq -{{[0-9]+}}(%rbp), %rsp
|
||||||
|
; CHECK: popq %rbx
|
||||||
|
; CHECK: popq %rbp
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @t3_helper(i32*, i32*)
|
||||||
|
|
||||||
|
; VLAs + Dynamic realignment
|
||||||
|
define i32 @t4(i64 %sz) nounwind uwtable ssp {
|
||||||
|
entry:
|
||||||
|
%a = alloca i32, align 4
|
||||||
|
%v = alloca <8 x float>, align 32
|
||||||
|
%vla = alloca i32, i64 %sz, align 16
|
||||||
|
call void @t4_helper(i32* %a, i32* %vla, <8 x float>* %v) nounwind
|
||||||
|
%0 = load i32* %a, align 4
|
||||||
|
%add = add nsw i32 %0, 13
|
||||||
|
ret i32 %add
|
||||||
|
|
||||||
|
; CHECK: _t4
|
||||||
|
; CHECK: pushq %rbp
|
||||||
|
; CHECK: movq %rsp, %rbp
|
||||||
|
; CHECK: andq $-32, %rsp
|
||||||
|
; CHECK: pushq %r14
|
||||||
|
; CHECK: pushq %rbx
|
||||||
|
; CHECK: subq $[[STACKADJ:[0-9]+]], %rsp
|
||||||
|
; CHECK: movq %rsp, %rbx
|
||||||
|
;
|
||||||
|
; CHECK: leaq {{[0-9]*}}(%rbx), %rdi
|
||||||
|
; CHECK: leaq {{[0-9]*}}(%rbx), %rdx
|
||||||
|
; CHECK: callq _t4_helper
|
||||||
|
;
|
||||||
|
; CHECK: addq $[[STACKADJ]], %rsp
|
||||||
|
; CHECK: popq %rbx
|
||||||
|
; CHECK: popq %r14
|
||||||
|
; CHECK: movq %rbp, %rsp
|
||||||
|
; CHECK: popq %rbp
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @t4_helper(i32*, i32*, <8 x float>*)
|
||||||
|
|
||||||
|
; Dynamic realignment + Spill
|
||||||
|
define i32 @t5(float* nocapture %f) nounwind uwtable ssp {
|
||||||
|
entry:
|
||||||
|
%a = alloca i32, align 4
|
||||||
|
%0 = bitcast float* %f to <8 x float>*
|
||||||
|
%1 = load <8 x float>* %0, align 32
|
||||||
|
call void @t5_helper1(i32* %a) nounwind
|
||||||
|
call void @t5_helper2(<8 x float> %1) nounwind
|
||||||
|
%2 = load i32* %a, align 4
|
||||||
|
%add = add nsw i32 %2, 13
|
||||||
|
ret i32 %add
|
||||||
|
|
||||||
|
; CHECK: _t5
|
||||||
|
; CHECK: pushq %rbp
|
||||||
|
; CHECK: movq %rsp, %rbp
|
||||||
|
; CHECK: andq $-32, %rsp
|
||||||
|
; CHECK: subq ${{[0-9]+}}, %rsp
|
||||||
|
;
|
||||||
|
; CHECK: vmovaps (%rdi), [[AVXREG:%ymm[0-9]+]]
|
||||||
|
; CHECK: vmovaps [[AVXREG]], (%rsp)
|
||||||
|
; CHECK: leaq {{[0-9]+}}(%rsp), %rdi
|
||||||
|
; CHECK: callq _t5_helper1
|
||||||
|
; CHECK: vmovaps (%rsp), %ymm0
|
||||||
|
; CHECK: callq _t5_helper2
|
||||||
|
; CHECK: movl {{[0-9]+}}(%rsp), %eax
|
||||||
|
;
|
||||||
|
; CHECK: movq %rbp, %rsp
|
||||||
|
; CHECK: popq %rbp
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @t5_helper1(i32*)
|
||||||
|
|
||||||
|
declare void @t5_helper2(<8 x float>)
|
||||||
|
|
||||||
|
; VLAs + Dynamic realignment + Spill
|
||||||
|
; FIXME: RA has already reserved RBX, so we can't do dynamic realignment.
|
||||||
|
define i32 @t6(i64 %sz, float* nocapture %f) nounwind uwtable ssp {
|
||||||
|
entry:
|
||||||
|
; CHECK: _t6
|
||||||
|
%a = alloca i32, align 4
|
||||||
|
%0 = bitcast float* %f to <8 x float>*
|
||||||
|
%1 = load <8 x float>* %0, align 32
|
||||||
|
%vla = alloca i32, i64 %sz, align 16
|
||||||
|
call void @t6_helper1(i32* %a, i32* %vla) nounwind
|
||||||
|
call void @t6_helper2(<8 x float> %1) nounwind
|
||||||
|
%2 = load i32* %a, align 4
|
||||||
|
%add = add nsw i32 %2, 13
|
||||||
|
ret i32 %add
|
||||||
|
}
|
||||||
|
|
||||||
|
declare void @t6_helper1(i32*, i32*)
|
||||||
|
|
||||||
|
declare void @t6_helper2(<8 x float>)
|
Loading…
Reference in New Issue
Block a user