[AArch64] Add support for dynamic stack alignment

Differential Revision: http://reviews.llvm.org/D8876



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@234471 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Kristof Beyls 2015-04-09 08:49:47 +00:00
parent 64008ef318
commit 2a6ad5bfb2
5 changed files with 663 additions and 43 deletions

View File

@ -9,6 +9,82 @@
//
// This file contains the AArch64 implementation of TargetFrameLowering class.
//
// On AArch64, stack frames are structured as follows:
//
// The stack grows downward.
//
// All of the individual frame areas on the frame below are optional, i.e. it's
// possible to create a function so that the particular area isn't present
// in the frame.
//
// At function entry, the "frame" looks as follows:
//
// | | Higher address
// |-----------------------------------|
// | |
// | arguments passed on the stack |
// | |
// |-----------------------------------| <- sp
// | | Lower address
//
//
// After the prologue has run, the frame has the following general structure.
// Note that this doesn't depict the case where a red-zone is used. Also,
// technically the last frame area (VLAs) doesn't get created until in the
// main function body, after the prologue is run. However, it's depicted here
// for completeness.
//
// | | Higher address
// |-----------------------------------|
// | |
// | arguments passed on the stack |
// | |
// |-----------------------------------|
// | |
// | prev_fp, prev_lr |
// | (a.k.a. "frame record") |
// |-----------------------------------| <- fp(=x29)
// | |
// | other callee-saved registers |
// | |
// |-----------------------------------|
// |.empty.space.to.make.part.below....|
// |.aligned.in.case.it.needs.more.than| (size of this area is unknown at
// |.the.standard.16-byte.alignment....| compile time; if present)
// |-----------------------------------|
// | |
// | local variables of fixed size |
// | including spill slots |
// |-----------------------------------| <- bp(not defined by ABI,
// |.variable-sized.local.variables....| LLVM chooses X19)
// |.(VLAs)............................| (size of this area is unknown at
// |...................................| compile time)
// |-----------------------------------| <- sp
// | | Lower address
//
//
// To access the data in a frame, at-compile time, a constant offset must be
// computable from one of the pointers (fp, bp, sp) to access it. The size
// of the areas with a dotted background cannot be computed at compile-time
// if they are present, making it required to have all three of fp, bp and
// sp to be set up to be able to access all contents in the frame areas,
// assuming all of the frame areas are non-empty.
//
// For most functions, some of the frame areas are empty. For those functions,
// it may not be necessary to set up fp or bp:
// * A base pointer is definitly needed when there are both VLAs and local
// variables with more-than-default alignment requirements.
// * A frame pointer is definitly needed when there are local variables with
// more-than-default alignment requirements.
//
// In some cases when a base pointer is not strictly needed, it is generated
// anyway when offsets from the frame pointer to access local variables become
// so large that the offset can't be encoded in the immediate fields of loads
// or stores.
//
// FIXME: also explain the redzone concept.
// FIXME: also explain the concept of reserved call frames.
//
//===----------------------------------------------------------------------===//
#include "AArch64FrameLowering.h"
@ -39,26 +115,6 @@ static cl::opt<bool> EnableRedZone("aarch64-redzone",
STATISTIC(NumRedZoneFunctions, "Number of functions using red zone");
static unsigned estimateStackSize(MachineFunction &MF) {
const MachineFrameInfo *FFI = MF.getFrameInfo();
int Offset = 0;
for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
int FixedOff = -FFI->getObjectOffset(i);
if (FixedOff > Offset)
Offset = FixedOff;
}
for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) {
if (FFI->isDeadObjectIndex(i))
continue;
Offset += FFI->getObjectSize(i);
unsigned Align = FFI->getObjectAlignment(i);
// Adjust to alignment boundary
Offset = (Offset + Align - 1) / Align * Align;
}
// This does not include the 16 bytes used for fp and lr.
return (unsigned)Offset;
}
bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
if (!EnableRedZone)
return false;
@ -83,16 +139,10 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
/// pointer register.
bool AArch64FrameLowering::hasFP(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
#ifndef NDEBUG
const TargetRegisterInfo *RegInfo = MF.getSubtarget().getRegisterInfo();
assert(!RegInfo->needsStackRealignment(MF) &&
"No stack realignment on AArch64!");
#endif
return (MFI->hasCalls() || MFI->hasVarSizedObjects() ||
MFI->isFrameAddressTaken() || MFI->hasStackMap() ||
MFI->hasPatchPoint());
MFI->hasPatchPoint() || RegInfo->needsStackRealignment(MF));
}
/// hasReservedCallFrame - Under normal circumstances, when a frame pointer is
@ -288,11 +338,48 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
AFI->setLocalStackSize(NumBytes);
// Allocate space for the rest of the frame.
if (NumBytes) {
// If we're a leaf function, try using the red zone.
if (!canUseRedZone(MF))
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP, -NumBytes, TII,
MachineInstr::FrameSetup);
const unsigned Alignment = MFI->getMaxAlignment();
const bool NeedsRealignment = (Alignment > 16);
unsigned scratchSPReg = AArch64::SP;
if (NeedsRealignment) {
// Use the first callee-saved register as a scratch register
assert(MF.getRegInfo().isPhysRegUsed(AArch64::X9) &&
"No scratch register to align SP!");
scratchSPReg = AArch64::X9;
}
// If we're a leaf function, try using the red zone.
if (NumBytes && !canUseRedZone(MF))
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
// the correct value here, as NumBytes also includes padding bytes,
// which shouldn't be counted here.
emitFrameOffset(MBB, MBBI, DL, scratchSPReg, AArch64::SP, -NumBytes, TII,
MachineInstr::FrameSetup);
assert(!(NeedsRealignment && NumBytes==0) &&
"NumBytes should never be 0 when realignment is needed");
if (NumBytes && NeedsRealignment) {
const unsigned NrBitsToZero = countTrailingZeros(Alignment);
assert(NrBitsToZero > 1);
assert(scratchSPReg != AArch64::SP);
// SUB X9, SP, NumBytes
// -- X9 is temporary register, so shouldn't contain any live data here,
// -- free to use. This is already produced by emitFrameOffset above.
// AND SP, X9, 0b11111...0000
// The logical immediates have a non-trivial encoding. The following
// formula computes the encoded immediate with all ones but
// NrBitsToZero zero bits as least significant bits.
uint32_t andMaskEncoded =
(1 <<12) // = N
| ((64-NrBitsToZero) << 6) // immr
| ((64-NrBitsToZero-1) << 0) // imms
;
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ANDXri), AArch64::SP)
.addReg(scratchSPReg, RegState::Kill)
.addImm(andMaskEncoded);
}
// If we need a base pointer, set it up here. It's whatever the value of the
@ -302,15 +389,15 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const {
// FIXME: Clarify FrameSetup flags here.
// Note: Use emitFrameOffset() like above for FP if the FrameSetup flag is
// needed.
//
if (RegInfo->hasBasePointer(MF))
TII->copyPhysReg(MBB, MBBI, DL, AArch64::X19, AArch64::SP, false);
if (RegInfo->hasBasePointer(MF)) {
TII->copyPhysReg(MBB, MBBI, DL, RegInfo->getBaseRegister(), AArch64::SP,
false);
}
if (needsFrameMoves) {
const DataLayout *TD = MF.getTarget().getDataLayout();
const int StackGrowth = -TD->getPointerSize(0);
unsigned FramePtr = RegInfo->getFrameRegister(MF);
// An example of the prologue:
//
// .globl __foo
@ -460,7 +547,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
if (MF.getFunction()->getCallingConv() == CallingConv::GHC)
return;
// Initial and residual are named for consitency with the prologue. Note that
// Initial and residual are named for consistency with the prologue. Note that
// in the epilogue, the residual adjustment is executed first.
uint64_t ArgumentPopSize = 0;
if (RetOpcode == AArch64::TCRETURNdi || RetOpcode == AArch64::TCRETURNri) {
@ -571,9 +658,9 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
bool isFixed = MFI->isFixedObjectIndex(FI);
// Use frame pointer to reference fixed objects. Use it for locals if
// there are VLAs (and thus the SP isn't reliable as a base).
// Make sure useFPForScavengingIndex() does the right thing for the emergency
// spill slot.
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
// reliable as a base). Make sure useFPForScavengingIndex() does the
// right thing for the emergency spill slot.
bool UseFP = false;
if (AFI->hasStackFrame()) {
// Note: Keeping the following as multiple 'if' statements rather than
@ -582,7 +669,8 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
// Argument access should always use the FP.
if (isFixed) {
UseFP = hasFP(MF);
} else if (hasFP(MF) && !RegInfo->hasBasePointer(MF)) {
} else if (hasFP(MF) && !RegInfo->hasBasePointer(MF) &&
!RegInfo->needsStackRealignment(MF)) {
// Use SP or FP, whichever gives us the best chance of the offset
// being in range for direct access. If the FPOffset is positive,
// that'll always be best, as the SP will be even further away.
@ -598,6 +686,10 @@ int AArch64FrameLowering::resolveFrameIndexReference(const MachineFunction &MF,
}
}
assert((isFixed || !RegInfo->needsStackRealignment(MF) || !UseFP) &&
"In the presence of dynamic stack pointer realignment, "
"non-argument objects cannot be accessed through the frame pointer");
if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
return FPOffset;
@ -794,6 +886,9 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
if (RegInfo->hasBasePointer(MF))
MRI->setPhysRegUsed(RegInfo->getBaseRegister());
if (RegInfo->needsStackRealignment(MF) && !RegInfo->hasBasePointer(MF))
MRI->setPhysRegUsed(AArch64::X9);
// If any callee-saved registers are used, the frame cannot be eliminated.
unsigned NumGPRSpilled = 0;
unsigned NumFPRSpilled = 0;
@ -867,7 +962,8 @@ void AArch64FrameLowering::processFunctionBeforeCalleeSavedScan(
// The CSR spill slots have not been allocated yet, so estimateStackSize
// won't include them.
MachineFrameInfo *MFI = MF.getFrameInfo();
unsigned CFSize = estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
unsigned CFSize =
MFI->estimateStackSize(MF) + 8 * (NumGPRSpilled + NumFPRSpilled);
DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
bool BigStack = (CFSize >= 256);
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))

View File

@ -22,7 +22,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
public:
explicit AArch64FrameLowering()
: TargetFrameLowering(StackGrowsDown, 16, 0, 16,
false /*StackRealignable*/) {}
true /*StackRealignable*/) {}
void emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,

View File

@ -165,7 +165,12 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
// large enough that referencing from the FP won't result in things being
// in range relatively often, we can use a base pointer to allow access
// from the other direction like the SP normally works.
// Furthermore, if both variable sized objects are present, and the
// stack needs to be dynamically re-aligned, the base pointer is the only
// reliable way to reference the locals.
if (MFI->hasVarSizedObjects()) {
if (needsStackRealignment(MF))
return true;
// Conservatively estimate whether the negative offset from the frame
// pointer will be sufficient to reach. If a function has a smallish
// frame, it's less likely to have lots of spills and callee saved
@ -181,6 +186,31 @@ bool AArch64RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
return false;
}
bool AArch64RegisterInfo::canRealignStack(const MachineFunction &MF) const {
if (MF.getFunction()->hasFnAttribute("no-realign-stack"))
return false;
return true;
}
// FIXME: share this with other backends with identical implementation?
bool
AArch64RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
const Function *F = MF.getFunction();
unsigned StackAlign = MF.getTarget()
.getSubtargetImpl(*MF.getFunction())
->getFrameLowering()
->getStackAlignment();
bool requiresRealignment =
((MFI->getMaxAlignment() > StackAlign) ||
F->getAttributes().hasAttribute(AttributeSet::FunctionIndex,
Attribute::StackAlignment));
return requiresRealignment && canRealignStack(MF);
}
unsigned
AArch64RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
const TargetFrameLowering *TFI = MF.getSubtarget().getFrameLowering();

View File

@ -93,6 +93,9 @@ public:
unsigned getRegPressureLimit(const TargetRegisterClass *RC,
MachineFunction &MF) const override;
// Base pointer (stack realignment) support.
bool canRealignStack(const MachineFunction &MF) const;
bool needsStackRealignment(const MachineFunction &MF) const override;
};
} // end namespace llvm

View File

@ -0,0 +1,491 @@
; RUN: llc -verify-machineinstrs -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
; This test aims to check basic correctness of frame layout &
; frame access code. There are 8 functions in this test file,
; each function implements one element in the cartesian product
; of:
; . a function having a VLA/noVLA
; . a function with dynamic stack realignment/no dynamic stack realignment.
; . a function needing a frame pionter/no frame pointer,
; since the presence/absence of these has influence on the frame
; layout and which pointer to use to access various part of the
; frame (bp,sp,fp).
;
; Furthermore: in every test function:
; . there is always one integer and 1 floating point argument to be able
; to check those are accessed correctly.
; . there is always one local variable to check that is accessed
; correctly
;
; The LLVM-IR below was produced by clang on the following C++ code:
;extern "C" int g();
;extern "C" int novla_nodynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
;{
; // use an argument passed on the stack.
; volatile int l1;
; return i10 + (int)d10 + l1 + g();
;}
;extern "C" int novla_nodynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
;{
; // use an argument passed on the stack.
; volatile int l1;
; return i10 + (int)d10 + l1;
;}
;extern "C" int novla_dynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
;{
; // use an argument passed on the stack.
; alignas(128) volatile int l1;
; return i10 + (int)d10 + l1 + g();
;}
;extern "C" int novla_dynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
;{
; // use an argument passed on the stack.
; alignas(128) volatile int l1;
; return i10 + (int)d10 + l1;
;}
;
;extern "C" int vla_nodynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
;{
; // use an argument passed on the stack.
; volatile int l1;
; volatile int vla[i1];
; return i10 + (int)d10 + l1 + g() + vla[0];
;}
;extern "C" int vla_nodynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
;{
; // use an argument passed on the stack.
; volatile int l1;
; volatile int vla[i1];
; return i10 + (int)d10 + l1 + vla[0];
;}
;extern "C" int vla_dynamicrealign_call(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
;{
; // use an argument passed on the stack.
; alignas(128) volatile int l1;
; volatile int vla[i1];
; return i10 + (int)d10 + l1 + g() + vla[0];
;}
;extern "C" int vla_dynamicrealign_nocall(int i1, int i2, int i3, int i4, int i5, int i6, int i7, int i8, int i9, int i10,
; double d1, double d2, double d3, double d4, double d5, double d6, double d7, double d8, double d9, double d10)
;{
; // use an argument passed on the stack.
; alignas(128) volatile int l1;
; volatile int vla[i1];
; return i10 + (int)d10 + l1 + vla[0];
;}
define i32 @novla_nodynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
entry:
%l1 = alloca i32, align 4
%conv = fptosi double %d10 to i32
%add = add nsw i32 %conv, %i10
%l1.0.l1.0. = load volatile i32, i32* %l1, align 4
%add1 = add nsw i32 %add, %l1.0.l1.0.
%call = tail call i32 @g()
%add2 = add nsw i32 %add1, %call
ret i32 %add2
}
; CHECK-LABEL: novla_nodynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
; CHECK: stp x20, x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
; Check correctness of cfi pseudo-instructions
; CHECK: .cfi_def_cfa w29, 16
; CHECK: .cfi_offset w30, -8
; CHECK: .cfi_offset w29, -16
; CHECK: .cfi_offset w19, -24
; CHECK: .cfi_offset w20, -32
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; Check correct access to local variable on the stack, through stack pointer
; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12]
; Check epilogue:
; CHECK: ldp x29, x30, [sp, #16]
; CHECK: ldp x20, x19, [sp], #32
; CHECK: ret
; CHECK: .cfi_endproc
declare i32 @g() #0
; Function Attrs: nounwind
define i32 @novla_nodynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
entry:
%l1 = alloca i32, align 4
%conv = fptosi double %d10 to i32
%add = add nsw i32 %conv, %i10
%l1.0.l1.0. = load volatile i32, i32* %l1, align 4
%add1 = add nsw i32 %add, %l1.0.l1.0.
ret i32 %add1
}
; CHECK-LABEL: novla_nodynamicrealign_nocall
; Check that space is reserved for one local variable on the stack.
; CHECK: sub sp, sp, #16 // =16
; Check correct access to arguments passed on the stack, through stack pointer
; CHECK: ldr d[[DARG:[0-9]+]], [sp, #40]
; CHECK: ldr w[[IARG:[0-9]+]], [sp, #24]
; Check correct access to local variable on the stack, through stack pointer
; CHECK: ldr w[[ILOC:[0-9]+]], [sp, #12]
; Check epilogue:
; CHECK: add sp, sp, #16 // =16
; CHECK: ret
define i32 @novla_dynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
entry:
%l1 = alloca i32, align 128
%conv = fptosi double %d10 to i32
%add = add nsw i32 %conv, %i10
%l1.0.l1.0. = load volatile i32, i32* %l1, align 128
%add1 = add nsw i32 %add, %l1.0.l1.0.
%call = tail call i32 @g()
%add2 = add nsw i32 %add1, %call
ret i32 %add2
}
; CHECK-LABEL: novla_dynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
; CHECK: stp x20, x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
; Check the dynamic realignment of the stack pointer to a 128-byte boundary
; CHECK: sub x9, sp, #96
; CHECK: and sp, x9, #0xffffffffffffff80
; Check correctness of cfi pseudo-instructions
; CHECK: .cfi_def_cfa w29, 16
; CHECK: .cfi_offset w30, -8
; CHECK: .cfi_offset w29, -16
; CHECK: .cfi_offset w19, -24
; CHECK: .cfi_offset w20, -32
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; Check correct access to local variable on the stack, through re-aligned stack pointer
; CHECK: ldr w[[ILOC:[0-9]+]], [sp]
; Check epilogue:
; Check that stack pointer get restored from frame pointer.
; CHECK: sub sp, x29, #16 // =16
; CHECK: ldp x29, x30, [sp, #16]
; CHECK: ldp x20, x19, [sp], #32
; CHECK: ret
; CHECK: .cfi_endproc
; Function Attrs: nounwind
define i32 @novla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
entry:
%l1 = alloca i32, align 128
%conv = fptosi double %d10 to i32
%add = add nsw i32 %conv, %i10
%l1.0.l1.0. = load volatile i32, i32* %l1, align 128
%add1 = add nsw i32 %add, %l1.0.l1.0.
ret i32 %add1
}
; CHECK-LABEL: novla_dynamicrealign_nocall
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #-16]!
; CHECK: mov x29, sp
; Check the dynamic realignment of the stack pointer to a 128-byte boundary
; CHECK: sub x9, sp, #112
; CHECK: and sp, x9, #0xffffffffffffff80
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; Check correct access to local variable on the stack, through re-aligned stack pointer
; CHECK: ldr w[[ILOC:[0-9]+]], [sp]
; Check epilogue:
; Check that stack pointer get restored from frame pointer.
; CHECK: mov sp, x29
; CHECK: ldp x29, x30, [sp], #16
; CHECK: ret
define i32 @vla_nodynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
entry:
%l1 = alloca i32, align 4
%0 = zext i32 %i1 to i64
%vla = alloca i32, i64 %0, align 4
%conv = fptosi double %d10 to i32
%add = add nsw i32 %conv, %i10
%l1.0.l1.0. = load volatile i32, i32* %l1, align 4
%add1 = add nsw i32 %add, %l1.0.l1.0.
%call = tail call i32 @g()
%add2 = add nsw i32 %add1, %call
%1 = load volatile i32, i32* %vla, align 4, !tbaa !1
%add3 = add nsw i32 %add2, %1
ret i32 %add3
}
; CHECK-LABEL: vla_nodynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
; CHECK: stp x20, x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
; Check that space is reserved on the stack for the local variable,
; rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned.
; CHECK: sub sp, sp, #16
; Check correctness of cfi pseudo-instructions
; CHECK: .cfi_def_cfa w29, 16
; CHECK: .cfi_offset w30, -8
; CHECK: .cfi_offset w29, -16
; CHECK: .cfi_offset w19, -24
; CHECK: .cfi_offset w20, -32
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; CHECK: ubfx x9, x0, #0, #32
; CHECK: lsl x9, x9, #2
; CHECK: add x9, x9, #15
; CHECK: and x9, x9, #0xfffffffffffffff0
; CHECK: mov x10, sp
; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through frame pointer
; CHECK: ldur w[[ILOC:[0-9]+]], [x29, #-20]
; Check correct accessing of the VLA variable through the base pointer
; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
; Check epilogue:
; Check that stack pointer get restored from frame pointer.
; CHECK: sub sp, x29, #16 // =16
; CHECK: ldp x29, x30, [sp, #16]
; CHECK: ldp x20, x19, [sp], #32
; CHECK: ret
; CHECK: .cfi_endproc
; Function Attrs: nounwind
define i32 @vla_nodynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
entry:
%l1 = alloca i32, align 4
%0 = zext i32 %i1 to i64
%vla = alloca i32, i64 %0, align 4
%conv = fptosi double %d10 to i32
%add = add nsw i32 %conv, %i10
%l1.0.l1.0. = load volatile i32, i32* %l1, align 4
%add1 = add nsw i32 %add, %l1.0.l1.0.
%1 = load volatile i32, i32* %vla, align 4, !tbaa !1
%add2 = add nsw i32 %add1, %1
ret i32 %add2
}
; CHECK-LABEL: vla_nodynamicrealign_nocall
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #-16]!
; CHECK: mov x29, sp
; Check that space is reserved on the stack for the local variable,
; rounded up to a multiple of 16 to keep the stack pointer 16-byte aligned.
; CHECK: sub sp, sp, #16
; Check correctness of cfi pseudo-instructions
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; CHECK: ubfx x9, x0, #0, #32
; CHECK: lsl x9, x9, #2
; CHECK: add x9, x9, #15
; CHECK: and x9, x9, #0xfffffffffffffff0
; CHECK: mov x10, sp
; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through frame pointer
; CHECK: ldur w[[ILOC:[0-9]+]], [x29, #-4]
; Check correct accessing of the VLA variable through the base pointer
; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
; Check epilogue:
; Check that stack pointer get restored from frame pointer.
; CHECK: mov sp, x29
; CHECK: ldp x29, x30, [sp], #16
; CHECK: ret
define i32 @vla_dynamicrealign_call(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #0 {
entry:
%l1 = alloca i32, align 128
%0 = zext i32 %i1 to i64
%vla = alloca i32, i64 %0, align 4
%conv = fptosi double %d10 to i32
%add = add nsw i32 %conv, %i10
%l1.0.l1.0. = load volatile i32, i32* %l1, align 128
%add1 = add nsw i32 %add, %l1.0.l1.0.
%call = tail call i32 @g()
%add2 = add nsw i32 %add1, %call
%1 = load volatile i32, i32* %vla, align 4, !tbaa !1
%add3 = add nsw i32 %add2, %1
ret i32 %add3
}
; CHECK-LABEL: vla_dynamicrealign_call
; CHECK: .cfi_startproc
; Check that used callee-saved registers are saved
; CHECK: stp x22, x21, [sp, #-48]!
; CHECK: stp x20, x19, [sp, #16]
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #32]
; CHECK: add x29, sp, #32
; Check that the stack pointer gets re-aligned to 128
; bytes & the base pointer (x19) gets initialized to
; this 128-byte aligned area for local variables &
; spill slots
; CHECK: sub x9, sp, #80 // =80
; CHECK: and sp, x9, #0xffffffffffffff80
; CHECK: mov x19, sp
; Check correctness of cfi pseudo-instructions
; CHECK: .cfi_def_cfa w29, 16
; CHECK: .cfi_offset w30, -8
; CHECK: .cfi_offset w29, -16
; CHECK: .cfi_offset w19, -24
; CHECK: .cfi_offset w20, -32
; CHECK: .cfi_offset w21, -40
; CHECK: .cfi_offset w22, -48
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; and set-up of base pointer (x19).
; CHECK: ubfx x9, x0, #0, #32
; CHECK: lsl x9, x9, #2
; CHECK: add x9, x9, #15
; CHECK: and x9, x9, #0xfffffffffffffff0
; CHECK: mov x10, sp
; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through base pointer
; CHECK: ldr w[[ILOC:[0-9]+]], [x19]
; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
; Check epilogue:
; Check that stack pointer get restored from frame pointer.
; CHECK: sub sp, x29, #32
; CHECK: ldp x29, x30, [sp, #32]
; CHECK: ldp x20, x19, [sp, #16]
; CHECK: ldp x22, x21, [sp], #48
; CHECK: ret
; CHECK: .cfi_endproc
; Function Attrs: nounwind
define i32 @vla_dynamicrealign_nocall(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
entry:
%l1 = alloca i32, align 128
%0 = zext i32 %i1 to i64
%vla = alloca i32, i64 %0, align 4
%conv = fptosi double %d10 to i32
%add = add nsw i32 %conv, %i10
%l1.0.l1.0. = load volatile i32, i32* %l1, align 128
%add1 = add nsw i32 %add, %l1.0.l1.0.
%1 = load volatile i32, i32* %vla, align 4, !tbaa !1
%add2 = add nsw i32 %add1, %1
ret i32 %add2
}
; CHECK-LABEL: vla_dynamicrealign_nocall
; Check that used callee-saved registers are saved
; CHECK: stp x20, x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
; Check that the stack pointer gets re-aligned to 128
; bytes & the base pointer (x19) gets initialized to
; this 128-byte aligned area for local variables &
; spill slots
; CHECK: sub x9, sp, #96
; CHECK: and sp, x9, #0xffffffffffffff80
; CHECK: mov x19, sp
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; and set-up of base pointer (x19).
; CHECK: ubfx x9, x0, #0, #32
; CHECK: lsl x9, x9, #2
; CHECK: add x9, x9, #15
; CHECK: and x9, x9, #0xfffffffffffffff0
; CHECK: mov x10, sp
; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through base pointer
; CHECK: ldr w[[ILOC:[0-9]+]], [x19]
; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
; Check epilogue:
; Check that stack pointer get restored from frame pointer.
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
; CHECK: ldp x20, x19, [sp], #32
; CHECK: ret
; Function Attrs: nounwind
define i32 @vla_dynamicrealign_nocall_large_align(i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, double %d1, double %d2, double %d3, double %d4, double %d5, double %d6, double %d7, double %d8, double %d9, double %d10) #1 {
entry:
%l1 = alloca i32, align 32768
%0 = zext i32 %i1 to i64
%vla = alloca i32, i64 %0, align 4
%conv = fptosi double %d10 to i32
%add = add nsw i32 %conv, %i10
%l1.0.l1.0. = load volatile i32, i32* %l1, align 32768
%add1 = add nsw i32 %add, %l1.0.l1.0.
%1 = load volatile i32, i32* %vla, align 4, !tbaa !1
%add2 = add nsw i32 %add1, %1
ret i32 %add2
}
; CHECK-LABEL: vla_dynamicrealign_nocall_large_align
; Check that used callee-saved registers are saved
; CHECK: stp x20, x19, [sp, #-32]!
; Check that the frame pointer is created:
; CHECK: stp x29, x30, [sp, #16]
; CHECK: add x29, sp, #16
; Check that the stack pointer gets re-aligned to 128
; bytes & the base pointer (x19) gets initialized to
; this 128-byte aligned area for local variables &
; spill slots
; CHECK: sub x9, sp, #7, lsl #12
; CHECK: and sp, x9, #0xffffffffffff8000
; CHECK: mov x19, sp
; Check correct access to arguments passed on the stack, through frame pointer
; CHECK: ldr w[[IARG:[0-9]+]], [x29, #24]
; CHECK: ldr d[[DARG:[0-9]+]], [x29, #40]
; Check correct reservation of 16-byte aligned VLA (size in w0) on stack
; and set-up of base pointer (x19).
; CHECK: ubfx x9, x0, #0, #32
; CHECK: lsl x9, x9, #2
; CHECK: add x9, x9, #15
; CHECK: and x9, x9, #0xfffffffffffffff0
; CHECK: mov x10, sp
; CHECK: sub x[[VLASPTMP:[0-9]+]], x10, x9
; CHECK: mov sp, x[[VLASPTMP]]
; Check correct access to local variable, through base pointer
; CHECK: ldr w[[ILOC:[0-9]+]], [x19]
; CHECK: ldr w[[VLA:[0-9]+]], [x[[VLASPTMP]]]
; Check epilogue:
; Check that stack pointer get restored from frame pointer.
; CHECK: sub sp, x29, #16
; CHECK: ldp x29, x30, [sp, #16]
; CHECK: ldp x20, x19, [sp], #32
; CHECK: ret
attributes #0 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
!1 = !{!2, !2, i64 0}
!2 = !{!"int", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}