Detect need for autoalignment of the stack earlier to catch spills more

conservatively. Adjust the eliminateFrameIndex() machinery to handle addr mode
6 (vld1/vst1) used for spills. Fix tests to expect aligned Q-reg spilling.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@88874 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jim Grosbach 2009-11-15 21:45:34 +00:00
parent 6cb6788b79
commit a44321776e
6 changed files with 57 additions and 6 deletions

View File

@ -109,6 +109,7 @@ FunctionPass *createNEONPreAllocPass();
FunctionPass *createNEONMoveFixPass(); FunctionPass *createNEONMoveFixPass();
FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2ITBlockPass();
FunctionPass *createThumb2SizeReductionPass(); FunctionPass *createThumb2SizeReductionPass();
FunctionPass *createARMMaxStackAlignmentCalculatorPass();
extern Target TheARMTarget, TheThumbTarget; extern Target TheARMTarget, TheThumbTarget;

View File

@ -1132,6 +1132,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
break; break;
} }
case ARMII::AddrMode4: case ARMII::AddrMode4:
case ARMII::AddrMode6:
// Can't fold any offset even if it's zero. // Can't fold any offset even if it's zero.
return false; return false;
case ARMII::AddrMode5: { case ARMII::AddrMode5: {

View File

@ -1170,7 +1170,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// as much as possible above, handle the rest, providing a register that is // as much as possible above, handle the rest, providing a register that is
// SP+LargeImm. // SP+LargeImm.
assert((Offset || assert((Offset ||
(MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4) && (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 ||
(MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) &&
"This code isn't needed if offset already handled!"); "This code isn't needed if offset already handled!");
unsigned ScratchReg = 0; unsigned ScratchReg = 0;
@ -1179,7 +1180,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg(); unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
if (Offset == 0) if (Offset == 0)
// Must be addrmode4. // Must be addrmode4/6.
MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false); MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
else { else {
ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass); ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
@ -1462,4 +1463,46 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
} }
namespace {
struct MSAC : public MachineFunctionPass {
static char ID;
MSAC() : MachineFunctionPass(&ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF) {
MachineFrameInfo *FFI = MF.getFrameInfo();
MachineRegisterInfo &RI = MF.getRegInfo();
// Calculate max stack alignment of all already allocated stack objects.
unsigned MaxAlign = calculateMaxStackAlignment(FFI);
// Be over-conservative: scan over all vreg defs and find, whether vector
// registers are used. If yes - there is probability, that vector register
// will be spilled and thus stack needs to be aligned properly.
for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
RegNum < RI.getLastVirtReg(); ++RegNum)
MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
if (FFI->getMaxAlignment() == MaxAlign)
return false;
FFI->setMaxAlignment(MaxAlign);
return true;
}
virtual const char *getPassName() const {
return "ARM Maximal Stack Alignment Calculator";
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
char MSAC::ID = 0;
}
/// createARMMaxStackAlignmentCalculatorPass - Factory that returns a newly
/// allocated instance of the MSAC pass defined above in this file.
FunctionPass *llvm::createARMMaxStackAlignmentCalculatorPass() {
  FunctionPass *Pass = new MSAC();
  return Pass;
}
#include "ARMGenRegisterInfo.inc" #include "ARMGenRegisterInfo.inc"

View File

@ -93,6 +93,10 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
if (Subtarget.hasNEON()) if (Subtarget.hasNEON())
PM.add(createNEONPreAllocPass()); PM.add(createNEONPreAllocPass());
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
PM.add(createARMMaxStackAlignmentCalculatorPass());
// FIXME: temporarily disabling load / store optimization pass for Thumb1. // FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true)); PM.add(createARMLoadStoreOptimizationPass(true));

View File

@ -11,8 +11,9 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
define arm_apcscc void @aaa(%quuz* %this, i8* %block) { define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
; CHECK: aaa: ; CHECK: aaa:
; CHECK: vstmia sp ; CHECK: bic sp, sp, #15
; CHECK: vldmia sp ; CHECK: vst1.64 {{.*}}sp @128
; CHECK: vld1.64 {{.*}}sp @128
entry: entry:
%0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
store float 6.300000e+01, float* undef, align 4 store float 6.300000e+01, float* undef, align 4

View File

@ -11,8 +11,9 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly
define arm_apcscc void @aaa(%quuz* %this, i8* %block) { define arm_apcscc void @aaa(%quuz* %this, i8* %block) {
; CHECK: aaa: ; CHECK: aaa:
; CHECK: vstmia sp ; CHECK: bic sp, sp, #15
; CHECK: vldmia sp ; CHECK: vst1.64 {{.*}}sp @128
; CHECK: vld1.64 {{.*}}sp @128
entry: entry:
%0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1]
store float 6.300000e+01, float* undef, align 4 store float 6.300000e+01, float* undef, align 4