diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h index 21445adff97..ff1980d19df 100644 --- a/lib/Target/ARM/ARM.h +++ b/lib/Target/ARM/ARM.h @@ -109,6 +109,7 @@ FunctionPass *createNEONPreAllocPass(); FunctionPass *createNEONMoveFixPass(); FunctionPass *createThumb2ITBlockPass(); FunctionPass *createThumb2SizeReductionPass(); +FunctionPass *createARMMaxStackAlignmentCalculatorPass(); extern Target TheARMTarget, TheThumbTarget; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 19ffcf72eed..828041c64ec 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1132,6 +1132,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, break; } case ARMII::AddrMode4: + case ARMII::AddrMode6: // Can't fold any offset even if it's zero. return false; case ARMII::AddrMode5: { diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 9b4368f47dc..c3b1000eb2a 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -1170,7 +1170,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, // as much as possible above, handle the rest, providing a register that is // SP+LargeImm. assert((Offset || - (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4) && + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 || + (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) && "This code isn't needed if offset already handled!"); unsigned ScratchReg = 0; @@ -1179,7 +1180,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, ? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm(); unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg(); if (Offset == 0) - // Must be addrmode4. + // Must be addrmode4/6. MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false); else { ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass); @@ -1462,4 +1463,46 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); } +namespace { + struct MSAC : public MachineFunctionPass { + static char ID; + MSAC() : MachineFunctionPass(&ID) {} + + virtual bool runOnMachineFunction(MachineFunction &MF) { + MachineFrameInfo *FFI = MF.getFrameInfo(); + MachineRegisterInfo &RI = MF.getRegInfo(); + + // Calculate max stack alignment of all already allocated stack objects. + unsigned MaxAlign = calculateMaxStackAlignment(FFI); + + // Be over-conservative: scan over all vreg defs and find, whether vector + // registers are used. If yes - there is probability, that vector register + // will be spilled and thus stack needs to be aligned properly. + for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister; + RegNum < RI.getLastVirtReg(); ++RegNum) + MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment()); + + if (FFI->getMaxAlignment() == MaxAlign) + return false; + + FFI->setMaxAlignment(MaxAlign); + return true; + } + + virtual const char *getPassName() const { + return "ARM Maximal Stack Alignment Calculator"; + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + }; + + char MSAC::ID = 0; +} + +FunctionPass* +llvm::createARMMaxStackAlignmentCalculatorPass() { return new MSAC(); } + #include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp index 4d20a5c1a03..2564ed92547 100644 --- a/lib/Target/ARM/ARMTargetMachine.cpp +++ b/lib/Target/ARM/ARMTargetMachine.cpp @@ -93,6 +93,10 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM, if (Subtarget.hasNEON()) PM.add(createNEONPreAllocPass()); + // Calculate and set max stack object alignment early, so we can decide + // whether we will need stack realignment (and thus FP). + PM.add(createARMMaxStackAlignmentCalculatorPass()); + // FIXME: temporarily disabling load / store optimization pass for Thumb1. if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only()) PM.add(createARMLoadStoreOptimizationPass(true)); diff --git a/test/CodeGen/ARM/spill-q.ll b/test/CodeGen/ARM/spill-q.ll index f4b27a7603e..6b44210af74 100644 --- a/test/CodeGen/ARM/spill-q.ll +++ b/test/CodeGen/ARM/spill-q.ll @@ -11,8 +11,9 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly define arm_apcscc void @aaa(%quuz* %this, i8* %block) { ; CHECK: aaa: -; CHECK: vstmia sp -; CHECK: vldmia sp +; CHECK: bic sp, sp, #15 +; CHECK: vst1.64 {{.*}}sp @128 +; CHECK: vld1.64 {{.*}}sp @128 entry: %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] store float 6.300000e+01, float* undef, align 4 diff --git a/test/CodeGen/Thumb2/thumb2-spill-q.ll b/test/CodeGen/Thumb2/thumb2-spill-q.ll index 0a7221c6174..6c6a7e19af0 100644 --- a/test/CodeGen/Thumb2/thumb2-spill-q.ll +++ b/test/CodeGen/Thumb2/thumb2-spill-q.ll @@ -11,8 +11,9 @@ declare <4 x float> @llvm.arm.neon.vld1.v4f32(i8*) nounwind readonly define arm_apcscc void @aaa(%quuz* %this, i8* %block) { ; CHECK: aaa: -; CHECK: vstmia sp -; CHECK: vldmia sp +; CHECK: bic sp, sp, #15 +; CHECK: vst1.64 {{.*}}sp @128 +; CHECK: vld1.64 {{.*}}sp @128 entry: %0 = call <4 x float> @llvm.arm.neon.vld1.v4f32(i8* undef) nounwind ; <<4 x float>> [#uses=1] store float 6.300000e+01, float* undef, align 4