Start of revamping the register scavenging in PEI. ARM Thumb1 is the driving

interest for this, as it currently reserves a register rather than using
the scavenger for materializing constants as needed.

Instead of scavenging registers on the fly while eliminating frame indices,
new virtual registers are created, and then are scavenged collectively in a
post-pass over the function. This isolates the bits that need to interact
with the scavenger, and sets the stage for more intelligent use, and reuse,
of scavenged registers.

For the time being, this is disabled by default. Once the bugs are worked out,
the current scavenging calls in replaceFrameIndices() will be removed and
the post-pass scavenging will be the default. Until then,
-enable-frame-index-scavenging enables the new code. Currently, only the
Thumb1 back end is set up to use it.




git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82734 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jim Grosbach
2009-09-24 23:52:18 +00:00
parent 2bbcd33feb
commit 3d6cb88a64
4 changed files with 151 additions and 59 deletions

View File

@ -31,7 +31,9 @@
#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetFrameInfo.h" #include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h" #include "llvm/Support/Compiler.h"
#include "llvm/ADT/IndexedMap.h"
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLExtras.h"
#include <climits> #include <climits>
@ -42,6 +44,16 @@ char PEI::ID = 0;
static RegisterPass<PEI> static RegisterPass<PEI>
X("prologepilog", "Prologue/Epilogue Insertion"); X("prologepilog", "Prologue/Epilogue Insertion");
// FIXME: For now, the frame index scavenging is off by default and only
// used by the Thumb1 target. When it's the default and replaces the current
// on-the-fly PEI scavenging for all targets, requiresRegisterScavenging()
// will replace this.
//
// Deliberately not 'static': the Thumb1 back end refers to this flag through
// an 'extern cl::opt<bool>' declaration.
cl::opt<bool>
FrameIndexVirtualScavenging("enable-frame-index-scavenging",
                            cl::Hidden,
                            // Adjacent literals are concatenated verbatim, so
                            // the first one must end with a space.
                            cl::desc("Enable frame index elimination with "
                                     "virtual register scavenging"));
/// createPrologEpilogCodeInserter - This function returns a pass that inserts /// createPrologEpilogCodeInserter - This function returns a pass that inserts
/// prolog and epilog code, and eliminates abstract frame references. /// prolog and epilog code, and eliminates abstract frame references.
/// ///
@ -104,6 +116,12 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) {
// //
replaceFrameIndices(Fn); replaceFrameIndices(Fn);
// If register scavenging is needed and we've enabled doing it as a
// post-pass, scavenge the virtual registers that frame index elimination
// inserted.
if (TRI->requiresRegisterScavenging(Fn) && FrameIndexVirtualScavenging)
scavengeFrameVirtualRegs(Fn);
delete RS; delete RS;
clearAllSets(); clearAllSets();
return true; return true;
@ -634,7 +652,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
for (MachineFunction::iterator BB = Fn.begin(), for (MachineFunction::iterator BB = Fn.begin(),
E = Fn.end(); BB != E; ++BB) { E = Fn.end(); BB != E; ++BB) {
int SPAdj = 0; // SP offset due to call frame setup / destroy. int SPAdj = 0; // SP offset due to call frame setup / destroy.
if (RS) RS->enterBasicBlock(BB); if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB);
for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) {
@ -680,7 +698,8 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
// use that target machine register info object to eliminate // use that target machine register info object to eliminate
// it. // it.
TRI.eliminateFrameIndex(MI, SPAdj, RS); TRI.eliminateFrameIndex(MI, SPAdj, FrameIndexVirtualScavenging ?
NULL : RS);
// Reset the iterator if we were at the beginning of the BB. // Reset the iterator if we were at the beginning of the BB.
if (AtBeginning) { if (AtBeginning) {
@ -695,10 +714,50 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
if (DoIncr && I != BB->end()) ++I; if (DoIncr && I != BB->end()) ++I;
// Update register states. // Update register states.
if (RS && MI) RS->forward(MI); if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI);
} }
assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?"); assert(SPAdj == 0 && "Unbalanced call frame setup / destroy pairs?");
} }
} }
/// scavengeFrameVirtualRegs - Walk every instruction of the function and
/// rewrite each virtual register operand to a physical scratch register
/// obtained from the register scavenger (RS).
///
/// Within one basic block, every operand naming the same virtual register is
/// rewritten to the same scratch register. The virtual-to-scratch mapping is
/// rebuilt from scratch for each basic block.
void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) {
  const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo();

  for (MachineFunction::iterator BB = Fn.begin(), BBEnd = Fn.end();
       BB != BBEnd; ++BB) {
    RS->enterBasicBlock(BB);

    // Remembers the scratch register chosen for each virtual register seen
    // in this block; an entry of zero means "none chosen yet".
    // FIXME: Is a map like this the best solution? Seems like overkill,
    // but to get rid of it would need some fairly strong assumptions
    // that may not be valid as this gets smarter about reuse and such.
    IndexedMap<unsigned, VirtReg2IndexFunctor> ScratchForVReg;
    ScratchForVReg.grow(Fn.getRegInfo().getLastVirtReg());

    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
      MachineInstr *Instr = I;
      const unsigned NumOps = Instr->getNumOperands();

      for (unsigned OpIdx = 0; OpIdx != NumOps; ++OpIdx) {
        // Only register operands that name a virtual register need work.
        if (!Instr->getOperand(OpIdx).isReg())
          continue;
        const unsigned VirtReg = Instr->getOperand(OpIdx).getReg();
        if (!VirtReg || !TRI->isVirtualRegister(VirtReg))
          continue;

        // Reuse the scratch register already assigned to this virtual
        // register, if there is one.
        unsigned Scratch = ScratchForVReg[VirtReg];
        if (!Scratch) {
          const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(VirtReg);

          // Prefer a register the scavenger reports as unused; otherwise
          // ask it to scavenge one at this instruction.
          Scratch = RS->FindUnusedReg(RC);
          if (!Scratch)
            // FIXME: Track SPAdj. Zero won't always be right
            Scratch = RS->scavengeRegister(RC, I, 0);
          assert(Scratch && "unable to scavenge register!");
          ScratchForVReg[VirtReg] = Scratch;
        }

        Instr->getOperand(OpIdx).setReg(Scratch);
      }

      // Advance the scavenger past this instruction only after all of its
      // operands have been rewritten.
      RS->forward(Instr);
    }
  }
}

View File

@ -123,6 +123,7 @@ namespace llvm {
void insertCSRSpillsAndRestores(MachineFunction &Fn); void insertCSRSpillsAndRestores(MachineFunction &Fn);
void calculateFrameObjectOffsets(MachineFunction &Fn); void calculateFrameObjectOffsets(MachineFunction &Fn);
void replaceFrameIndices(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn);
void scavengeFrameVirtualRegs(MachineFunction &Fn);
void insertPrologEpilogCode(MachineFunction &Fn); void insertPrologEpilogCode(MachineFunction &Fn);
// Initialize DFA sets, called before iterations. // Initialize DFA sets, called before iterations.

View File

@ -649,10 +649,12 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Estimate if we might need to scavenge a register at some point in order // Estimate if we might need to scavenge a register at some point in order
// to materialize a stack offset. If so, either spill one additional // to materialize a stack offset. If so, either spill one additional
// callee-saved register or reserve a special spill slot to facilitate // callee-saved register or reserve a special spill slot to facilitate
// register scavenging. // register scavenging. Thumb1 needs a spill slot for stack pointer
if (RS && !ExtraCSSpill && !AFI->isThumb1OnlyFunction()) { // adjustments also, even when the frame itself is small.
if (RS && !ExtraCSSpill) {
MachineFrameInfo *MFI = MF.getFrameInfo(); MachineFrameInfo *MFI = MF.getFrameInfo();
if (estimateStackSize(MF, MFI) >= estimateRSStackSizeLimit(MF)) { if (estimateStackSize(MF, MFI) >= estimateRSStackSizeLimit(MF)
|| AFI->isThumb1OnlyFunction()) {
// If any non-reserved CS register isn't spilled, just spill one or two // If any non-reserved CS register isn't spilled, just spill one or two
// extra. That should take care of it! // extra. That should take care of it!
unsigned NumExtras = TargetAlign / 4; unsigned NumExtras = TargetAlign / 4;

View File

@ -37,10 +37,10 @@
#include "llvm/Support/raw_ostream.h" #include "llvm/Support/raw_ostream.h"
using namespace llvm; using namespace llvm;
static cl::opt<bool> // FIXME: This cmd line option conditionalizes the new register scavenging
ThumbRegScavenging("enable-thumb-reg-scavenging", // implementation in PEI. Remove the option when scavenging works well enough
cl::Hidden, // to be the default.
cl::desc("Enable register scavenging on Thumb")); extern cl::opt<bool> FrameIndexVirtualScavenging;
Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii, Thumb1RegisterInfo::Thumb1RegisterInfo(const ARMBaseInstrInfo &tii,
const ARMSubtarget &sti) const ARMSubtarget &sti)
@ -84,7 +84,7 @@ Thumb1RegisterInfo::getPhysicalRegisterRegClass(unsigned Reg, EVT VT) const {
bool bool
Thumb1RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { Thumb1RegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const {
return ThumbRegScavenging; return FrameIndexVirtualScavenging;
} }
bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const { bool Thumb1RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
@ -113,6 +113,7 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
const TargetInstrInfo &TII, const TargetInstrInfo &TII,
const Thumb1RegisterInfo& MRI, const Thumb1RegisterInfo& MRI,
DebugLoc dl) { DebugLoc dl) {
MachineFunction &MF = *MBB.getParent();
bool isHigh = !isARMLowRegister(DestReg) || bool isHigh = !isARMLowRegister(DestReg) ||
(BaseReg != 0 && !isARMLowRegister(BaseReg)); (BaseReg != 0 && !isARMLowRegister(BaseReg));
bool isSub = false; bool isSub = false;
@ -127,9 +128,13 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
unsigned LdReg = DestReg; unsigned LdReg = DestReg;
if (DestReg == ARM::SP) { if (DestReg == ARM::SP) {
assert(BaseReg == ARM::SP && "Unexpected!"); assert(BaseReg == ARM::SP && "Unexpected!");
LdReg = ARM::R3; if (FrameIndexVirtualScavenging) {
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) LdReg = MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
.addReg(ARM::R3, RegState::Kill); } else {
LdReg = ARM::R3;
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12)
.addReg(ARM::R3, RegState::Kill);
}
} }
if (NumBytes <= 255 && NumBytes >= 0) if (NumBytes <= 255 && NumBytes >= 0)
@ -155,7 +160,7 @@ void emitThumbRegPlusImmInReg(MachineBasicBlock &MBB,
MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill); MIB.addReg(LdReg).addReg(BaseReg, RegState::Kill);
AddDefaultPred(MIB); AddDefaultPred(MIB);
if (DestReg == ARM::SP) if (!FrameIndexVirtualScavenging && DestReg == ARM::SP)
BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3)
.addReg(ARM::R12, RegState::Kill); .addReg(ARM::R12, RegState::Kill);
} }
@ -602,50 +607,73 @@ void Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
else // tLDR has an extra register operand. else // tLDR has an extra register operand.
MI.addOperand(MachineOperand::CreateReg(0, false)); MI.addOperand(MachineOperand::CreateReg(0, false));
} else if (Desc.mayStore()) { } else if (Desc.mayStore()) {
// FIXME! This is horrific!!! We need register scavenging. if (FrameIndexVirtualScavenging) {
// Our temporary workaround has marked r3 unavailable. Of course, r3 is unsigned TmpReg =
// also a ABI register so it's possible that is is the register that is MF.getRegInfo().createVirtualRegister(ARM::tGPRRegisterClass);
// being storing here. If that's the case, we do the following: bool UseRR = false;
// r12 = r2 if (Opcode == ARM::tSpill) {
// Use r2 to materialize sp + offset if (FrameReg == ARM::SP)
// str r3, r2 emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
// r2 = r12 Offset, false, TII, *this, dl);
unsigned ValReg = MI.getOperand(0).getReg(); else {
unsigned TmpReg = ARM::R3; emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
bool UseRR = false; UseRR = true;
if (ValReg == ARM::R3) { }
BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) } else
.addReg(ARM::R2, RegState::Kill); emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
TmpReg = ARM::R2; *this, dl);
} MI.setDesc(TII.get(ARM::tSTR));
if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12) if (UseRR) // Use [reg, reg] addrmode.
.addReg(ARM::R3, RegState::Kill); MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
if (Opcode == ARM::tSpill) { else // tSTR has an extra register operand.
if (FrameReg == ARM::SP) MI.addOperand(MachineOperand::CreateReg(0, false));
emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg, } else {
Offset, false, TII, *this, dl); // FIXME! This is horrific!!! We need register scavenging.
else { // Our temporary workaround has marked r3 unavailable. Of course, r3 is
emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset); // also an ABI register so it's possible that it is the register that is
UseRR = true; // being stored here. If that's the case, we do the following:
// r12 = r2
// Use r2 to materialize sp + offset
// str r3, r2
// r2 = r12
unsigned ValReg = MI.getOperand(0).getReg();
unsigned TmpReg = ARM::R3;
bool UseRR = false;
if (ValReg == ARM::R3) {
BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12)
.addReg(ARM::R2, RegState::Kill);
TmpReg = ARM::R2;
} }
} else if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII, BuildMI(MBB, II, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R12)
*this, dl); .addReg(ARM::R3, RegState::Kill);
MI.setDesc(TII.get(ARM::tSTR)); if (Opcode == ARM::tSpill) {
MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true); if (FrameReg == ARM::SP)
if (UseRR) // Use [reg, reg] addrmode. emitThumbRegPlusImmInReg(MBB, II, TmpReg, FrameReg,
MI.addOperand(MachineOperand::CreateReg(FrameReg, false)); Offset, false, TII, *this, dl);
else // tSTR has an extra register operand. else {
MI.addOperand(MachineOperand::CreateReg(0, false)); emitLoadConstPool(MBB, II, dl, TmpReg, 0, Offset);
UseRR = true;
}
} else
emitThumbRegPlusImmediate(MBB, II, TmpReg, FrameReg, Offset, TII,
*this, dl);
MI.setDesc(TII.get(ARM::tSTR));
MI.getOperand(i).ChangeToRegister(TmpReg, false, false, true);
if (UseRR) // Use [reg, reg] addrmode.
MI.addOperand(MachineOperand::CreateReg(FrameReg, false));
else // tSTR has an extra register operand.
MI.addOperand(MachineOperand::CreateReg(0, false));
MachineBasicBlock::iterator NII = next(II); MachineBasicBlock::iterator NII = next(II);
if (ValReg == ARM::R3) if (ValReg == ARM::R3)
BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R2) BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R2)
.addReg(ARM::R12, RegState::Kill); .addReg(ARM::R12, RegState::Kill);
if (TmpReg == ARM::R3 && AFI->isR3LiveIn()) if (TmpReg == ARM::R3 && AFI->isR3LiveIn())
BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3) BuildMI(MBB, NII, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R3)
.addReg(ARM::R12, RegState::Kill); .addReg(ARM::R12, RegState::Kill);
}
} else } else
assert(false && "Unexpected opcode!"); assert(false && "Unexpected opcode!");
@ -834,11 +862,13 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
if (VARegSaveSize) { if (VARegSaveSize) {
// Epilogue for vararg functions: pop LR to R3 and branch off it. // Epilogue for vararg functions: pop LR to R3 and branch off it.
// FIXME: Verify this is still ok when R3 is no longer being reserved. // FIXME: Verify this is still ok when R3 is no longer being reserved.
AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP))).addReg(ARM::R3); AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)))
.addReg(ARM::R3, RegState::Define);
emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize); emitSPUpdate(MBB, MBBI, TII, dl, *this, VARegSaveSize);
BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg)).addReg(ARM::R3); BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET_vararg))
.addReg(ARM::R3, RegState::Kill);
MBB.erase(MBBI); MBB.erase(MBBI);
} }
} }