Local spiller optimization:

Turn a store-folding instruction into a load-folding instruction, e.g.:
     xorl  %edi, %eax
     movl  %eax, -32(%ebp)
     movl  -36(%ebp), %eax
     orl   %eax, -32(%ebp)
=>
     xorl  %edi, %eax
     orl   -36(%ebp), %eax
     mov   %eax, -32(%ebp)
This enables the unfolding optimization for a subsequent instruction, which will
also eliminate the newly introduced store instruction.
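
A minimal sketch of the control flow behind this rewrite, condensed from the new
LocalSpiller::PrepForUnfoldOpti in the diff below. It assumes the MRegisterInfo
hooks added by this patch and omits the kill-flag / physreg-overlap legality
checks and VirtRegMap bookkeeping of the real routine; the helper name
TurnStoreFoldIntoLoadFold is illustrative only.

    // Illustrative sketch only -- condensed from LocalSpiller::PrepForUnfoldOpti.
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/Target/MRegisterInfo.h"
    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // MI is a store-folding instruction such as "orl %eax, -32(%ebp)".
    // UnfoldVR is the vreg spilled to the folded slot (-32); VirtReg is the vreg
    // that would otherwise be reloaded from stack slot SS (-36) into a physreg
    // MI already needs.
    static bool TurnStoreFoldIntoLoadFold(const MRegisterInfo *MRI,
                                          MachineFunction &MF,
                                          MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MII,
                                          unsigned UnfoldVR, unsigned VirtReg,
                                          int SS) {
      MachineInstr &MI = *MII;

      // 1. Bail out unless the folded store can be split back out of this opcode.
      if (!MRI->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
                                           /*UnfoldLoad=*/false,
                                           /*UnfoldStore=*/true))
        return false;

      // 2. Recover the pure register form of MI (no load or store emitted).
      SmallVector<MachineInstr*, 4> NewMIs;
      if (!MRI->unfoldMemoryOperand(MF, &MI, UnfoldVR,
                                    /*UnfoldLoad=*/false, /*UnfoldStore=*/false,
                                    NewMIs))
        return false;
      MachineInstr *NewMI = NewMIs.back();

      // 3. Fold VirtReg's reload from its own slot into the register form:
      //    "orl %eax, -32(%ebp)" effectively becomes "orl -36(%ebp), %eax".
      unsigned Idx = NewMI->findRegisterUseOperandIdx(VirtReg);
      if (MachineInstr *FoldedMI = MRI->foldMemoryOperand(NewMI, Idx, SS)) {
        MII = MBB.insert(MII, FoldedMI);
        MBB.erase(&MI);   // The store back to -32(%ebp) is recreated later,
        return true;      // when the spiller unfolds the next instruction.
      }
      delete NewMI;
      return false;
    }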


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43192 91177308-0d34-0410-b5e6-96231b3b80d8
Evan Cheng 2007-10-19 21:23:22 +00:00
parent 80629c85f1
commit 66f7163545
6 changed files with 370 additions and 182 deletions


@ -563,6 +563,14 @@ public:
return 0;
}
/// getOpcodeAfterMemoryFold - Returns the opcode of the would be new
/// instruction after load / store is folded into an instruction of the
/// specified opcode. It returns zero if the specified folding is not
/// possible.
virtual unsigned getOpcodeAfterMemoryFold(unsigned Opc, unsigned OpNum) const{
return 0;
}
/// unfoldMemoryOperand - Separate a single instruction which folded a load or
/// a store or a load and a store into two or more instructions. If this is
/// possible, returns true as well as the new instructions by reference.
@ -578,8 +586,9 @@ public:
}
/// getOpcodeAfterMemoryUnfold - Returns the opcode of the would be new
/// instruction after load / store are unfolded from the specified opcode.
/// It returns zero if the specified unfolding is impossible.
/// instruction after load / store are unfolded from an instruction of the
/// specified opcode. It returns zero if the specified unfolding is not
/// possible.
virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore) const {
return 0;


@ -242,6 +242,8 @@ bool SimpleSpiller::runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) {
//===----------------------------------------------------------------------===//
namespace {
class AvailableSpills;
/// LocalSpiller - This spiller does a simple pass over the machine basic
/// block to attempt to keep spills in registers as much as possible for
/// blocks that have low register pressure (the vreg may be spilled due to
@ -270,6 +272,12 @@ namespace {
return true;
}
private:
bool PrepForUnfoldOpti(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MII,
std::vector<MachineInstr*> &MaybeDeadStores,
AvailableSpills &Spills, BitVector &RegKills,
std::vector<MachineOperand*> &KillOps,
VirtRegMap &VRM);
void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM);
};
}
@ -357,7 +365,7 @@ public:
void disallowClobberPhysReg(unsigned PhysReg);
/// ClobberPhysReg - This is called when the specified physreg changes
/// value. We use this to invalidate any info about stuff we thing lives in
/// value. We use this to invalidate any info about stuff that lives in
/// it and any of its aliases.
void ClobberPhysReg(unsigned PhysReg);
@ -450,7 +458,7 @@ void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) {
/// marked kill, then invalidate the information.
static void InvalidateKills(MachineInstr &MI, BitVector &RegKills,
std::vector<MachineOperand*> &KillOps,
SmallVector<unsigned, 1> *KillRegs = NULL) {
SmallVector<unsigned, 2> *KillRegs = NULL) {
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isRegister() || !MO.isUse() || !MO.isKill())
@ -723,6 +731,112 @@ namespace {
};
}
/// PrepForUnfoldOpti - Turn a store folding instruction into a load folding
/// instruction. e.g.
/// xorl %edi, %eax
/// movl %eax, -32(%ebp)
/// movl -36(%ebp), %eax
/// orl %eax, -32(%ebp)
/// ==>
/// xorl %edi, %eax
/// orl -36(%ebp), %eax
/// mov %eax, -32(%ebp)
/// This enables the unfolding optimization for a subsequent instruction, which
/// will also eliminate the newly introduced store instruction.
bool LocalSpiller::PrepForUnfoldOpti(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MII,
std::vector<MachineInstr*> &MaybeDeadStores,
AvailableSpills &Spills,
BitVector &RegKills,
std::vector<MachineOperand*> &KillOps,
VirtRegMap &VRM) {
MachineFunction &MF = *MBB.getParent();
MachineInstr &MI = *MII;
unsigned UnfoldedOpc = 0;
unsigned UnfoldPR = 0;
unsigned UnfoldVR = 0;
int FoldedSS = VirtRegMap::NO_STACK_SLOT;
VirtRegMap::MI2VirtMapTy::const_iterator I, End;
for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
// Only transform a MI that folds a single register.
if (UnfoldedOpc)
return false;
UnfoldVR = I->second.first;
VirtRegMap::ModRef MR = I->second.second;
if (VRM.isAssignedReg(UnfoldVR))
continue;
// If this reference is not a use, any previous store is now dead.
// Otherwise, the store to this stack slot is not dead anymore.
FoldedSS = VRM.getStackSlot(UnfoldVR);
MachineInstr* DeadStore = MaybeDeadStores[FoldedSS];
if (DeadStore && (MR & VirtRegMap::isModRef)) {
unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS);
if (!PhysReg ||
DeadStore->findRegisterUseOperandIdx(PhysReg, true) == -1)
continue;
UnfoldPR = PhysReg;
UnfoldedOpc = MRI->getOpcodeAfterMemoryUnfold(MI.getOpcode(),
false, true);
}
}
if (!UnfoldedOpc)
return false;
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (!MO.isRegister() || MO.getReg() == 0 || !MO.isUse())
continue;
unsigned VirtReg = MO.getReg();
if (MRegisterInfo::isPhysicalRegister(VirtReg) ||
RegMap->isSubRegister(VirtReg))
continue;
if (VRM.isAssignedReg(VirtReg)) {
unsigned PhysReg = VRM.getPhys(VirtReg);
if (PhysReg && MRI->regsOverlap(PhysReg, UnfoldPR))
return false;
} else if (VRM.isReMaterialized(VirtReg))
continue;
int SS = VRM.getStackSlot(VirtReg);
unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
if (PhysReg) {
if (MRI->regsOverlap(PhysReg, UnfoldPR))
return false;
continue;
}
PhysReg = VRM.getPhys(VirtReg);
if (!MRI->regsOverlap(PhysReg, UnfoldPR))
continue;
// Ok, we'll need to reload the value into a register which makes
// it impossible to perform the store unfolding optimization later.
// Let's see if it is possible to fold the load if the store is
// unfolded. This allows us to perform the store unfolding
// optimization.
SmallVector<MachineInstr*, 4> NewMIs;
if (MRI->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) {
assert(NewMIs.size() == 1);
MachineInstr *NewMI = NewMIs.back();
NewMIs.clear();
unsigned Idx = NewMI->findRegisterUseOperandIdx(VirtReg);
MachineInstr *FoldedMI = MRI->foldMemoryOperand(NewMI, Idx, SS);
if (FoldedMI) {
if (VRM.hasPhys(UnfoldVR))
assert(VRM.getPhys(UnfoldVR) == UnfoldPR);
else
VRM.assignVirt2Phys(UnfoldVR, UnfoldPR);
VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef);
MII = MBB.insert(MII, FoldedMI);
VRM.RemoveFromFoldedVirtMap(&MI);
MBB.erase(&MI);
return true;
}
delete NewMI;
}
}
return false;
}
/// rewriteMBB - Keep track of which spills are available even after the
/// register allocator is done with them. If possible, avoid reloading vregs.
@ -754,28 +868,21 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
MII != E; ) {
MachineInstr &MI = *MII;
MachineBasicBlock::iterator NextMII = MII; ++NextMII;
VirtRegMap::MI2VirtMapTy::const_iterator I, End;
VirtRegMap::MI2VirtMapTy::const_iterator I, End;
bool Erased = false;
bool BackTracked = false;
if (PrepForUnfoldOpti(MBB, MII,
MaybeDeadStores, Spills, RegKills, KillOps, VRM))
NextMII = next(MII);
/// ReusedOperands - Keep track of operand reuse in case we need to undo
/// reuse.
MachineInstr &MI = *MII;
ReuseInfo ReusedOperands(MI, MRI);
// Loop over all of the implicit defs, clearing them from our available
// sets.
const TargetInstrDescriptor *TID = MI.getInstrDescriptor();
if (TID->ImplicitDefs) {
const unsigned *ImpDef = TID->ImplicitDefs;
for ( ; *ImpDef; ++ImpDef) {
MF.setPhysRegUsed(*ImpDef);
ReusedOperands.markClobbered(*ImpDef);
Spills.ClobberPhysReg(*ImpDef);
}
}
// Process all of the spilled uses and all non spilled reg references.
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
@ -788,7 +895,6 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
// Ignore physregs for spilling, but remember that it is used by this
// function.
MF.setPhysRegUsed(VirtReg);
ReusedOperands.markClobbered(VirtReg);
continue;
}
@ -826,7 +932,7 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId);
if (!PhysReg && DoReMat) {
// This use is rematerializable. But perhaps the value is available in
// stack if the definition is not deleted. If so, check if we can
// a register if the definition is not deleted. If so, check if we can
// reuse the value.
ReuseSlot = VRM.getStackSlot(VirtReg);
if (ReuseSlot != VirtRegMap::NO_STACK_SLOT)
@ -857,7 +963,6 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
// aren't allowed to modify the reused register. If none of these cases
// apply, reuse it.
bool CanReuse = true;
int ti = TID->getOperandConstraint(i, TOI::TIED_TO);
if (ti != -1 &&
MI.getOperand(ti).isRegister() &&
@ -911,8 +1016,8 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
if (DeadStore) {
DOUT << "Removed dead store:\t" << *DeadStore;
InvalidateKills(*DeadStore, RegKills, KillOps);
MBB.erase(DeadStore);
VRM.RemoveFromFoldedVirtMap(DeadStore);
MBB.erase(DeadStore);
MaybeDeadStores[ReuseSlot] = NULL;
++NumDSE;
}
@ -977,7 +1082,7 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
DOUT << '\t' << *prior(MII);
++NumReused;
continue;
} // is (PhysReg)
} // if (PhysReg)
// Otherwise, reload it and remember that we have it.
PhysReg = VRM.getPhys(VirtReg);
@ -1023,12 +1128,11 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
// If we have folded references to memory operands, make sure we clear all
// physical registers that may contain the value of the spilled virtual
// register
SmallSet<int, 1> FoldedSS;
SmallSet<int, 2> FoldedSS;
for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) {
DOUT << "Folded vreg: " << I->second.first << " MR: "
<< I->second.second;
unsigned VirtReg = I->second.first;
VirtRegMap::ModRef MR = I->second.second;
DOUT << "Folded vreg: " << VirtReg << " MR: " << MR;
if (VRM.isAssignedReg(VirtReg)) {
DOUT << ": No stack slot!\n";
continue;
@ -1084,9 +1188,9 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
// Otherwise, the store to this stack slot is not dead anymore.
MachineInstr* DeadStore = MaybeDeadStores[SS];
if (DeadStore) {
bool isDead = true;
bool isDead = !(MR & VirtRegMap::isRef);
MachineInstr *NewStore = NULL;
if (MR & VirtRegMap::isRef) {
if (MR & VirtRegMap::isMod) {
unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS);
SmallVector<MachineInstr*, 4> NewMIs;
if (PhysReg &&
@ -1101,8 +1205,8 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
--NextMII;
--NextMII; // backtrack to the unfolded instruction.
BackTracked = true;
} else
isDead = false;
isDead = true;
}
}
if (isDead) { // Previous store is dead.
@ -1156,12 +1260,120 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
// Process all of the spilled defs.
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI.getOperand(i);
if (MO.isRegister() && MO.getReg() && MO.isDef()) {
unsigned VirtReg = MO.getReg();
if (!(MO.isRegister() && MO.getReg() && MO.isDef()))
continue;
if (!MRegisterInfo::isVirtualRegister(VirtReg)) {
// Check to see if this is a noop copy. If so, eliminate the
// instruction before considering the dest reg to be changed.
unsigned VirtReg = MO.getReg();
if (!MRegisterInfo::isVirtualRegister(VirtReg)) {
// Check to see if this is a noop copy. If so, eliminate the
// instruction before considering the dest reg to be changed.
unsigned Src, Dst;
if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) {
++NumDCE;
DOUT << "Removing now-noop copy: " << MI;
MBB.erase(&MI);
Erased = true;
VRM.RemoveFromFoldedVirtMap(&MI);
Spills.disallowClobberPhysReg(VirtReg);
goto ProcessNextInst;
}
// If it's not a no-op copy, it clobbers the value in the destreg.
Spills.ClobberPhysReg(VirtReg);
ReusedOperands.markClobbered(VirtReg);
// Check to see if this instruction is a load from a stack slot into
// a register. If so, this provides the stack slot value in the reg.
int FrameIdx;
if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
assert(DestReg == VirtReg && "Unknown load situation!");
// If it is a folded reference, then it's not safe to clobber.
bool Folded = FoldedSS.count(FrameIdx);
// Otherwise, if it wasn't available, remember that it is now!
Spills.addAvailable(FrameIdx, &MI, DestReg, !Folded);
goto ProcessNextInst;
}
continue;
}
bool DoReMat = VRM.isReMaterialized(VirtReg);
if (DoReMat)
ReMatDefs.insert(&MI);
// The only vregs left are stack slot definitions.
int StackSlot = VRM.getStackSlot(VirtReg);
const TargetRegisterClass *RC = RegMap->getRegClass(VirtReg);
// If this def is part of a two-address operand, make sure to execute
// the store from the correct physical register.
unsigned PhysReg;
int TiedOp = MI.getInstrDescriptor()->findTiedToSrcOperand(i);
if (TiedOp != -1)
PhysReg = MI.getOperand(TiedOp).getReg();
else {
PhysReg = VRM.getPhys(VirtReg);
if (ReusedOperands.isClobbered(PhysReg)) {
// Another def has taken the assigned physreg. It must have been a
// use&def which got it due to reuse. Undo the reuse!
PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI,
Spills, MaybeDeadStores, RegKills, KillOps, VRM);
}
}
MF.setPhysRegUsed(PhysReg);
ReusedOperands.markClobbered(PhysReg);
MI.getOperand(i).setReg(PhysReg);
if (!MO.isDead()) {
MRI->storeRegToStackSlot(MBB, next(MII), PhysReg, StackSlot, RC);
DOUT << "Store:\t" << *next(MII);
// If there is a dead store to this stack slot, nuke it now.
MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
if (LastStore) {
DOUT << "Removed dead store:\t" << *LastStore;
++NumDSE;
SmallVector<unsigned, 2> KillRegs;
InvalidateKills(*LastStore, RegKills, KillOps, &KillRegs);
MachineBasicBlock::iterator PrevMII = LastStore;
bool CheckDef = PrevMII != MBB.begin();
if (CheckDef)
--PrevMII;
MBB.erase(LastStore);
VRM.RemoveFromFoldedVirtMap(LastStore);
if (CheckDef) {
// Look at defs of killed registers on the store. Mark the defs
// as dead since the store has been deleted and they aren't
// being reused.
for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
bool HasOtherDef = false;
if (InvalidateRegDef(PrevMII, MI, KillRegs[j], HasOtherDef)) {
MachineInstr *DeadDef = PrevMII;
if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
// FIXME: This assumes a remat def does not have side
// effects.
MBB.erase(DeadDef);
VRM.RemoveFromFoldedVirtMap(DeadDef);
++NumDRM;
}
}
}
}
}
LastStore = next(MII);
// If the stack slot value was previously available in some other
// register, change it now. Otherwise, make the register available,
// in PhysReg.
Spills.ModifyStackSlotOrReMat(StackSlot);
Spills.ClobberPhysReg(PhysReg);
Spills.addAvailable(StackSlot, LastStore, PhysReg);
++NumStores;
// Check to see if this is a noop copy. If so, eliminate the
// instruction before considering the dest reg to be changed.
{
unsigned Src, Dst;
if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) {
++NumDCE;
@ -1169,119 +1381,11 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) {
MBB.erase(&MI);
Erased = true;
VRM.RemoveFromFoldedVirtMap(&MI);
Spills.disallowClobberPhysReg(VirtReg);
UpdateKills(*LastStore, RegKills, KillOps);
goto ProcessNextInst;
}
// If it's not a no-op copy, it clobbers the value in the destreg.
Spills.ClobberPhysReg(VirtReg);
ReusedOperands.markClobbered(VirtReg);
// Check to see if this instruction is a load from a stack slot into
// a register. If so, this provides the stack slot value in the reg.
int FrameIdx;
if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) {
assert(DestReg == VirtReg && "Unknown load situation!");
// If it is a folded reference, then it's not safe to clobber.
bool Folded = FoldedSS.count(FrameIdx);
// Otherwise, if it wasn't available, remember that it is now!
Spills.addAvailable(FrameIdx, &MI, DestReg, !Folded);
goto ProcessNextInst;
}
continue;
}
bool DoReMat = VRM.isReMaterialized(VirtReg);
if (DoReMat)
ReMatDefs.insert(&MI);
// The only vregs left are stack slot definitions.
int StackSlot = VRM.getStackSlot(VirtReg);
const TargetRegisterClass *RC = RegMap->getRegClass(VirtReg);
// If this def is part of a two-address operand, make sure to execute
// the store from the correct physical register.
unsigned PhysReg;
int TiedOp = MI.getInstrDescriptor()->findTiedToSrcOperand(i);
if (TiedOp != -1)
PhysReg = MI.getOperand(TiedOp).getReg();
else {
PhysReg = VRM.getPhys(VirtReg);
if (ReusedOperands.isClobbered(PhysReg)) {
// Another def has taken the assigned physreg. It must have been a
// use&def which got it due to reuse. Undo the reuse!
PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI,
Spills, MaybeDeadStores, RegKills, KillOps, VRM);
}
}
MF.setPhysRegUsed(PhysReg);
ReusedOperands.markClobbered(PhysReg);
MI.getOperand(i).setReg(PhysReg);
if (!MO.isDead()) {
MRI->storeRegToStackSlot(MBB, next(MII), PhysReg, StackSlot, RC);
DOUT << "Store:\t" << *next(MII);
// If there is a dead store to this stack slot, nuke it now.
MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
if (LastStore) {
DOUT << "Removed dead store:\t" << *LastStore;
++NumDSE;
SmallVector<unsigned, 1> KillRegs;
InvalidateKills(*LastStore, RegKills, KillOps, &KillRegs);
MachineBasicBlock::iterator PrevMII = LastStore;
bool CheckDef = PrevMII != MBB.begin();
if (CheckDef)
--PrevMII;
MBB.erase(LastStore);
VRM.RemoveFromFoldedVirtMap(LastStore);
if (CheckDef) {
// Look at defs of killed registers on the store. Mark the defs
// as dead since the store has been deleted and they aren't
// being reused.
for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
bool HasOtherDef = false;
if (InvalidateRegDef(PrevMII, MI, KillRegs[j], HasOtherDef)) {
MachineInstr *DeadDef = PrevMII;
if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
// FIXME: This assumes a remat def does not have side
// effects.
MBB.erase(DeadDef);
VRM.RemoveFromFoldedVirtMap(DeadDef);
++NumDRM;
}
}
}
}
}
LastStore = next(MII);
// If the stack slot value was previously available in some other
// register, change it now. Otherwise, make the register available,
// in PhysReg.
Spills.ModifyStackSlotOrReMat(StackSlot);
Spills.ClobberPhysReg(PhysReg);
Spills.addAvailable(StackSlot, LastStore, PhysReg);
++NumStores;
// Check to see if this is a noop copy. If so, eliminate the
// instruction before considering the dest reg to be changed.
{
unsigned Src, Dst;
if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) {
++NumDCE;
DOUT << "Removing now-noop copy: " << MI;
MBB.erase(&MI);
Erased = true;
VRM.RemoveFromFoldedVirtMap(&MI);
UpdateKills(*LastStore, RegKills, KillOps);
goto ProcessNextInst;
}
}
}
}
}
}
ProcessNextInst:
if (!Erased && !BackTracked)


@ -1062,9 +1062,11 @@ let isTwoAddress = 0, CodeSize = 2 in {
def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst",
[(store (add (loadi8 addr:$dst), 1), addr:$dst)]>;
def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
[(store (add (loadi16 addr:$dst), 1), addr:$dst)]>, OpSize;
[(store (add (loadi16 addr:$dst), 1), addr:$dst)]>,
OpSize, Requires<[In32BitMode]>;
def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
[(store (add (loadi32 addr:$dst), 1), addr:$dst)]>;
[(store (add (loadi32 addr:$dst), 1), addr:$dst)]>,
Requires<[In32BitMode]>;
}
let CodeSize = 2 in
@ -1082,9 +1084,11 @@ let isTwoAddress = 0, CodeSize = 2 in {
def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst",
[(store (add (loadi8 addr:$dst), -1), addr:$dst)]>;
def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
[(store (add (loadi16 addr:$dst), -1), addr:$dst)]>, OpSize;
[(store (add (loadi16 addr:$dst), -1), addr:$dst)]>,
OpSize, Requires<[In32BitMode]>;
def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
[(store (add (loadi32 addr:$dst), -1), addr:$dst)]>;
[(store (add (loadi32 addr:$dst), -1), addr:$dst)]>,
Requires<[In32BitMode]>;
}
} // Defs = [EFLAGS]


@ -461,6 +461,23 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst"
[(set GR32:$dst, (add GR32:$src, -1))]>,
Requires<[In64BitMode]>;
} // isConvertibleToThreeAddress
// These are duplicates of their 32-bit counterparts. Only needed so X86 knows
// how to unfold them.
let isTwoAddress = 0, CodeSize = 2 in {
def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst",
[(store (add (loadi16 addr:$dst), 1), addr:$dst)]>,
OpSize, Requires<[In64BitMode]>;
def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst",
[(store (add (loadi32 addr:$dst), 1), addr:$dst)]>,
Requires<[In64BitMode]>;
def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst",
[(store (add (loadi16 addr:$dst), -1), addr:$dst)]>,
OpSize, Requires<[In64BitMode]>;
def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst",
[(store (add (loadi32 addr:$dst), -1), addr:$dst)]>,
Requires<[In64BitMode]>;
}
} // Defs = [EFLAGS], CodeSize


@ -97,14 +97,14 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm,
{ X86::AND8rr, X86::AND8mr },
{ X86::DEC16r, X86::DEC16m },
{ X86::DEC32r, X86::DEC32m },
{ X86::DEC64_16r, X86::DEC16m },
{ X86::DEC64_32r, X86::DEC32m },
{ X86::DEC64_16r, X86::DEC64_16m },
{ X86::DEC64_32r, X86::DEC64_32m },
{ X86::DEC64r, X86::DEC64m },
{ X86::DEC8r, X86::DEC8m },
{ X86::INC16r, X86::INC16m },
{ X86::INC32r, X86::INC32m },
{ X86::INC64_16r, X86::INC16m },
{ X86::INC64_32r, X86::INC32m },
{ X86::INC64_16r, X86::INC64_16m },
{ X86::INC64_32r, X86::INC64_32m },
{ X86::INC64r, X86::INC64m },
{ X86::INC8r, X86::INC8m },
{ X86::NEG16r, X86::NEG16m },
@ -981,10 +981,9 @@ void X86RegisterInfo::reMaterialize(MachineBasicBlock &MBB,
static MachineInstr *FuseTwoAddrInst(unsigned Opcode,
SmallVector<MachineOperand,4> &MOs,
MachineInstr *MI, const TargetInstrInfo &TII) {
unsigned NumOps = TII.getNumOperands(MI->getOpcode())-2;
// Create the base instruction with the memory operand as the first part.
MachineInstrBuilder MIB = BuildMI(TII.get(Opcode));
MachineInstr *NewMI = new MachineInstr(TII.get(Opcode), true);
MachineInstrBuilder MIB(NewMI);
unsigned NumAddrOps = MOs.size();
for (unsigned i = 0; i != NumAddrOps; ++i)
MIB = X86InstrAddOperand(MIB, MOs[i]);
@ -992,17 +991,23 @@ static MachineInstr *FuseTwoAddrInst(unsigned Opcode,
MIB.addImm(1).addReg(0).addImm(0);
// Loop over the rest of the ri operands, converting them over.
unsigned NumOps = TII.getNumOperands(MI->getOpcode())-2;
for (unsigned i = 0; i != NumOps; ++i) {
MachineOperand &MO = MI->getOperand(i+2);
MIB = X86InstrAddOperand(MIB, MO);
}
for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
MIB = X86InstrAddOperand(MIB, MO);
}
return MIB;
}
static MachineInstr *FuseInst(unsigned Opcode, unsigned OpNo,
SmallVector<MachineOperand,4> &MOs,
MachineInstr *MI, const TargetInstrInfo &TII) {
MachineInstrBuilder MIB = BuildMI(TII.get(Opcode));
MachineInstr *NewMI = new MachineInstr(TII.get(Opcode), true);
MachineInstrBuilder MIB(NewMI);
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = MI->getOperand(i);
@ -1036,7 +1041,6 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode,
MachineInstr*
X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, unsigned i,
SmallVector<MachineOperand,4> &MOs) const {
// Table (and size) to search
const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
bool isTwoAddrFold = false;
unsigned NumOps = TII.getNumOperands(MI->getOpcode());
@ -1117,6 +1121,49 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, unsigned OpNu
return foldMemoryOperand(MI, OpNum, MOs);
}
unsigned X86RegisterInfo::getOpcodeAfterMemoryFold(unsigned Opc,
unsigned OpNum) const {
// Check switch flag
if (NoFusing) return 0;
const DenseMap<unsigned*, unsigned> *OpcodeTablePtr = NULL;
unsigned NumOps = TII.getNumOperands(Opc);
bool isTwoAddr = NumOps > 1 &&
TII.getOperandConstraint(Opc, 1, TOI::TIED_TO) != -1;
// Folding a memory location into the two-address part of a two-address
// instruction is different than folding it other places. It requires
// replacing the *two* registers with the memory location.
if (isTwoAddr && NumOps >= 2 && OpNum < 2) {
OpcodeTablePtr = &RegOp2MemOpTable2Addr;
} else if (OpNum == 0) { // If operand 0
switch (Opc) {
case X86::MOV16r0:
return X86::MOV16mi;
case X86::MOV32r0:
return X86::MOV32mi;
case X86::MOV64r0:
return X86::MOV64mi32;
case X86::MOV8r0:
return X86::MOV8mi;
default: break;
}
OpcodeTablePtr = &RegOp2MemOpTable0;
} else if (OpNum == 1) {
OpcodeTablePtr = &RegOp2MemOpTable1;
} else if (OpNum == 2) {
OpcodeTablePtr = &RegOp2MemOpTable2;
}
if (OpcodeTablePtr) {
// Find the Opcode to fuse
DenseMap<unsigned*, unsigned>::iterator I =
OpcodeTablePtr->find((unsigned*)Opc);
if (I != OpcodeTablePtr->end())
return I->second;
}
return 0;
}
bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
@ -1126,14 +1173,14 @@ bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
return false;
unsigned Opc = I->second.first;
unsigned Index = I->second.second & 0xf;
bool HasLoad = I->second.second & (1 << 4);
bool HasStore = I->second.second & (1 << 5);
if (UnfoldLoad && !HasLoad)
bool FoldedLoad = I->second.second & (1 << 4);
bool FoldedStore = I->second.second & (1 << 5);
if (UnfoldLoad && !FoldedLoad)
return false;
HasLoad &= UnfoldLoad;
if (UnfoldStore && !HasStore)
UnfoldLoad &= FoldedLoad;
if (UnfoldStore && !FoldedStore)
return false;
HasStore &= UnfoldStore;
UnfoldStore &= FoldedStore;
const TargetInstrDescriptor &TID = TII.get(Opc);
const TargetOperandInfo &TOI = TID.OpInfo[Index];
@ -1156,9 +1203,9 @@ bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
}
// Emit the load instruction.
if (HasLoad) {
if (UnfoldLoad) {
loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs);
if (HasStore) {
if (UnfoldStore) {
// Address operands cannot be marked isKill.
for (unsigned i = 1; i != 5; ++i) {
MachineOperand &MO = NewMIs[0]->getOperand(i);
@ -1169,15 +1216,11 @@ bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
}
// Emit the data processing instruction.
MachineInstr *DataMI = new MachineInstr (TID, true);
MachineInstr *DataMI = new MachineInstr(TID, true);
MachineInstrBuilder MIB(DataMI);
const TargetRegisterClass *DstRC = 0;
if (HasStore) {
const TargetOperandInfo &DstTOI = TID.OpInfo[0];
DstRC = (DstTOI.Flags & M_LOOK_UP_PTR_REG_CLASS)
? TII.getPointerRegClass() : getRegClass(DstTOI.RegClass);
if (FoldedStore)
MIB.addReg(Reg, true);
}
for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i)
MIB = X86InstrAddOperand(MIB, BeforeOps[i]);
MIB.addReg(Reg);
@ -1190,8 +1233,12 @@ bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
NewMIs.push_back(MIB);
// Emit the store instruction.
if (HasStore)
if (UnfoldStore) {
const TargetOperandInfo &DstTOI = TID.OpInfo[0];
const TargetRegisterClass *DstRC = (DstTOI.Flags & M_LOOK_UP_PTR_REG_CLASS)
? TII.getPointerRegClass() : getRegClass(DstTOI.RegClass);
storeRegToAddr(MF, Reg, AddrOps, DstRC, NewMIs);
}
return true;
}
@ -1209,8 +1256,8 @@ X86RegisterInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
return false;
unsigned Opc = I->second.first;
unsigned Index = I->second.second & 0xf;
bool HasLoad = I->second.second & (1 << 4);
bool HasStore = I->second.second & (1 << 5);
bool FoldedLoad = I->second.second & (1 << 4);
bool FoldedStore = I->second.second & (1 << 5);
const TargetInstrDescriptor &TID = TII.get(Opc);
const TargetOperandInfo &TOI = TID.OpInfo[Index];
const TargetRegisterClass *RC = (TOI.Flags & M_LOOK_UP_PTR_REG_CLASS)
@ -1233,7 +1280,7 @@ X86RegisterInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
// Emit the load instruction.
SDNode *Load = 0;
if (HasLoad) {
if (FoldedLoad) {
MVT::ValueType VT = *RC->vt_begin();
Load = DAG.getTargetNode(getLoadRegOpcode(RC), VT, MVT::Other,
&AddrOps[0], AddrOps.size());
@ -1261,7 +1308,7 @@ X86RegisterInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
NewNodes.push_back(NewNode);
// Emit the store instruction.
if (HasStore) {
if (FoldedStore) {
AddrOps.pop_back();
AddrOps.push_back(SDOperand(NewNode, 0));
AddrOps.push_back(Chain);
@ -1279,11 +1326,11 @@ unsigned X86RegisterInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
MemOp2RegOpTable.find((unsigned*)Opc);
if (I == MemOp2RegOpTable.end())
return 0;
bool HasLoad = I->second.second & (1 << 4);
bool HasStore = I->second.second & (1 << 5);
if (UnfoldLoad && !HasLoad)
bool FoldedLoad = I->second.second & (1 << 4);
bool FoldedStore = I->second.second & (1 << 5);
if (UnfoldLoad && !FoldedLoad)
return 0;
if (UnfoldStore && !HasStore)
if (UnfoldStore && !FoldedStore)
return 0;
return I->second.first;
}


@ -132,6 +132,12 @@ public:
unsigned OpNum,
MachineInstr* LoadMI) const;
/// getOpcodeAfterMemoryFold - Returns the opcode of the would be new
/// instruction after load / store is folded into an instruction of the
/// specified opcode. It returns zero if the specified folding is not
/// possible.
unsigned getOpcodeAfterMemoryFold(unsigned Opc, unsigned OpNum) const;
/// unfoldMemoryOperand - Separate a single instruction which folded a load or
/// a store or a load and a store into two or more instructions. If this is
/// possible, returns true as well as the new instructions by reference.
@ -143,8 +149,9 @@ public:
SmallVectorImpl<SDNode*> &NewNodes) const;
/// getOpcodeAfterMemoryUnfold - Returns the opcode of the would be new
/// instruction after load / store are unfolded from the specified opcode.
/// It returns zero if the specified unfolding is impossible.
/// instruction after load / store are unfolded from an instruction of the
/// specified opcode. It returns zero if the specified unfolding is not
/// possible.
unsigned getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore) const;