From 66f716354527c5ab4687a89a1605915e5128a106 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Fri, 19 Oct 2007 21:23:22 +0000 Subject: [PATCH] Local spiller optimization: Turn a store folding instruction into a load folding instruction. e.g. xorl %edi, %eax movl %eax, -32(%ebp) movl -36(%ebp), %eax orl %eax, -32(%ebp) => xorl %edi, %eax orl -36(%ebp), %eax mov %eax, -32(%ebp) This enables the unfolding optimization for a subsequent instruction which will also eliminate the newly introduced store instruction. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@43192 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/Target/MRegisterInfo.h | 13 +- lib/CodeGen/VirtRegMap.cpp | 386 ++++++++++++++++++---------- lib/Target/X86/X86InstrInfo.td | 12 +- lib/Target/X86/X86InstrX86-64.td | 17 ++ lib/Target/X86/X86RegisterInfo.cpp | 113 +++++--- lib/Target/X86/X86RegisterInfo.h | 11 +- 6 files changed, 370 insertions(+), 182 deletions(-) diff --git a/include/llvm/Target/MRegisterInfo.h b/include/llvm/Target/MRegisterInfo.h index dd24f37fe5c..afcadc4a173 100644 --- a/include/llvm/Target/MRegisterInfo.h +++ b/include/llvm/Target/MRegisterInfo.h @@ -563,6 +563,14 @@ public: return 0; } + /// getOpcodeAfterMemoryFold - Returns the opcode of the would be new + /// instruction after load / store is folded into an instruction of the + /// specified opcode. It returns zero if the specified unfolding is not + /// possible. + virtual unsigned getOpcodeAfterMemoryFold(unsigned Opc, unsigned OpNum) const{ + return 0; + } + /// unfoldMemoryOperand - Separate a single instruction which folded a load or /// a a store or a load and a store into two or more instruction. If this is /// possible, returns true as well as the new instructions by reference. @@ -578,8 +586,9 @@ public: } /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would be new - /// instruction after load / store are unfolded from the specified opcode. - /// It returns zero if the specified unfolding is impossible. + /// instruction after load / store are unfolded from an instruction of the + /// specified opcode. It returns zero if the specified unfolding is not + /// possible. virtual unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore) const { return 0; diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index d6dc92a6428..4f3a96307cf 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -242,6 +242,8 @@ bool SimpleSpiller::runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) { //===----------------------------------------------------------------------===// namespace { + class AvailableSpills; + /// LocalSpiller - This spiller does a simple pass over the machine basic /// block to attempt to keep spills in registers as much as possible for /// blocks that have low register pressure (the vreg may be spilled due to @@ -270,6 +272,12 @@ namespace { return true; } private: + bool PrepForUnfoldOpti(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MII, + std::vector &MaybeDeadStores, + AvailableSpills &Spills, BitVector &RegKills, + std::vector &KillOps, + VirtRegMap &VRM); void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM); }; } @@ -357,7 +365,7 @@ public: void disallowClobberPhysReg(unsigned PhysReg); /// ClobberPhysReg - This is called when the specified physreg changes - /// value. We use this to invalidate any info about stuff we thing lives in + /// value. 
We use this to invalidate any info about stuff that lives in /// it and any of its aliases. void ClobberPhysReg(unsigned PhysReg); @@ -450,7 +458,7 @@ void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { /// marked kill, then invalidate the information. static void InvalidateKills(MachineInstr &MI, BitVector &RegKills, std::vector &KillOps, - SmallVector *KillRegs = NULL) { + SmallVector *KillRegs = NULL) { for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); if (!MO.isRegister() || !MO.isUse() || !MO.isKill()) @@ -723,6 +731,112 @@ namespace { }; } +/// PrepForUnfoldOpti - Turn a store folding instruction into a load folding +/// instruction. e.g. +/// xorl %edi, %eax +/// movl %eax, -32(%ebp) +/// movl -36(%ebp), %eax +/// orl %eax, -32(%ebp) +/// ==> +/// xorl %edi, %eax +/// orl -36(%ebp), %eax +/// mov %eax, -32(%ebp) +/// This enables unfolding optimization for a subsequent instruction which will +/// also eliminate the newly introduced store instruction. +bool LocalSpiller::PrepForUnfoldOpti(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MII, + std::vector &MaybeDeadStores, + AvailableSpills &Spills, + BitVector &RegKills, + std::vector &KillOps, + VirtRegMap &VRM) { + MachineFunction &MF = *MBB.getParent(); + MachineInstr &MI = *MII; + unsigned UnfoldedOpc = 0; + unsigned UnfoldPR = 0; + unsigned UnfoldVR = 0; + int FoldedSS = VirtRegMap::NO_STACK_SLOT; + VirtRegMap::MI2VirtMapTy::const_iterator I, End; + for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) { + // Only transform a MI that folds a single register. + if (UnfoldedOpc) + return false; + UnfoldVR = I->second.first; + VirtRegMap::ModRef MR = I->second.second; + if (VRM.isAssignedReg(UnfoldVR)) + continue; + // If this reference is not a use, any previous store is now dead. + // Otherwise, the store to this stack slot is not dead anymore. + FoldedSS = VRM.getStackSlot(UnfoldVR); + MachineInstr* DeadStore = MaybeDeadStores[FoldedSS]; + if (DeadStore && (MR & VirtRegMap::isModRef)) { + unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(FoldedSS); + if (!PhysReg || + DeadStore->findRegisterUseOperandIdx(PhysReg, true) == -1) + continue; + UnfoldPR = PhysReg; + UnfoldedOpc = MRI->getOpcodeAfterMemoryUnfold(MI.getOpcode(), + false, true); + } + } + + if (!UnfoldedOpc) + return false; + + for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI.getOperand(i); + if (!MO.isRegister() || MO.getReg() == 0 || !MO.isUse()) + continue; + unsigned VirtReg = MO.getReg(); + if (MRegisterInfo::isPhysicalRegister(VirtReg) || + RegMap->isSubRegister(VirtReg)) + continue; + if (VRM.isAssignedReg(VirtReg)) { + unsigned PhysReg = VRM.getPhys(VirtReg); + if (PhysReg && MRI->regsOverlap(PhysReg, UnfoldPR)) + return false; + } else if (VRM.isReMaterialized(VirtReg)) + continue; + int SS = VRM.getStackSlot(VirtReg); + unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); + if (PhysReg) { + if (MRI->regsOverlap(PhysReg, UnfoldPR)) + return false; + continue; + } + PhysReg = VRM.getPhys(VirtReg); + if (!MRI->regsOverlap(PhysReg, UnfoldPR)) + continue; + + // Ok, we'll need to reload the value into a register which makes + // it impossible to perform the store unfolding optimization later. + // Let's see if it is possible to fold the load if the store is + // unfolded. This allows us to perform the store unfolding + // optimization. 
+ SmallVector NewMIs; + if (MRI->unfoldMemoryOperand(MF, &MI, UnfoldVR, false, false, NewMIs)) { + assert(NewMIs.size() == 1); + MachineInstr *NewMI = NewMIs.back(); + NewMIs.clear(); + unsigned Idx = NewMI->findRegisterUseOperandIdx(VirtReg); + MachineInstr *FoldedMI = MRI->foldMemoryOperand(NewMI, Idx, SS); + if (FoldedMI) { + if (VRM.hasPhys(UnfoldVR)) + assert(VRM.getPhys(UnfoldVR) == UnfoldPR); + else + VRM.assignVirt2Phys(UnfoldVR, UnfoldPR); + + VRM.virtFolded(VirtReg, FoldedMI, VirtRegMap::isRef); + MII = MBB.insert(MII, FoldedMI); + VRM.RemoveFromFoldedVirtMap(&MI); + MBB.erase(&MI); + return true; + } + delete NewMI; + } + } + return false; +} /// rewriteMBB - Keep track of which spills are available even after the /// register allocator is done with them. If possible, avoid reloading vregs. @@ -754,28 +868,21 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end(); MII != E; ) { - MachineInstr &MI = *MII; MachineBasicBlock::iterator NextMII = MII; ++NextMII; - VirtRegMap::MI2VirtMapTy::const_iterator I, End; + VirtRegMap::MI2VirtMapTy::const_iterator I, End; bool Erased = false; bool BackTracked = false; + if (PrepForUnfoldOpti(MBB, MII, + MaybeDeadStores, Spills, RegKills, KillOps, VRM)) + NextMII = next(MII); /// ReusedOperands - Keep track of operand reuse in case we need to undo /// reuse. + MachineInstr &MI = *MII; ReuseInfo ReusedOperands(MI, MRI); - // Loop over all of the implicit defs, clearing them from our available - // sets. const TargetInstrDescriptor *TID = MI.getInstrDescriptor(); - if (TID->ImplicitDefs) { - const unsigned *ImpDef = TID->ImplicitDefs; - for ( ; *ImpDef; ++ImpDef) { - MF.setPhysRegUsed(*ImpDef); - ReusedOperands.markClobbered(*ImpDef); - Spills.ClobberPhysReg(*ImpDef); - } - } // Process all of the spilled uses and all non spilled reg references. for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { @@ -788,7 +895,6 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { // Ignore physregs for spilling, but remember that it is used by this // function. MF.setPhysRegUsed(VirtReg); - ReusedOperands.markClobbered(VirtReg); continue; } @@ -826,7 +932,7 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); if (!PhysReg && DoReMat) { // This use is rematerializable. But perhaps the value is available in - // stack if the definition is not deleted. If so, check if we can + // a register if the definition is not deleted. If so, check if we can // reuse the value. ReuseSlot = VRM.getStackSlot(VirtReg); if (ReuseSlot != VirtRegMap::NO_STACK_SLOT) @@ -857,7 +963,6 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { // aren't allowed to modify the reused register. If none of these cases // apply, reuse it. 
bool CanReuse = true; - int ti = TID->getOperandConstraint(i, TOI::TIED_TO); if (ti != -1 && MI.getOperand(ti).isRegister() && @@ -911,8 +1016,8 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { if (DeadStore) { DOUT << "Removed dead store:\t" << *DeadStore; InvalidateKills(*DeadStore, RegKills, KillOps); - MBB.erase(DeadStore); VRM.RemoveFromFoldedVirtMap(DeadStore); + MBB.erase(DeadStore); MaybeDeadStores[ReuseSlot] = NULL; ++NumDSE; } @@ -977,7 +1082,7 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { DOUT << '\t' << *prior(MII); ++NumReused; continue; - } // is (PhysReg) + } // if (PhysReg) // Otherwise, reload it and remember that we have it. PhysReg = VRM.getPhys(VirtReg); @@ -1023,12 +1128,11 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { // If we have folded references to memory operands, make sure we clear all // physical registers that may contain the value of the spilled virtual // register - SmallSet FoldedSS; + SmallSet FoldedSS; for (tie(I, End) = VRM.getFoldedVirts(&MI); I != End; ++I) { - DOUT << "Folded vreg: " << I->second.first << " MR: " - << I->second.second; unsigned VirtReg = I->second.first; VirtRegMap::ModRef MR = I->second.second; + DOUT << "Folded vreg: " << VirtReg << " MR: " << MR; if (VRM.isAssignedReg(VirtReg)) { DOUT << ": No stack slot!\n"; continue; @@ -1084,9 +1188,9 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { // Otherwise, the store to this stack slot is not dead anymore. MachineInstr* DeadStore = MaybeDeadStores[SS]; if (DeadStore) { - bool isDead = true; + bool isDead = !(MR & VirtRegMap::isRef); MachineInstr *NewStore = NULL; - if (MR & VirtRegMap::isRef) { + if (MR & VirtRegMap::isMod) { unsigned PhysReg = Spills.getSpillSlotOrReMatPhysReg(SS); SmallVector NewMIs; if (PhysReg && @@ -1101,8 +1205,8 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { --NextMII; --NextMII; // backtrack to the unfolded instruction. BackTracked = true; - } else - isDead = false; + isDead = true; + } } if (isDead) { // Previous store is dead. @@ -1156,12 +1260,120 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { // Process all of the spilled defs. for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { MachineOperand &MO = MI.getOperand(i); - if (MO.isRegister() && MO.getReg() && MO.isDef()) { - unsigned VirtReg = MO.getReg(); + if (!(MO.isRegister() && MO.getReg() && MO.isDef())) + continue; - if (!MRegisterInfo::isVirtualRegister(VirtReg)) { - // Check to see if this is a noop copy. If so, eliminate the - // instruction before considering the dest reg to be changed. + unsigned VirtReg = MO.getReg(); + if (!MRegisterInfo::isVirtualRegister(VirtReg)) { + // Check to see if this is a noop copy. If so, eliminate the + // instruction before considering the dest reg to be changed. + unsigned Src, Dst; + if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) { + ++NumDCE; + DOUT << "Removing now-noop copy: " << MI; + MBB.erase(&MI); + Erased = true; + VRM.RemoveFromFoldedVirtMap(&MI); + Spills.disallowClobberPhysReg(VirtReg); + goto ProcessNextInst; + } + + // If it's not a no-op copy, it clobbers the value in the destreg. + Spills.ClobberPhysReg(VirtReg); + ReusedOperands.markClobbered(VirtReg); + + // Check to see if this instruction is a load from a stack slot into + // a register. If so, this provides the stack slot value in the reg. 
+ int FrameIdx; + if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) { + assert(DestReg == VirtReg && "Unknown load situation!"); + + // If it is a folded reference, then it's not safe to clobber. + bool Folded = FoldedSS.count(FrameIdx); + // Otherwise, if it wasn't available, remember that it is now! + Spills.addAvailable(FrameIdx, &MI, DestReg, !Folded); + goto ProcessNextInst; + } + + continue; + } + + bool DoReMat = VRM.isReMaterialized(VirtReg); + if (DoReMat) + ReMatDefs.insert(&MI); + + // The only vregs left are stack slot definitions. + int StackSlot = VRM.getStackSlot(VirtReg); + const TargetRegisterClass *RC = RegMap->getRegClass(VirtReg); + + // If this def is part of a two-address operand, make sure to execute + // the store from the correct physical register. + unsigned PhysReg; + int TiedOp = MI.getInstrDescriptor()->findTiedToSrcOperand(i); + if (TiedOp != -1) + PhysReg = MI.getOperand(TiedOp).getReg(); + else { + PhysReg = VRM.getPhys(VirtReg); + if (ReusedOperands.isClobbered(PhysReg)) { + // Another def has taken the assigned physreg. It must have been a + // use&def which got it due to reuse. Undo the reuse! + PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI, + Spills, MaybeDeadStores, RegKills, KillOps, VRM); + } + } + + MF.setPhysRegUsed(PhysReg); + ReusedOperands.markClobbered(PhysReg); + MI.getOperand(i).setReg(PhysReg); + if (!MO.isDead()) { + MRI->storeRegToStackSlot(MBB, next(MII), PhysReg, StackSlot, RC); + DOUT << "Store:\t" << *next(MII); + + // If there is a dead store to this stack slot, nuke it now. + MachineInstr *&LastStore = MaybeDeadStores[StackSlot]; + if (LastStore) { + DOUT << "Removed dead store:\t" << *LastStore; + ++NumDSE; + SmallVector KillRegs; + InvalidateKills(*LastStore, RegKills, KillOps, &KillRegs); + MachineBasicBlock::iterator PrevMII = LastStore; + bool CheckDef = PrevMII != MBB.begin(); + if (CheckDef) + --PrevMII; + MBB.erase(LastStore); + VRM.RemoveFromFoldedVirtMap(LastStore); + if (CheckDef) { + // Look at defs of killed registers on the store. Mark the defs + // as dead since the store has been deleted and they aren't + // being reused. + for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) { + bool HasOtherDef = false; + if (InvalidateRegDef(PrevMII, MI, KillRegs[j], HasOtherDef)) { + MachineInstr *DeadDef = PrevMII; + if (ReMatDefs.count(DeadDef) && !HasOtherDef) { + // FIXME: This assumes a remat def does not have side + // effects. + MBB.erase(DeadDef); + VRM.RemoveFromFoldedVirtMap(DeadDef); + ++NumDRM; + } + } + } + } + } + LastStore = next(MII); + + // If the stack slot value was previously available in some other + // register, change it now. Otherwise, make the register available, + // in PhysReg. + Spills.ModifyStackSlotOrReMat(StackSlot); + Spills.ClobberPhysReg(PhysReg); + Spills.addAvailable(StackSlot, LastStore, PhysReg); + ++NumStores; + + // Check to see if this is a noop copy. If so, eliminate the + // instruction before considering the dest reg to be changed. + { unsigned Src, Dst; if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) { ++NumDCE; @@ -1169,119 +1381,11 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { MBB.erase(&MI); Erased = true; VRM.RemoveFromFoldedVirtMap(&MI); - Spills.disallowClobberPhysReg(VirtReg); + UpdateKills(*LastStore, RegKills, KillOps); goto ProcessNextInst; } - - // If it's not a no-op copy, it clobbers the value in the destreg. 
- Spills.ClobberPhysReg(VirtReg); - ReusedOperands.markClobbered(VirtReg); - - // Check to see if this instruction is a load from a stack slot into - // a register. If so, this provides the stack slot value in the reg. - int FrameIdx; - if (unsigned DestReg = TII->isLoadFromStackSlot(&MI, FrameIdx)) { - assert(DestReg == VirtReg && "Unknown load situation!"); - - // If it is a folded reference, then it's not safe to clobber. - bool Folded = FoldedSS.count(FrameIdx); - // Otherwise, if it wasn't available, remember that it is now! - Spills.addAvailable(FrameIdx, &MI, DestReg, !Folded); - goto ProcessNextInst; - } - - continue; } - - bool DoReMat = VRM.isReMaterialized(VirtReg); - if (DoReMat) - ReMatDefs.insert(&MI); - - // The only vregs left are stack slot definitions. - int StackSlot = VRM.getStackSlot(VirtReg); - const TargetRegisterClass *RC = RegMap->getRegClass(VirtReg); - - // If this def is part of a two-address operand, make sure to execute - // the store from the correct physical register. - unsigned PhysReg; - int TiedOp = MI.getInstrDescriptor()->findTiedToSrcOperand(i); - if (TiedOp != -1) - PhysReg = MI.getOperand(TiedOp).getReg(); - else { - PhysReg = VRM.getPhys(VirtReg); - if (ReusedOperands.isClobbered(PhysReg)) { - // Another def has taken the assigned physreg. It must have been a - // use&def which got it due to reuse. Undo the reuse! - PhysReg = ReusedOperands.GetRegForReload(PhysReg, &MI, - Spills, MaybeDeadStores, RegKills, KillOps, VRM); - } - } - - MF.setPhysRegUsed(PhysReg); - ReusedOperands.markClobbered(PhysReg); - MI.getOperand(i).setReg(PhysReg); - if (!MO.isDead()) { - MRI->storeRegToStackSlot(MBB, next(MII), PhysReg, StackSlot, RC); - DOUT << "Store:\t" << *next(MII); - - // If there is a dead store to this stack slot, nuke it now. - MachineInstr *&LastStore = MaybeDeadStores[StackSlot]; - if (LastStore) { - DOUT << "Removed dead store:\t" << *LastStore; - ++NumDSE; - SmallVector KillRegs; - InvalidateKills(*LastStore, RegKills, KillOps, &KillRegs); - MachineBasicBlock::iterator PrevMII = LastStore; - bool CheckDef = PrevMII != MBB.begin(); - if (CheckDef) - --PrevMII; - MBB.erase(LastStore); - VRM.RemoveFromFoldedVirtMap(LastStore); - if (CheckDef) { - // Look at defs of killed registers on the store. Mark the defs - // as dead since the store has been deleted and they aren't - // being reused. - for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) { - bool HasOtherDef = false; - if (InvalidateRegDef(PrevMII, MI, KillRegs[j], HasOtherDef)) { - MachineInstr *DeadDef = PrevMII; - if (ReMatDefs.count(DeadDef) && !HasOtherDef) { - // FIXME: This assumes a remat def does not have side - // effects. - MBB.erase(DeadDef); - VRM.RemoveFromFoldedVirtMap(DeadDef); - ++NumDRM; - } - } - } - } - } - LastStore = next(MII); - - // If the stack slot value was previously available in some other - // register, change it now. Otherwise, make the register available, - // in PhysReg. - Spills.ModifyStackSlotOrReMat(StackSlot); - Spills.ClobberPhysReg(PhysReg); - Spills.addAvailable(StackSlot, LastStore, PhysReg); - ++NumStores; - - // Check to see if this is a noop copy. If so, eliminate the - // instruction before considering the dest reg to be changed. 
- { - unsigned Src, Dst; - if (TII->isMoveInstr(MI, Src, Dst) && Src == Dst) { - ++NumDCE; - DOUT << "Removing now-noop copy: " << MI; - MBB.erase(&MI); - Erased = true; - VRM.RemoveFromFoldedVirtMap(&MI); - UpdateKills(*LastStore, RegKills, KillOps); - goto ProcessNextInst; - } - } - } - } + } } ProcessNextInst: if (!Erased && !BackTracked) diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 6302024e556..e9f9bd6c1c4 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -1062,9 +1062,11 @@ let isTwoAddress = 0, CodeSize = 2 in { def INC8m : I<0xFE, MRM0m, (outs), (ins i8mem :$dst), "inc{b}\t$dst", [(store (add (loadi8 addr:$dst), 1), addr:$dst)]>; def INC16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", - [(store (add (loadi16 addr:$dst), 1), addr:$dst)]>, OpSize; + [(store (add (loadi16 addr:$dst), 1), addr:$dst)]>, + OpSize, Requires<[In32BitMode]>; def INC32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", - [(store (add (loadi32 addr:$dst), 1), addr:$dst)]>; + [(store (add (loadi32 addr:$dst), 1), addr:$dst)]>, + Requires<[In32BitMode]>; } let CodeSize = 2 in @@ -1082,9 +1084,11 @@ let isTwoAddress = 0, CodeSize = 2 in { def DEC8m : I<0xFE, MRM1m, (outs), (ins i8mem :$dst), "dec{b}\t$dst", [(store (add (loadi8 addr:$dst), -1), addr:$dst)]>; def DEC16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", - [(store (add (loadi16 addr:$dst), -1), addr:$dst)]>, OpSize; + [(store (add (loadi16 addr:$dst), -1), addr:$dst)]>, + OpSize, Requires<[In32BitMode]>; def DEC32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", - [(store (add (loadi32 addr:$dst), -1), addr:$dst)]>; + [(store (add (loadi32 addr:$dst), -1), addr:$dst)]>, + Requires<[In32BitMode]>; } } // Defs = [EFLAGS] diff --git a/lib/Target/X86/X86InstrX86-64.td b/lib/Target/X86/X86InstrX86-64.td index 077e9dc8a63..42863d49309 100644 --- a/lib/Target/X86/X86InstrX86-64.td +++ b/lib/Target/X86/X86InstrX86-64.td @@ -461,6 +461,23 @@ def DEC64_32r : I<0xFF, MRM1r, (outs GR32:$dst), (ins GR32:$src), "dec{l}\t$dst" [(set GR32:$dst, (add GR32:$src, -1))]>, Requires<[In64BitMode]>; } // isConvertibleToThreeAddress + +// These are duplicates of their 32-bit counterparts. Only needed so X86 knows +// how to unfold them. 
+let isTwoAddress = 0, CodeSize = 2 in { + def INC64_16m : I<0xFF, MRM0m, (outs), (ins i16mem:$dst), "inc{w}\t$dst", + [(store (add (loadi16 addr:$dst), 1), addr:$dst)]>, + OpSize, Requires<[In64BitMode]>; + def INC64_32m : I<0xFF, MRM0m, (outs), (ins i32mem:$dst), "inc{l}\t$dst", + [(store (add (loadi32 addr:$dst), 1), addr:$dst)]>, + Requires<[In64BitMode]>; + def DEC64_16m : I<0xFF, MRM1m, (outs), (ins i16mem:$dst), "dec{w}\t$dst", + [(store (add (loadi16 addr:$dst), -1), addr:$dst)]>, + OpSize, Requires<[In64BitMode]>; + def DEC64_32m : I<0xFF, MRM1m, (outs), (ins i32mem:$dst), "dec{l}\t$dst", + [(store (add (loadi32 addr:$dst), -1), addr:$dst)]>, + Requires<[In64BitMode]>; +} } // Defs = [EFLAGS], CodeSize diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index e88c050feba..48280676df7 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -97,14 +97,14 @@ X86RegisterInfo::X86RegisterInfo(X86TargetMachine &tm, { X86::AND8rr, X86::AND8mr }, { X86::DEC16r, X86::DEC16m }, { X86::DEC32r, X86::DEC32m }, - { X86::DEC64_16r, X86::DEC16m }, - { X86::DEC64_32r, X86::DEC32m }, + { X86::DEC64_16r, X86::DEC64_16m }, + { X86::DEC64_32r, X86::DEC64_32m }, { X86::DEC64r, X86::DEC64m }, { X86::DEC8r, X86::DEC8m }, { X86::INC16r, X86::INC16m }, { X86::INC32r, X86::INC32m }, - { X86::INC64_16r, X86::INC16m }, - { X86::INC64_32r, X86::INC32m }, + { X86::INC64_16r, X86::INC64_16m }, + { X86::INC64_32r, X86::INC64_32m }, { X86::INC64r, X86::INC64m }, { X86::INC8r, X86::INC8m }, { X86::NEG16r, X86::NEG16m }, @@ -981,10 +981,9 @@ void X86RegisterInfo::reMaterialize(MachineBasicBlock &MBB, static MachineInstr *FuseTwoAddrInst(unsigned Opcode, SmallVector &MOs, MachineInstr *MI, const TargetInstrInfo &TII) { - unsigned NumOps = TII.getNumOperands(MI->getOpcode())-2; - // Create the base instruction with the memory operand as the first part. - MachineInstrBuilder MIB = BuildMI(TII.get(Opcode)); + MachineInstr *NewMI = new MachineInstr(TII.get(Opcode), true); + MachineInstrBuilder MIB(NewMI); unsigned NumAddrOps = MOs.size(); for (unsigned i = 0; i != NumAddrOps; ++i) MIB = X86InstrAddOperand(MIB, MOs[i]); @@ -992,17 +991,23 @@ static MachineInstr *FuseTwoAddrInst(unsigned Opcode, MIB.addImm(1).addReg(0).addImm(0); // Loop over the rest of the ri operands, converting them over. 
+ unsigned NumOps = TII.getNumOperands(MI->getOpcode())-2; for (unsigned i = 0; i != NumOps; ++i) { MachineOperand &MO = MI->getOperand(i+2); MIB = X86InstrAddOperand(MIB, MO); } + for (unsigned i = NumOps+2, e = MI->getNumOperands(); i != e; ++i) { + MachineOperand &MO = MI->getOperand(i); + MIB = X86InstrAddOperand(MIB, MO); + } return MIB; } static MachineInstr *FuseInst(unsigned Opcode, unsigned OpNo, SmallVector &MOs, MachineInstr *MI, const TargetInstrInfo &TII) { - MachineInstrBuilder MIB = BuildMI(TII.get(Opcode)); + MachineInstr *NewMI = new MachineInstr(TII.get(Opcode), true); + MachineInstrBuilder MIB(NewMI); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); @@ -1036,7 +1041,6 @@ static MachineInstr *MakeM0Inst(const TargetInstrInfo &TII, unsigned Opcode, MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, unsigned i, SmallVector &MOs) const { - // Table (and size) to search const DenseMap *OpcodeTablePtr = NULL; bool isTwoAddrFold = false; unsigned NumOps = TII.getNumOperands(MI->getOpcode()); @@ -1117,6 +1121,49 @@ MachineInstr* X86RegisterInfo::foldMemoryOperand(MachineInstr *MI, unsigned OpNu return foldMemoryOperand(MI, OpNum, MOs); } +unsigned X86RegisterInfo::getOpcodeAfterMemoryFold(unsigned Opc, + unsigned OpNum) const { + // Check switch flag + if (NoFusing) return 0; + const DenseMap *OpcodeTablePtr = NULL; + unsigned NumOps = TII.getNumOperands(Opc); + bool isTwoAddr = NumOps > 1 && + TII.getOperandConstraint(Opc, 1, TOI::TIED_TO) != -1; + + // Folding a memory location into the two-address part of a two-address + // instruction is different than folding it other places. It requires + // replacing the *two* registers with the memory location. + if (isTwoAddr && NumOps >= 2 && OpNum < 2) { + OpcodeTablePtr = &RegOp2MemOpTable2Addr; + } else if (OpNum == 0) { // If operand 0 + switch (Opc) { + case X86::MOV16r0: + return X86::MOV16mi; + case X86::MOV32r0: + return X86::MOV32mi; + case X86::MOV64r0: + return X86::MOV64mi32; + case X86::MOV8r0: + return X86::MOV8mi; + default: break; + } + OpcodeTablePtr = &RegOp2MemOpTable0; + } else if (OpNum == 1) { + OpcodeTablePtr = &RegOp2MemOpTable1; + } else if (OpNum == 2) { + OpcodeTablePtr = &RegOp2MemOpTable2; + } + + if (OpcodeTablePtr) { + // Find the Opcode to fuse + DenseMap::iterator I = + OpcodeTablePtr->find((unsigned*)Opc); + if (I != OpcodeTablePtr->end()) + return I->second; + } + return 0; +} + bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, unsigned Reg, bool UnfoldLoad, bool UnfoldStore, SmallVectorImpl &NewMIs) const { @@ -1126,14 +1173,14 @@ bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, return false; unsigned Opc = I->second.first; unsigned Index = I->second.second & 0xf; - bool HasLoad = I->second.second & (1 << 4); - bool HasStore = I->second.second & (1 << 5); - if (UnfoldLoad && !HasLoad) + bool FoldedLoad = I->second.second & (1 << 4); + bool FoldedStore = I->second.second & (1 << 5); + if (UnfoldLoad && !FoldedLoad) return false; - HasLoad &= UnfoldLoad; - if (UnfoldStore && !HasStore) + UnfoldLoad &= FoldedLoad; + if (UnfoldStore && !FoldedStore) return false; - HasStore &= UnfoldStore; + UnfoldStore &= FoldedStore; const TargetInstrDescriptor &TID = TII.get(Opc); const TargetOperandInfo &TOI = TID.OpInfo[Index]; @@ -1156,9 +1203,9 @@ bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, } // Emit the load instruction. 
- if (HasLoad) { + if (UnfoldLoad) { loadRegFromAddr(MF, Reg, AddrOps, RC, NewMIs); - if (HasStore) { + if (UnfoldStore) { // Address operands cannot be marked isKill. for (unsigned i = 1; i != 5; ++i) { MachineOperand &MO = NewMIs[0]->getOperand(i); @@ -1169,15 +1216,11 @@ bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, } // Emit the data processing instruction. - MachineInstr *DataMI = new MachineInstr (TID, true); + MachineInstr *DataMI = new MachineInstr(TID, true); MachineInstrBuilder MIB(DataMI); - const TargetRegisterClass *DstRC = 0; - if (HasStore) { - const TargetOperandInfo &DstTOI = TID.OpInfo[0]; - DstRC = (DstTOI.Flags & M_LOOK_UP_PTR_REG_CLASS) - ? TII.getPointerRegClass() : getRegClass(DstTOI.RegClass); + + if (FoldedStore) MIB.addReg(Reg, true); - } for (unsigned i = 0, e = BeforeOps.size(); i != e; ++i) MIB = X86InstrAddOperand(MIB, BeforeOps[i]); MIB.addReg(Reg); @@ -1190,8 +1233,12 @@ bool X86RegisterInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI, NewMIs.push_back(MIB); // Emit the store instruction. - if (HasStore) + if (UnfoldStore) { + const TargetOperandInfo &DstTOI = TID.OpInfo[0]; + const TargetRegisterClass *DstRC = (DstTOI.Flags & M_LOOK_UP_PTR_REG_CLASS) + ? TII.getPointerRegClass() : getRegClass(DstTOI.RegClass); storeRegToAddr(MF, Reg, AddrOps, DstRC, NewMIs); + } return true; } @@ -1209,8 +1256,8 @@ X86RegisterInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, return false; unsigned Opc = I->second.first; unsigned Index = I->second.second & 0xf; - bool HasLoad = I->second.second & (1 << 4); - bool HasStore = I->second.second & (1 << 5); + bool FoldedLoad = I->second.second & (1 << 4); + bool FoldedStore = I->second.second & (1 << 5); const TargetInstrDescriptor &TID = TII.get(Opc); const TargetOperandInfo &TOI = TID.OpInfo[Index]; const TargetRegisterClass *RC = (TOI.Flags & M_LOOK_UP_PTR_REG_CLASS) @@ -1233,7 +1280,7 @@ X86RegisterInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, // Emit the load instruction. SDNode *Load = 0; - if (HasLoad) { + if (FoldedLoad) { MVT::ValueType VT = *RC->vt_begin(); Load = DAG.getTargetNode(getLoadRegOpcode(RC), VT, MVT::Other, &AddrOps[0], AddrOps.size()); @@ -1261,7 +1308,7 @@ X86RegisterInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N, NewNodes.push_back(NewNode); // Emit the store instruction. - if (HasStore) { + if (FoldedStore) { AddrOps.pop_back(); AddrOps.push_back(SDOperand(NewNode, 0)); AddrOps.push_back(Chain); @@ -1279,11 +1326,11 @@ unsigned X86RegisterInfo::getOpcodeAfterMemoryUnfold(unsigned Opc, MemOp2RegOpTable.find((unsigned*)Opc); if (I == MemOp2RegOpTable.end()) return 0; - bool HasLoad = I->second.second & (1 << 4); - bool HasStore = I->second.second & (1 << 5); - if (UnfoldLoad && !HasLoad) + bool FoldedLoad = I->second.second & (1 << 4); + bool FoldedStore = I->second.second & (1 << 5); + if (UnfoldLoad && !FoldedLoad) return 0; - if (UnfoldStore && !HasStore) + if (UnfoldStore && !FoldedStore) return 0; return I->second.first; } diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h index c21868c626f..a17296b6b5f 100644 --- a/lib/Target/X86/X86RegisterInfo.h +++ b/lib/Target/X86/X86RegisterInfo.h @@ -132,6 +132,12 @@ public: unsigned OpNum, MachineInstr* LoadMI) const; + /// getOpcodeAfterMemoryFold - Returns the opcode of the would be new + /// instruction after load / store is folded into an instruction of the + /// specified opcode. It returns zero if the specified unfolding is not + /// possible. 
+ unsigned getOpcodeAfterMemoryFold(unsigned Opc, unsigned OpNum) const; + /// unfoldMemoryOperand - Separate a single instruction which folded a load or /// a store or a load and a store into two or more instruction. If this is /// possible, returns true as well as the new instructions by reference. @@ -143,8 +149,9 @@ public: SmallVectorImpl<SDNode*> &NewNodes) const; /// getOpcodeAfterMemoryUnfold - Returns the opcode of the would be new - /// instruction after load / store are unfolded from the specified opcode. - /// It returns zero if the specified unfolding is impossible. + /// instruction after load / store are unfolded from an instruction of the + /// specified opcode. It returns zero if the specified unfolding is not + /// possible. unsigned getOpcodeAfterMemoryUnfold(unsigned Opc, bool UnfoldLoad, bool UnfoldStore) const;
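
To make the new VirtRegMap.cpp logic easier to follow, here is a compact, self-contained C++ sketch of the gating decision LocalSpiller::PrepForUnfoldOpti performs before rewriting a store-folding instruction into a load-folding one. The FoldedStoreInfo struct, its field names, and the shouldSwitchStoreFoldToLoadFold helper are illustrative stand-ins, not APIs from this patch; only the conditions they model (a single folded vreg, a maybe-dead store to the same slot, a still-live physreg holding the stored value, an unfoldable store side, and a reload that would clobber that physreg) come from the code above.

#include <cstdio>

// Toy model of the preconditions checked by PrepForUnfoldOpti (sketch only).
struct FoldedStoreInfo {
  bool FoldsSingleVirtReg;   // the instruction folds exactly one spilled vreg
  bool ModifiesSlot;         // the folded memory access writes the stack slot
  bool PriorStoreMaybeDead;  // MaybeDeadStores[] holds a store to the same slot
  bool ValueStillInPhysReg;  // Spills still maps that slot to a physical register
  bool StoreSideUnfoldable;  // getOpcodeAfterMemoryUnfold(Opc, false, true) != 0
  bool ReloadWouldClobber;   // another operand must be reloaded into that physreg
};

// Simplified gate: only when every precondition holds does the spiller try to
// unfold the store and re-fold the other operand as a load.
static bool shouldSwitchStoreFoldToLoadFold(const FoldedStoreInfo &I) {
  return I.FoldsSingleVirtReg && I.ModifiesSlot && I.PriorStoreMaybeDead &&
         I.ValueStillInPhysReg && I.StoreSideUnfoldable && I.ReloadWouldClobber;
}

int main() {
  // The commit-message case: orl %eax, -32(%ebp) right after -32(%ebp) was
  // stored from a live register and -36(%ebp) must be reloaded into %eax.
  FoldedStoreInfo OrToSlot = {true, true, true, true, true, true};
  std::printf("switch to load folding: %s\n",
              shouldSwitchStoreFoldToLoadFold(OrToSlot) ? "yes" : "no");
  return 0;
}

In the commit-message example all of these hold for the orl, which is why the spiller unfolds its store and re-folds the -36(%ebp) operand as a load, leaving a plain store that the later unfolding pass can then delete.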
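
The X86RegisterInfo.cpp hunks repeatedly decode the packed value stored in MemOp2RegOpTable: the low four bits hold the operand index, bit 4 records that the memory form folded a load, and bit 5 that it folded a store; the patch renames the decoded booleans to FoldedLoad/FoldedStore and keeps UnfoldLoad/UnfoldStore for what the caller asked to pull back out. A minimal stand-alone decoder under those assumptions (the UnfoldInfo struct and decodeMemOpTableValue name are invented for illustration, not part of the patch):

#include <cstdio>

// Decoded form of the 'second' value kept in X86's MemOp2RegOpTable entries.
struct UnfoldInfo {
  unsigned Index;    // operand index the memory reference replaced (low 4 bits)
  bool FoldedLoad;   // bit 4: the memory form folded a load
  bool FoldedStore;  // bit 5: the memory form folded a store
};

static UnfoldInfo decodeMemOpTableValue(unsigned Packed) {
  UnfoldInfo Info;
  Info.Index = Packed & 0xf;              // same mask as I->second.second & 0xf
  Info.FoldedLoad = Packed & (1u << 4);   // I->second.second & (1 << 4)
  Info.FoldedStore = Packed & (1u << 5);  // I->second.second & (1 << 5)
  return Info;
}

int main() {
  // A load-modify-store entry folded at operand index 0.
  UnfoldInfo Info = decodeMemOpTableValue(0 | (1u << 4) | (1u << 5));
  std::printf("index=%u load=%d store=%d\n", Info.Index, Info.FoldedLoad,
              Info.FoldedStore);
  return 0;
}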