From 752272a5e553313f7b0397a06a23b4fe8ac013c4 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Wed, 11 Feb 2009 08:24:21 +0000 Subject: [PATCH] Implement PR3495: local spiller optimization. The local spiller can now keep availability information over BB boundaries. It visits BB's in depth first order. After visiting a BB if it find a successor which has a single predecessor it visits the successor next without clearing the availability information. This allows the successor to omit reloads or change them into copies. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@64298 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/CodeGen/VirtRegMap.cpp | 299 ++++++++++++++++++++++++++----------- test/CodeGen/X86/pr3495.ll | 78 ++++++++++ 2 files changed, 286 insertions(+), 91 deletions(-) create mode 100644 test/CodeGen/X86/pr3495.ll diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 9cb580b9f0a..5a37738a2f6 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -26,10 +26,11 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" @@ -47,6 +48,8 @@ STATISTIC(NumDSE , "Number of dead stores elided"); STATISTIC(NumDCE , "Number of copies elided"); STATISTIC(NumDSS , "Number of dead spill slots removed"); STATISTIC(NumCommutes, "Number of instructions commuted"); +STATISTIC(NumOmitted , "Number of reloads omited"); +STATISTIC(NumCopified, "Number of available reloads turned into copies"); namespace { enum SpillerName { simple, local }; @@ -308,79 +311,6 @@ bool SimpleSpiller::runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) { // Local Spiller Implementation //===----------------------------------------------------------------------===// -namespace { - class AvailableSpills; - - /// LocalSpiller - This spiller does a simple pass over the machine basic - /// block to attempt to keep spills in registers as much as possible for - /// blocks that have low register pressure (the vreg may be spilled due to - /// register pressure in other blocks). - class VISIBILITY_HIDDEN LocalSpiller : public Spiller { - MachineRegisterInfo *RegInfo; - const TargetRegisterInfo *TRI; - const TargetInstrInfo *TII; - DenseMap DistanceMap; - public: - bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) { - RegInfo = &MF.getRegInfo(); - TRI = MF.getTarget().getRegisterInfo(); - TII = MF.getTarget().getInstrInfo(); - DOUT << "\n**** Local spiller rewriting function '" - << MF.getFunction()->getName() << "':\n"; - DOUT << "**** Machine Instrs (NOTE! Does not include spills and reloads!)" - " ****\n"; - DEBUG(MF.dump()); - - for (MachineFunction::iterator MBB = MF.begin(), E = MF.end(); - MBB != E; ++MBB) - RewriteMBB(*MBB, VRM); - - // Mark unused spill slots. - MachineFrameInfo *MFI = MF.getFrameInfo(); - int SS = VRM.getLowSpillSlot(); - if (SS != VirtRegMap::NO_STACK_SLOT) - for (int e = VRM.getHighSpillSlot(); SS <= e; ++SS) - if (!VRM.isSpillSlotUsed(SS)) { - MFI->RemoveStackObject(SS); - ++NumDSS; - } - - DOUT << "**** Post Machine Instrs ****\n"; - DEBUG(MF.dump()); - - return true; - } - private: - void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist, - unsigned Reg, BitVector &RegKills, - std::vector &KillOps); - bool PrepForUnfoldOpti(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MII, - std::vector &MaybeDeadStores, - AvailableSpills &Spills, BitVector &RegKills, - std::vector &KillOps, - VirtRegMap &VRM); - bool CommuteToFoldReload(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MII, - unsigned VirtReg, unsigned SrcReg, int SS, - BitVector &RegKills, - std::vector &KillOps, - const TargetRegisterInfo *TRI, - VirtRegMap &VRM); - void SpillRegToStackSlot(MachineBasicBlock &MBB, - MachineBasicBlock::iterator &MII, - int Idx, unsigned PhysReg, int StackSlot, - const TargetRegisterClass *RC, - bool isAvailable, MachineInstr *&LastStore, - AvailableSpills &Spills, - SmallSet &ReMatDefs, - BitVector &RegKills, - std::vector &KillOps, - VirtRegMap &VRM); - void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM); - }; -} - /// AvailableSpills - As the local spiller is scanning and rewriting an MBB from /// top down, keep track of which spills slots or remat are available in each /// register. @@ -415,6 +345,12 @@ public: AvailableSpills(const TargetRegisterInfo *tri, const TargetInstrInfo *tii) : TRI(tri), TII(tii) { } + + /// clear - Reset the state. + void clear() { + SpillSlotsOrReMatsAvailable.clear(); + PhysRegsAvailable.clear(); + } const TargetRegisterInfo *getRegInfo() const { return TRI; } @@ -433,8 +369,7 @@ public: /// addAvailable - Mark that the specified stack slot / remat is available in /// the specified physreg. If CanClobber is true, the physreg can be modified /// at any time without changing the semantics of the program. - void addAvailable(int SlotOrReMat, MachineInstr *MI, unsigned Reg, - bool CanClobber = true) { + void addAvailable(int SlotOrReMat, unsigned Reg, bool CanClobber = true) { // If this stack slot is thought to be available in some other physreg, // remove its record. ModifyStackSlotOrReMat(SlotOrReMat); @@ -551,7 +486,126 @@ void AvailableSpills::ModifyStackSlotOrReMat(int SlotOrReMat) { PhysRegsAvailable.erase(I); } +static void findSinglePredSuccessor(MachineBasicBlock *MBB, + SmallVectorImpl &Succs) { + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + MachineBasicBlock *SuccMBB = *SI; + if (SuccMBB->pred_size() == 1) + Succs.push_back(SuccMBB); + } +} +namespace { + class AvailableSpills; + + /// LocalSpiller - This spiller does a simple pass over the machine basic + /// block to attempt to keep spills in registers as much as possible for + /// blocks that have low register pressure (the vreg may be spilled due to + /// register pressure in other blocks). + class VISIBILITY_HIDDEN LocalSpiller : public Spiller { + MachineRegisterInfo *RegInfo; + const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; + DenseMap DistanceMap; + public: + bool runOnMachineFunction(MachineFunction &MF, VirtRegMap &VRM) { + RegInfo = &MF.getRegInfo(); + TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getTarget().getInstrInfo(); + DOUT << "\n**** Local spiller rewriting function '" + << MF.getFunction()->getName() << "':\n"; + DOUT << "**** Machine Instrs (NOTE! Does not include spills and reloads!)" + " ****\n"; + DEBUG(MF.dump()); + + // Spills - Keep track of which spilled values are available in physregs + // so that we can choose to reuse the physregs instead of emitting + // reloads. This is usually refreshed per basic block. + AvailableSpills Spills(TRI, TII); + + // SingleEntrySuccs - Successor blocks which have a single predecessor. + SmallVector SinglePredSuccs; + SmallPtrSet EarlyVisited; + + // Traverse the basic blocks depth first. + MachineBasicBlock *Entry = MF.begin(); + SmallPtrSet Visited; + for (df_ext_iterator > + DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited); + DFI != E; ++DFI) { + MachineBasicBlock *MBB = *DFI; + if (!EarlyVisited.count(MBB)) + RewriteMBB(*MBB, VRM, Spills); + + // If this MBB is the only predecessor of a successor. Keep the + // availability information and visit it next. + do { + // Keep visiting single predecessor successor as long as possible. + SinglePredSuccs.clear(); + findSinglePredSuccessor(MBB, SinglePredSuccs); + if (SinglePredSuccs.empty()) + MBB = 0; + else { + // FIXME: More than one successors, each of which has MBB has + // the only predecessor. + MBB = SinglePredSuccs[0]; + if (!Visited.count(MBB) && EarlyVisited.insert(MBB)) + RewriteMBB(*MBB, VRM, Spills); + } + } while (MBB); + + // Clear the availability info. + Spills.clear(); + } + + DOUT << "**** Post Machine Instrs ****\n"; + DEBUG(MF.dump()); + + // Mark unused spill slots. + MachineFrameInfo *MFI = MF.getFrameInfo(); + int SS = VRM.getLowSpillSlot(); + if (SS != VirtRegMap::NO_STACK_SLOT) + for (int e = VRM.getHighSpillSlot(); SS <= e; ++SS) + if (!VRM.isSpillSlotUsed(SS)) { + MFI->RemoveStackObject(SS); + ++NumDSS; + } + + return true; + } + private: + void TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist, + unsigned Reg, BitVector &RegKills, + std::vector &KillOps); + bool PrepForUnfoldOpti(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MII, + std::vector &MaybeDeadStores, + AvailableSpills &Spills, BitVector &RegKills, + std::vector &KillOps, + VirtRegMap &VRM); + bool CommuteToFoldReload(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MII, + unsigned VirtReg, unsigned SrcReg, int SS, + BitVector &RegKills, + std::vector &KillOps, + const TargetRegisterInfo *TRI, + VirtRegMap &VRM); + void SpillRegToStackSlot(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MII, + int Idx, unsigned PhysReg, int StackSlot, + const TargetRegisterClass *RC, + bool isAvailable, MachineInstr *&LastStore, + AvailableSpills &Spills, + SmallSet &ReMatDefs, + BitVector &RegKills, + std::vector &KillOps, + VirtRegMap &VRM); + void RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM, + AvailableSpills &Spills); + }; +} /// InvalidateKills - MI is going to be deleted. If any of its operands are /// marked kill, then invalidate the information. @@ -843,7 +897,7 @@ namespace { unsigned RReg = SubIdx ? TRI->getSubReg(NewPhysReg, SubIdx) : NewPhysReg; MI->getOperand(NewOp.Operand).setReg(RReg); - Spills.addAvailable(NewOp.StackSlotOrReMat, MI, NewPhysReg); + Spills.addAvailable(NewOp.StackSlotOrReMat, NewPhysReg); --MII; UpdateKills(*MII, RegKills, KillOps, TRI); DOUT << '\t' << *MII; @@ -1152,7 +1206,7 @@ void LocalSpiller::SpillRegToStackSlot(MachineBasicBlock &MBB, // in PhysReg. Spills.ModifyStackSlotOrReMat(StackSlot); Spills.ClobberPhysReg(PhysReg); - Spills.addAvailable(StackSlot, LastStore, PhysReg, isAvailable); + Spills.addAvailable(StackSlot, PhysReg, isAvailable); ++NumStores; } @@ -1201,15 +1255,13 @@ void LocalSpiller::TransferDeadness(MachineBasicBlock *MBB, unsigned CurDist, /// rewriteMBB - Keep track of which spills are available even after the /// register allocator is done with them. If possible, avid reloading vregs. -void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { - DOUT << MBB.getBasicBlock()->getName() << ":\n"; +void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM, + AvailableSpills &Spills) { + DOUT << "\n**** Local spiller rewriting MBB '" + << MBB.getBasicBlock()->getName() << ":\n"; MachineFunction &MF = *MBB.getParent(); - // Spills - Keep track of which spilled values are available in physregs so - // that we can choose to reuse the physregs instead of emitting reloads. - AvailableSpills Spills(TRI, TII); - // MaybeDeadStores - When we need to write a value back into a stack slot, // keep track of the inserted store. If the stack slot value is never read // (because the value was used from some available register, for example), and @@ -1277,18 +1329,82 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { continue; // Split interval spilled again. unsigned Phys = VRM.getPhys(VirtReg); RegInfo->setPhysRegUsed(Phys); + + // Check if the value being restored if available. If so, it must be + // from a predecessor BB that fallthrough into this BB. We do not + // expect: + // BB1: + // r1 = load fi#1 + // ... + // = r1 + // ... # r1 not clobbered + // ... + // = load fi#1 + bool DoReMat = VRM.isReMaterialized(VirtReg); + int SSorRMId = DoReMat + ? VRM.getReMatId(VirtReg) : VRM.getStackSlot(VirtReg); + unsigned InReg = Spills.getSpillSlotOrReMatPhysReg(SSorRMId); + assert((!InReg || !RegKills[InReg]) && + "Restoring a value that's previously defined in the same BB?"); + if (InReg == Phys) { + // If the value is already available in the expected register, save + // a reload / remat. + if (SSorRMId) + DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1; + else + DOUT << "Reusing SS#" << SSorRMId; + DOUT << " from physreg " + << TRI->getName(InReg) << " for vreg" + << VirtReg <<" instead of reloading into physreg " + << TRI->getName(Phys) << "\n"; + ++NumOmitted; + continue; + } else if (InReg && InReg != Phys) { + if (SSorRMId) + DOUT << "Reusing RM#" << SSorRMId-VirtRegMap::MAX_STACK_SLOT-1; + else + DOUT << "Reusing SS#" << SSorRMId; + DOUT << " from physreg " + << TRI->getName(InReg) << " for vreg" + << VirtReg <<" by copying it into physreg " + << TRI->getName(Phys) << "\n"; + + // If the reloaded / remat value is available in another register, + // copy it to the desired register. + const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); + TII->copyRegToReg(MBB, &MI, Phys, InReg, RC, RC); + + // This invalidates Phys. + Spills.ClobberPhysReg(Phys); + // Remember it's available. + Spills.addAvailable(SSorRMId, Phys); + + // Mark is killed. + MachineInstr *CopyMI = prior(MII); + MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg); + KillOpnd->setIsKill(); + UpdateKills(*CopyMI, RegKills, KillOps, TRI); + + DOUT << '\t' << *CopyMI; + ++NumCopified; + continue; + } + if (VRM.isReMaterialized(VirtReg)) { ReMaterialize(MBB, MII, Phys, VirtReg, TII, TRI, VRM); } else { const TargetRegisterClass* RC = RegInfo->getRegClass(VirtReg); - int SS = VRM.getStackSlot(VirtReg); - TII->loadRegFromStackSlot(MBB, &MI, Phys, SS, RC); + TII->loadRegFromStackSlot(MBB, &MI, Phys, SSorRMId, RC); MachineInstr *LoadMI = prior(MII); - VRM.addSpillSlotUse(SS, LoadMI); + VRM.addSpillSlotUse(SSorRMId, LoadMI); ++NumLoads; } + // This invalidates Phys. Spills.ClobberPhysReg(Phys); + // Remember it's available. + Spills.addAvailable(SSorRMId, Phys); + UpdateKills(*prior(MII), RegKills, KillOps, TRI); DOUT << '\t' << *prior(MII); } @@ -1510,7 +1626,7 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { // This invalidates DesignatedReg. Spills.ClobberPhysReg(DesignatedReg); - Spills.addAvailable(ReuseSlot, &MI, DesignatedReg); + Spills.addAvailable(ReuseSlot, DesignatedReg); unsigned RReg = SubIdx ? TRI->getSubReg(DesignatedReg, SubIdx) : DesignatedReg; MI.getOperand(i).setReg(RReg); @@ -1548,7 +1664,7 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { // Any stores to this stack slot are not dead anymore. if (!DoReMat) MaybeDeadStores[SSorRMId] = NULL; - Spills.addAvailable(SSorRMId, &MI, PhysReg); + Spills.addAvailable(SSorRMId, PhysReg); // Assumes this is the last use. IsKill will be unset if reg is reused // unless it's a two-address operand. if (TID.getOperandConstraint(i, TOI::TIED_TO) == -1) @@ -1738,7 +1854,7 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { // If the stack slot value was previously available in some other // register, change it now. Otherwise, make the register // available in PhysReg. - Spills.addAvailable(StackSlot, &MI, SrcReg, false/*!clobber*/); + Spills.addAvailable(StackSlot, SrcReg, false/*!clobber*/); } } } @@ -1788,7 +1904,7 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { // If it is a folded reference, then it's not safe to clobber. bool Folded = FoldedSS.count(FrameIdx); // Otherwise, if it wasn't available, remember that it is now! - Spills.addAvailable(FrameIdx, &MI, DestReg, !Folded); + Spills.addAvailable(FrameIdx, DestReg, !Folded); goto ProcessNextInst; } @@ -1863,6 +1979,7 @@ void LocalSpiller::RewriteMBB(MachineBasicBlock &MBB, VirtRegMap &VRM) { } MII = NextMII; } + } llvm::Spiller* llvm::createSpiller() { diff --git a/test/CodeGen/X86/pr3495.ll b/test/CodeGen/X86/pr3495.ll new file mode 100644 index 00000000000..b7330db05d5 --- /dev/null +++ b/test/CodeGen/X86/pr3495.ll @@ -0,0 +1,78 @@ +; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of reloads omited} +; RUN: llvm-as < %s | llc -march=x86 -stats |& grep {Number of available reloads turned into copies} +; PR3495 + +target triple = "i386-pc-linux-gnu" +@x = external global [8 x i32], align 32 ; <[8 x i32]*> [#uses=1] +@rows = external global [8 x i32], align 32 ; <[8 x i32]*> [#uses=2] +@up = external global [15 x i32], align 32 ; <[15 x i32]*> [#uses=2] +@down = external global [15 x i32], align 32 ; <[15 x i32]*> [#uses=1] + +define i32 @queens(i32 %c) nounwind { +entry: + %tmp91 = add i32 %c, 1 ; [#uses=3] + %tmp135 = getelementptr [8 x i32]* @x, i32 0, i32 %tmp91 ; [#uses=1] + br label %bb + +bb: ; preds = %bb569, %entry + %r25.0.reg2mem.0 = phi i32 [ 0, %entry ], [ %indvar.next715, %bb569 ] ; [#uses=4] + %tmp27 = getelementptr [8 x i32]* @rows, i32 0, i32 %r25.0.reg2mem.0 ; [#uses=1] + %tmp28 = load i32* %tmp27, align 4 ; [#uses=1] + %tmp29 = icmp eq i32 %tmp28, 0 ; [#uses=1] + br i1 %tmp29, label %bb569, label %bb31 + +bb31: ; preds = %bb + %tmp35 = sub i32 %r25.0.reg2mem.0, 0 ; [#uses=1] + %tmp36 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp35 ; [#uses=1] + %tmp37 = load i32* %tmp36, align 4 ; [#uses=1] + %tmp38 = icmp eq i32 %tmp37, 0 ; [#uses=1] + br i1 %tmp38, label %bb569, label %bb41 + +bb41: ; preds = %bb31 + %tmp54 = sub i32 %r25.0.reg2mem.0, %c ; [#uses=1] + %tmp55 = add i32 %tmp54, 7 ; [#uses=1] + %tmp62 = getelementptr [15 x i32]* @up, i32 0, i32 %tmp55 ; [#uses=2] + store i32 0, i32* %tmp62, align 4 + br label %bb92 + +bb92: ; preds = %bb545, %bb41 + %r20.0.reg2mem.0 = phi i32 [ 0, %bb41 ], [ %indvar.next711, %bb545 ] ; [#uses=5] + %tmp94 = getelementptr [8 x i32]* @rows, i32 0, i32 %r20.0.reg2mem.0 ; [#uses=1] + %tmp95 = load i32* %tmp94, align 4 ; [#uses=0] + %tmp112 = add i32 %r20.0.reg2mem.0, %tmp91 ; [#uses=1] + %tmp113 = getelementptr [15 x i32]* @down, i32 0, i32 %tmp112 ; [#uses=2] + %tmp114 = load i32* %tmp113, align 4 ; [#uses=1] + %tmp115 = icmp eq i32 %tmp114, 0 ; [#uses=1] + br i1 %tmp115, label %bb545, label %bb118 + +bb118: ; preds = %bb92 + %tmp122 = sub i32 %r20.0.reg2mem.0, %tmp91 ; [#uses=0] + store i32 0, i32* %tmp113, align 4 + store i32 %r20.0.reg2mem.0, i32* %tmp135, align 4 + br label %bb142 + +bb142: ; preds = %bb142, %bb118 + %k18.0.reg2mem.0 = phi i32 [ 0, %bb118 ], [ %indvar.next709, %bb142 ] ; [#uses=1] + %indvar.next709 = add i32 %k18.0.reg2mem.0, 1 ; [#uses=2] + %exitcond710 = icmp eq i32 %indvar.next709, 8 ; [#uses=1] + br i1 %exitcond710, label %bb155, label %bb142 + +bb155: ; preds = %bb142 + %tmp156 = tail call i32 @putchar(i32 10) nounwind ; [#uses=0] + br label %bb545 + +bb545: ; preds = %bb155, %bb92 + %indvar.next711 = add i32 %r20.0.reg2mem.0, 1 ; [#uses=2] + %exitcond712 = icmp eq i32 %indvar.next711, 8 ; [#uses=1] + br i1 %exitcond712, label %bb553, label %bb92 + +bb553: ; preds = %bb545 + store i32 1, i32* %tmp62, align 4 + br label %bb569 + +bb569: ; preds = %bb553, %bb31, %bb + %indvar.next715 = add i32 %r25.0.reg2mem.0, 1 ; [#uses=1] + br label %bb +} + +declare i32 @putchar(i32)