diff --git a/include/llvm/CodeGen/LiveIntervalAnalysis.h b/include/llvm/CodeGen/LiveIntervalAnalysis.h index 2f1176d36a9..dacec8ea969 100644 --- a/include/llvm/CodeGen/LiveIntervalAnalysis.h +++ b/include/llvm/CodeGen/LiveIntervalAnalysis.h @@ -162,6 +162,11 @@ namespace llvm { return i2miMap_[index]; } + /// conflictsWithPhysRegDef - Returns true if the specified register + /// is defined during the duration of the specified interval. + bool conflictsWithPhysRegDef(const LiveInterval &li, VirtRegMap &vrm, + unsigned reg); + /// findLiveInMBBs - Given a live range, if the value of the range /// is live in any MBB returns true as well as the list of basic blocks /// where the value is live in. diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index ca6c04d8f75..7819640b6c1 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -442,6 +442,39 @@ addIntervalsForSpills(const LiveInterval &li, VirtRegMap &vrm, unsigned reg) { return added; } +/// conflictsWithPhysRegDef - Returns true if the specified register +/// is defined during the duration of the specified interval. +bool LiveIntervals::conflictsWithPhysRegDef(const LiveInterval &li, + VirtRegMap &vrm, unsigned reg) { + for (LiveInterval::Ranges::const_iterator + I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) { + for (unsigned index = getBaseIndex(I->start), + end = getBaseIndex(I->end-1) + InstrSlots::NUM; index != end; + index += InstrSlots::NUM) { + // skip deleted instructions + while (index != end && !getInstructionFromIndex(index)) + index += InstrSlots::NUM; + if (index == end) break; + + MachineInstr *MI = getInstructionFromIndex(index); + for (unsigned i = 0; i != MI->getNumOperands(); ++i) { + MachineOperand& mop = MI->getOperand(i); + if (!mop.isRegister() || !mop.isDef()) + continue; + unsigned PhysReg = mop.getReg(); + if (PhysReg == 0) + continue; + if (MRegisterInfo::isVirtualRegister(PhysReg)) + PhysReg = vrm.getPhys(PhysReg); + if (mri_->regsOverlap(PhysReg, reg)) + return true; + } + } + } + + return false; +} + void LiveIntervals::printRegName(unsigned reg) const { if (MRegisterInfo::isPhysicalRegister(reg)) cerr << mri_->getName(reg); diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 8a9eb3de0b5..2d675b09b76 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/SSARegMap.h" #include "llvm/Target/MRegisterInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/ADT/EquivalenceClasses.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/STLExtras.h" @@ -38,6 +39,7 @@ using namespace llvm; STATISTIC(NumIters , "Number of iterations performed"); STATISTIC(NumBacktracks, "Number of times we had to backtrack"); +STATISTIC(NumCoalesce, "Number of copies coalesced"); static RegisterRegAlloc linearscanRegAlloc("linearscan", " linear scan register allocator", @@ -60,6 +62,9 @@ namespace { MachineFunction* mf_; const TargetMachine* tm_; const MRegisterInfo* mri_; + const TargetInstrInfo* tii_; + SSARegMap *regmap_; + BitVector allocatableRegs_; LiveIntervals* li_; /// handled_ - Intervals are added to the handled_ set in the order of their @@ -122,6 +127,15 @@ namespace { /// is available, or spill. void assignRegOrStackSlotAtInterval(LiveInterval* cur); + /// attemptTrivialCoalescing - If a simple interval is defined by a copy, + /// try allocate the definition the same register as the source register + /// if the register is not defined during live time of the interval. This + /// eliminate a copy. This is used to coalesce copies which were not + /// coalesced away before allocation either due to dest and src being in + /// different register classes or because the coalescer was overly + /// conservative. + unsigned attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg); + /// /// register handling helpers /// @@ -187,10 +201,54 @@ void RALinScan::ComputeRelatedRegClasses() { RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]); } +/// attemptTrivialCoalescing - If a simple interval is defined by a copy, +/// try allocate the definition the same register as the source register +/// if the register is not defined during live time of the interval. This +/// eliminate a copy. This is used to coalesce copies which were not +/// coalesced away before allocation either due to dest and src being in +/// different register classes or because the coalescer was overly +/// conservative. +unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) { + if (cur.preference && cur.preference == Reg || !cur.containsOneValue()) + return Reg; + + VNInfo *vni = cur.getValNumInfo(0); + if (!vni->def || vni->def == ~1U || vni->def == ~0U) + return Reg; + MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); + unsigned SrcReg, DstReg; + if (!CopyMI || !tii_->isMoveInstr(*CopyMI, SrcReg, DstReg)) + return Reg; + if (MRegisterInfo::isVirtualRegister(SrcReg)) + if (!vrm_->isAssignedReg(SrcReg)) + return Reg; + else + SrcReg = vrm_->getPhys(SrcReg); + if (Reg == SrcReg) + return Reg; + + const TargetRegisterClass *RC = regmap_->getRegClass(cur.reg); + if (!RC->contains(SrcReg)) + return Reg; + + // Try to coalesce. + if (!li_->conflictsWithPhysRegDef(cur, *vrm_, SrcReg)) { + vrm_->clearVirt(cur.reg); + vrm_->assignVirt2Phys(cur.reg, SrcReg); + ++NumCoalesce; + return SrcReg; + } + + return Reg; +} + bool RALinScan::runOnMachineFunction(MachineFunction &fn) { mf_ = &fn; tm_ = &fn.getTarget(); mri_ = tm_->getRegisterInfo(); + tii_ = tm_->getInstrInfo(); + regmap_ = mf_->getSSARegMap(); + allocatableRegs_ = mri_->getAllocatableSet(fn); li_ = &getAnalysis(); // We don't run the coalescer here because we have no reason to @@ -292,12 +350,12 @@ void RALinScan::linearScan() MachineFunction::iterator EntryMBB = mf_->begin(); SmallVector LiveInMBBs; for (LiveIntervals::iterator i = li_->begin(), e = li_->end(); i != e; ++i) { - const LiveInterval &cur = i->second; + LiveInterval &cur = i->second; unsigned Reg = 0; if (MRegisterInfo::isPhysicalRegister(cur.reg)) Reg = i->second.reg; else if (vrm_->isAssignedReg(cur.reg)) - Reg = vrm_->getPhys(cur.reg); + Reg = attemptTrivialCoalescing(cur, vrm_->getPhys(cur.reg)); if (!Reg) continue; for (LiveInterval::Ranges::const_iterator I = cur.begin(), E = cur.end(); @@ -441,9 +499,31 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) std::vector > SpillWeightsToAdd; unsigned StartPosition = cur->beginNumber(); - const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(cur->reg); + const TargetRegisterClass *RC = regmap_->getRegClass(cur->reg); const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); - + + // If this live interval is defined by a move instruction and its source is + // assigned a physical register that is compatible with the target register + // class, then we should try to assign it the same register. + // This can happen when the move is from a larger register class to a smaller + // one, e.g. X86::mov32to32_. These move instructions are not coalescable. + if (!cur->preference && cur->containsOneValue()) { + VNInfo *vni = cur->getValNumInfo(0); + if (vni->def && vni->def != ~1U && vni->def != ~0U) { + MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def); + unsigned SrcReg, DstReg; + if (tii_->isMoveInstr(*CopyMI, SrcReg, DstReg)) { + unsigned Reg = 0; + if (MRegisterInfo::isPhysicalRegister(SrcReg)) + Reg = SrcReg; + else if (vrm_->isAssignedReg(SrcReg)) + Reg = vrm_->getPhys(SrcReg); + if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) + cur->preference = Reg; + } + } + } + // for every interval in inactive we overlap with, mark the // register as not free and update spill weights. for (IntervalPtrs::const_iterator i = inactive_.begin(), @@ -451,7 +531,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) unsigned Reg = i->first->reg; assert(MRegisterInfo::isVirtualRegister(Reg) && "Can only allocate virtual registers!"); - const TargetRegisterClass *RegRC = mf_->getSSARegMap()->getRegClass(Reg); + const TargetRegisterClass *RegRC = regmap_->getRegClass(Reg); // If this is not in a related reg class to the register we're allocating, // don't check it. if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader && @@ -472,7 +552,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // We got a register. However, if it's in the fixed_ list, we might // conflict with it. Check to see if we conflict with it or any of its // aliases. - std::set RegAliases; + SmallSet RegAliases; for (const unsigned *AS = mri_->getAliasSet(physReg); *AS; ++AS) RegAliases.insert(*AS); @@ -642,7 +722,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) unsigned earliestStart = cur->beginNumber(); // set of spilled vregs (used later to rollback properly) - std::set spilled; + SmallSet spilled; // spill live intervals of virtual regs mapped to the physical register we // want to clear (and its aliases). We only spill those that overlap with the @@ -744,7 +824,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { std::vector inactiveCounts(mri_->getNumRegs(), 0); unsigned MaxInactiveCount = 0; - const TargetRegisterClass *RC = mf_->getSSARegMap()->getRegClass(cur->reg); + const TargetRegisterClass *RC = regmap_->getRegClass(cur->reg); const TargetRegisterClass *RCLeader = RelatedRegClasses.getLeaderValue(RC); for (IntervalPtrs::iterator i = inactive_.begin(), e = inactive_.end(); @@ -755,7 +835,7 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { // If this is not in a related reg class to the register we're allocating, // don't check it. - const TargetRegisterClass *RegRC = mf_->getSSARegMap()->getRegClass(reg); + const TargetRegisterClass *RegRC = regmap_->getRegClass(reg); if (RelatedRegClasses.getLeaderValue(RegRC) == RCLeader) { reg = vrm_->getPhys(reg); ++inactiveCounts[reg]; diff --git a/test/CodeGen/X86/postalloc-coalescing.ll b/test/CodeGen/X86/postalloc-coalescing.ll new file mode 100644 index 00000000000..9c44a5a7075 --- /dev/null +++ b/test/CodeGen/X86/postalloc-coalescing.ll @@ -0,0 +1,35 @@ +; RUN: llvm-as < %s | llc -march=x86 | grep mov | count 3 + +define fastcc i32 @_Z18yy_get_next_bufferv() { +entry: + br label %bb131 + +bb116: ; preds = %bb131 + %tmp125126 = trunc i32 %c.1 to i8 ; [#uses=1] + store i8 %tmp125126, i8* null, align 1 + br label %bb131 + +bb131: ; preds = %bb116, %entry + %c.2 = phi i32 [ %c.1, %bb116 ], [ 42, %entry ] ; [#uses=1] + %c.1 = select i1 false, i32 0, i32 %c.2 ; [#uses=4] + %tmp181 = icmp eq i32 %c.1, -1 ; [#uses=1] + br i1 %tmp181, label %bb158, label %bb116 + +bb158: ; preds = %bb131 + br i1 true, label %cond_true163, label %cond_next178 + +cond_true163: ; preds = %bb158 + %tmp172173 = trunc i32 %c.1 to i8 ; [#uses=1] + store i8 %tmp172173, i8* null, align 1 + br label %cond_next178 + +cond_next178: ; preds = %cond_true163, %bb158 + %tmp180 = icmp eq i32 %c.1, -1 ; [#uses=1] + br i1 %tmp180, label %cond_next184, label %cond_next199 + +cond_next184: ; preds = %cond_next178 + ret i32 0 + +cond_next199: ; preds = %cond_next178 + ret i32 0 +}