diff --git a/include/llvm/CodeGen/RegisterCoalescer.h b/include/llvm/CodeGen/RegisterCoalescer.h index 1490aa0172f..e3d3e796336 100644 --- a/include/llvm/CodeGen/RegisterCoalescer.h +++ b/include/llvm/CodeGen/RegisterCoalescer.h @@ -25,6 +25,9 @@ namespace llvm { class RegallocQuery; class AnalysisUsage; class MachineInstr; + class TargetRegisterInfo; + class TargetRegisterClass; + class TargetInstrInfo; /// An abstract interface for register coalescers. Coalescers must /// implement this interface to be part of the coalescer analysis @@ -141,6 +144,87 @@ namespace llvm { return true; } }; + + + /// CoalescerPair - A helper class for register coalescers. When deciding if + /// two registers can be coalesced, CoalescerPair can determine if a copy + /// instruction would become an identity copy after coalescing. + class CoalescerPair { + const TargetInstrInfo &tii_; + const TargetRegisterInfo &tri_; + + /// dstReg_ - The register that will be left after coalescing. It can be a + /// virtual or physical register. + unsigned dstReg_; + + /// srcReg_ - the virtual register that will be coalesced into dstReg. + unsigned srcReg_; + + /// subReg_ - The subregister index of srcReg in dstReg_. It is possible the + /// coalesce srcReg_ into a subreg of the larger dstReg_ when dstReg_ is a + /// virtual register. + unsigned subIdx_; + + /// partial_ - True when the original copy was a partial subregister copy. + bool partial_; + + /// flipped_ - True when DstReg and SrcReg are reversed from the oriignal copy + /// instruction. + bool flipped_; + + /// newRC_ - The register class of the coalesced register, or NULL if dstReg_ + /// is a physreg. + const TargetRegisterClass *newRC_; + + /// compose - Compose subreg indices a and b, either may be 0. + unsigned compose(unsigned, unsigned) const; + + /// isMoveInstr - Return true if MI is a move or subreg instruction. + bool isMoveInstr(const MachineInstr *MI, unsigned &Src, unsigned &Dst, + unsigned &SrcSub, unsigned &DstSub) const; + + public: + CoalescerPair(const TargetInstrInfo &tii, const TargetRegisterInfo &tri) + : tii_(tii), tri_(tri), dstReg_(0), srcReg_(0), subIdx_(0), + partial_(false), flipped_(false), newRC_(0) {} + + /// setRegisters - set registers to match the copy instruction MI. Return + /// false if MI is not a coalescable copy instruction. + bool setRegisters(const MachineInstr*); + + /// flip - Swap srcReg_ and dstReg_. Return false if swapping is impossible + /// because dstReg_ is a physical register, or subIdx_ is set. + bool flip(); + + /// isCoalescable - Return true if MI is a copy instruction that will become + /// an identity copy after coalescing. + bool isCoalescable(const MachineInstr*) const; + + /// isPhys - Return true if DstReg is a physical register. + bool isPhys() const { return !newRC_; } + + /// isPartial - Return true if the original copy instruction did not copy the + /// full register, but was a subreg operation. + bool isPartial() const { return partial_; } + + /// isFlipped - Return true when getSrcReg is the register being defined by + /// the original copy instruction. + bool isFlipped() const { return flipped_; } + + /// getDstReg - Return the register (virtual or physical) that will remain + /// after coalescing. + unsigned getDstReg() const { return dstReg_; } + + /// getSrcReg - Return the virtual register that will be coalesced away. + unsigned getSrcReg() const { return srcReg_; } + + /// getSubIdx - Return the subregister index in DstReg that SrcReg will be + /// coalesced into, or 0. + unsigned getSubIdx() const { return subIdx_; } + + /// getNewRC - Return the register class of the coalesced register. + const TargetRegisterClass *getNewRC() const { return newRC_; } + }; } // Because of the way .a files work, we must force the SimpleRC diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 1131e3db4e7..b18f0957b62 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -16,6 +16,8 @@ #include "llvm/CodeGen/RegisterCoalescer.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Pass.h" @@ -33,6 +35,151 @@ char RegisterCoalescer::ID = 0; // RegisterCoalescer::~RegisterCoalescer() {} +unsigned CoalescerPair::compose(unsigned a, unsigned b) const { + if (!a) return b; + if (!b) return a; + return tri_.composeSubRegIndices(a, b); +} + +bool CoalescerPair::isMoveInstr(const MachineInstr *MI, + unsigned &Src, unsigned &Dst, + unsigned &SrcSub, unsigned &DstSub) const { + if (MI->isExtractSubreg()) { + Dst = MI->getOperand(0).getReg(); + DstSub = MI->getOperand(0).getSubReg(); + Src = MI->getOperand(1).getReg(); + SrcSub = compose(MI->getOperand(1).getSubReg(), MI->getOperand(2).getImm()); + } else if (MI->isInsertSubreg() || MI->isSubregToReg()) { + Dst = MI->getOperand(0).getReg(); + DstSub = compose(MI->getOperand(0).getSubReg(), MI->getOperand(3).getImm()); + Src = MI->getOperand(2).getReg(); + SrcSub = MI->getOperand(2).getSubReg(); + } else if (!tii_.isMoveInstr(*MI, Src, Dst, SrcSub, DstSub)) { + return false; + } + return true; +} + +bool CoalescerPair::setRegisters(const MachineInstr *MI) { + srcReg_ = dstReg_ = subIdx_ = 0; + newRC_ = 0; + flipped_ = false; + + unsigned Src, Dst, SrcSub, DstSub; + if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + return false; + partial_ = SrcSub || DstSub; + + // If one register is a physreg, it must be Dst. + if (TargetRegisterInfo::isPhysicalRegister(Src)) { + if (TargetRegisterInfo::isPhysicalRegister(Dst)) + return false; + std::swap(Src, Dst); + std::swap(SrcSub, DstSub); + flipped_ = true; + } + + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + + if (TargetRegisterInfo::isPhysicalRegister(Dst)) { + // Eliminate DstSub on a physreg. + if (DstSub) { + Dst = tri_.getSubReg(Dst, DstSub); + if (!Dst) return false; + DstSub = 0; + } + + // Eliminate SrcSub by picking a corresponding Dst superregister. + if (SrcSub) { + Dst = tri_.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); + if (!Dst) return false; + SrcSub = 0; + } else if (!MRI.getRegClass(Src)->contains(Dst)) { + return false; + } + } else { + // Both registers are virtual. + + // Identical sub to sub. + if (SrcSub == DstSub) + SrcSub = DstSub = 0; + else if (SrcSub && DstSub) + return false; // FIXME: Qreg:ssub_3 + Dreg:ssub_1 => QReg:dsub_1 + Dreg. + + // There can be no SrcSub. + if (SrcSub) { + std::swap(Src, Dst); + DstSub = SrcSub; + SrcSub = 0; + assert(!flipped_ && "Unexpected flip"); + flipped_ = true; + } + + // Find the new register class. + const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); + const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); + if (DstSub) + newRC_ = tri_.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); + else + newRC_ = getCommonSubClass(DstRC, SrcRC); + if (!newRC_) + return false; + } + // Check our invariants + assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); + assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && + "Cannot have a physical SubIdx"); + srcReg_ = Src; + dstReg_ = Dst; + subIdx_ = DstSub; + return true; +} + +bool CoalescerPair::flip() { + if (subIdx_ || TargetRegisterInfo::isPhysicalRegister(dstReg_)) + return false; + std::swap(srcReg_, dstReg_); + flipped_ = !flipped_; + return true; +} + +bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { + if (!MI) + return false; + unsigned Src, Dst, SrcSub, DstSub; + if (!isMoveInstr(MI, Src, Dst, SrcSub, DstSub)) + return false; + + // Find the virtual register that is srcReg_. + if (Dst == srcReg_) { + std::swap(Src, Dst); + std::swap(SrcSub, DstSub); + } else if (Src != srcReg_) { + return false; + } + + // Now check that Dst matches dstReg_. + if (TargetRegisterInfo::isPhysicalRegister(dstReg_)) { + if (!TargetRegisterInfo::isPhysicalRegister(Dst)) + return false; + assert(!subIdx_ && "Inconsistent CoalescerPair state."); + // DstSub could be set for a physreg from INSERT_SUBREG. + if (DstSub) + Dst = tri_.getSubReg(Dst, DstSub); + // Full copy of Src. + if (!SrcSub) + return dstReg_ == Dst; + // This is a partial register copy. Check that the parts match. + return tri_.getSubReg(dstReg_, SrcSub) == Dst; + } else { + // dstReg_ is virtual. + if (dstReg_ != Dst) + return false; + // Registers match, do the subregisters line up? + return compose(subIdx_, SrcSub) == DstSub; + } +} + // Because of the way .a files work, we must force the SimpleRC // implementation to be pulled in if the RegisterCoalescer classes are // pulled in. Otherwise we run the risk of RegisterCoalescer being diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 57a8ad029df..16d929bbfa3 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -1395,6 +1395,12 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { return false; // Not coalescable. } + CoalescerPair CP(*tii_, *tri_); + if (!CP.setRegisters(CopyMI)) { + DEBUG(dbgs() << "\tNot coalescable.\n"); + return false; + } + bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); @@ -1722,7 +1728,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { DEBUG(dbgs() << "\tNot profitable!\n"); return false; } - } else if (!JoinIntervals(DstInt, SrcInt, Swapped)) { + } else if (!JoinIntervals(DstInt, SrcInt, Swapped, CP)) { // Coalescing failed. // If definition of source is defined by trivial computation, try @@ -1919,33 +1925,13 @@ static bool InVector(VNInfo *Val, const SmallVector &V) { return std::find(V.begin(), V.end(), Val) != V.end(); } -static bool isValNoDefMove(const MachineInstr *MI, unsigned DR, unsigned SR, - const TargetInstrInfo *TII, - const TargetRegisterInfo *TRI) { - unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx; - if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) - ; - else if (MI->isExtractSubreg()) { - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(1).getReg(); - } else if (MI->isSubregToReg() || - MI->isInsertSubreg()) { - DstReg = MI->getOperand(0).getReg(); - SrcReg = MI->getOperand(2).getReg(); - } else - return false; - return (SrcReg == SR || TRI->isSuperRegister(SR, SrcReg)) && - (DstReg == DR || TRI->isSuperRegister(DR, DstReg)); -} - /// RangeIsDefinedByCopyFromReg - Return true if the specified live range of /// the specified live interval is defined by a copy from the specified /// register. -bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li, - LiveRange *LR, - unsigned Reg) { - unsigned SrcReg = li_->getVNInfoSourceReg(LR->valno); - if (SrcReg == Reg) +bool SimpleRegisterCoalescing::RangeIsDefinedByCopy(LiveInterval &li, + LiveRange *LR, + CoalescerPair &CP) { + if (CP.isCoalescable(LR->valno->getCopy())) return true; // FIXME: Do isPHIDef and isDefAccurate both need to be tested? if ((LR->valno->isPHIDef() || !LR->valno->isDefAccurate()) && @@ -1954,7 +1940,7 @@ bool SimpleRegisterCoalescing::RangeIsDefinedByCopyFromReg(LiveInterval &li, // It's a sub-register live interval, we may not have precise information. // Re-compute it. MachineInstr *DefMI = li_->getInstructionFromIndex(LR->start); - if (DefMI && isValNoDefMove(DefMI, li.reg, Reg, tii_, tri_)) { + if (CP.isCoalescable(DefMI)) { // Cache computed info. LR->valno->def = LR->start; LR->valno->setCopy(DefMI); @@ -1986,7 +1972,8 @@ bool SimpleRegisterCoalescing::ValueLiveAt(LiveInterval::iterator LRItr, /// value number and that the RHS is not defined by a copy from this /// interval. This returns false if the intervals are not joinable, or it /// joins them and returns true. -bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ +bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS, + CoalescerPair &CP) { assert(RHS.containsOneValue()); // Some number (potentially more than one) value numbers in the current @@ -2028,7 +2015,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ if (LHSIt->valno->hasRedefByEC()) return false; // Copy from the RHS? - if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) + if (!RangeIsDefinedByCopy(LHS, LHSIt, CP)) return false; // Nope, bail out. if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def)) @@ -2072,7 +2059,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ return false; // Otherwise, if this is a copy from the RHS, mark it as being merged // in. - if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) { + if (RangeIsDefinedByCopy(LHS, LHSIt, CP)) { if (ValueLiveAt(LHSIt, LHS.end(), RHSIt->valno->def)) // Here is an interesting situation: // BB1: @@ -2171,7 +2158,7 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ /// below to update aliases. bool SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, - bool &Swapped) { + bool &Swapped, CoalescerPair &CP) { // Compute the final value assignment, assuming that the live ranges can be // coalesced. SmallVector LHSValNoAssignments; @@ -2252,7 +2239,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, // faster checks to see if the live ranges are coalescable. This joiner // can't swap the LHS/RHS intervals though. if (!TargetRegisterInfo::isPhysicalRegister(RHS.reg)) { - return SimpleJoin(LHS, RHS); + return SimpleJoin(LHS, RHS, CP); } else { RHSValNoInfo = RHSValNoInfo0; } @@ -2318,7 +2305,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, // DstReg is known to be a register in the LHS interval. If the src is // from the RHS interval, we can use its value #. - if (li_->getVNInfoSourceReg(VNI) != RHS.reg) + if (!CP.isCoalescable(VNI->getCopy())) continue; // Figure out the value # from the RHS. @@ -2337,7 +2324,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, // DstReg is known to be a register in the RHS interval. If the src is // from the LHS interval, we can use its value #. - if (li_->getVNInfoSourceReg(VNI) != LHS.reg) + if (!CP.isCoalescable(VNI->getCopy())) continue; // Figure out the value # from the LHS. diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index 1be04f32aa6..2f9c69c706a 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -111,14 +111,15 @@ namespace llvm { /// physreg, this method always canonicalizes DestInt to be it. The output /// "SrcInt" will not have been modified, so we can use this information /// below to update aliases. - bool JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, bool &Swapped); + bool JoinIntervals(LiveInterval &LHS, LiveInterval &RHS, bool &Swapped, + CoalescerPair &CP); /// SimpleJoin - Attempt to join the specified interval into this one. The /// caller of this method must guarantee that the RHS only contains a single /// value number and that the RHS is not defined by a copy from this /// interval. This returns false if the intervals are not joinable, or it /// joins them and returns true. - bool SimpleJoin(LiveInterval &LHS, LiveInterval &RHS); + bool SimpleJoin(LiveInterval &LHS, LiveInterval &RHS, CoalescerPair &CP); /// Return true if the two specified registers belong to different register /// classes. The registers may be either phys or virt regs. @@ -210,11 +211,10 @@ namespace llvm { bool ValueLiveAt(LiveInterval::iterator LRItr, LiveInterval::iterator LREnd, SlotIndex defPoint) const; - /// RangeIsDefinedByCopyFromReg - Return true if the specified live range of - /// the specified live interval is defined by a copy from the specified - /// register. - bool RangeIsDefinedByCopyFromReg(LiveInterval &li, LiveRange *LR, - unsigned Reg); + /// RangeIsDefinedByCopy - Return true if the specified live range of the + /// specified live interval is defined by a coalescable copy. + bool RangeIsDefinedByCopy(LiveInterval &li, LiveRange *LR, + CoalescerPair &CP); /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. If DstReg is a diff --git a/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll new file mode 100644 index 00000000000..62c5790b392 --- /dev/null +++ b/test/CodeGen/Thumb2/2010-06-14-NEONCoalescer.ll @@ -0,0 +1,41 @@ +; RUN: llc < %s -O3 -relocation-model=pic -mattr=+thumb2 -mcpu=cortex-a8 | FileCheck %s +target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" +target triple = "thumbv7-apple-darwin10" + +; This is a case where the coalescer was too eager. These two copies were +; considered equivalent and coalescable: +; +; 140 %reg1038:dsub_0 = VMOVD %reg1047:dsub_0, pred:14, pred:%reg0 +; 148 %reg1038:dsub_1 = VMOVD %reg1047:dsub_0, pred:14, pred:%reg0 +; +; Only one can be coalesced. + +@.str = private constant [7 x i8] c"%g %g\0A\00", align 4 ; <[7 x i8]*> [#uses=1] + +define arm_apcscc i32 @main(i32 %argc, i8** nocapture %Argv) nounwind { +entry: + %0 = icmp eq i32 %argc, 2123 ; [#uses=1] + %U.0 = select i1 %0, double 3.282190e+01, double 8.731834e+02 ; [#uses=2] + %1 = icmp eq i32 %argc, 5123 ; [#uses=1] + %V.0.ph = select i1 %1, double 7.779980e+01, double 0x409CCB9C779A6B51 ; [#uses=1] + %2 = insertelement <2 x double> undef, double %U.0, i32 0 ; <<2 x double>> [#uses=2] + %3 = insertelement <2 x double> %2, double %U.0, i32 1 ; <<2 x double>> [#uses=2] + %4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2] +; Constant pool load followed by add. +; Then clobber the loaded register, not the sum. +; CHECK: vldr.64 [[LDR:d.]] +; CHECK: vadd.f64 [[ADD:d.]], [[LDR]], [[LDR]] +; CHECK: vmov.f64 [[LDR]] + %5 = fadd <2 x double> %3, %3 ; <<2 x double>> [#uses=2] + %6 = fadd <2 x double> %4, %4 ; <<2 x double>> [#uses=2] + %tmp7 = extractelement <2 x double> %5, i32 0 ; [#uses=1] + %tmp5 = extractelement <2 x double> %5, i32 1 ; [#uses=1] +; CHECK: printf + %7 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), double %tmp7, double %tmp5) nounwind ; [#uses=0] + %tmp3 = extractelement <2 x double> %6, i32 0 ; [#uses=1] + %tmp1 = extractelement <2 x double> %6, i32 1 ; [#uses=1] + %8 = tail call arm_apcscc i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([7 x i8]* @.str, i32 0, i32 0), double %tmp3, double %tmp1) nounwind ; [#uses=0] + ret i32 0 +} + +declare arm_apcscc i32 @printf(i8* nocapture, ...) nounwind