diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index ba6b4569a8f..bbae204c1eb 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -68,6 +68,11 @@ VerifyCoalescing("verify-coalescing", cl::desc("Verify machine instrs before and after register coalescing"), cl::Hidden); +// Temporary option for testing new coalescer algo. +static cl::opt +NewCoalescer("new-coalescer", cl::Hidden, cl::init(false), + cl::desc("Use new coalescer algorithm")); + namespace { class RegisterCoalescer : public MachineFunctionPass, private LiveRangeEdit::Delegate { @@ -1877,10 +1882,348 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { return true; } +/// ComputeUltimateVN - Assuming we are going to join two live intervals, +/// compute what the resultant value numbers for each value in the input two +/// ranges will be. This is complicated by copies between the two which can +/// and will commonly cause multiple value numbers to be merged into one. +/// +/// VN is the value number that we're trying to resolve. InstDefiningValue +/// keeps track of the new InstDefiningValue assignment for the result +/// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of +/// whether a value in this or other is a copy from the opposite set. +/// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have +/// already been assigned. +/// +/// ThisFromOther[x] - If x is defined as a copy from the other interval, this +/// contains the value number the copy is from. +/// +static unsigned ComputeUltimateVN(VNInfo *VNI, + SmallVector &NewVNInfo, + DenseMap &ThisFromOther, + DenseMap &OtherFromThis, + SmallVector &ThisValNoAssignments, + SmallVector &OtherValNoAssignments) { + unsigned VN = VNI->id; + + // If the VN has already been computed, just return it. + if (ThisValNoAssignments[VN] >= 0) + return ThisValNoAssignments[VN]; + assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers"); + + // If this val is not a copy from the other val, then it must be a new value + // number in the destination. + DenseMap::iterator I = ThisFromOther.find(VNI); + if (I == ThisFromOther.end()) { + NewVNInfo.push_back(VNI); + return ThisValNoAssignments[VN] = NewVNInfo.size()-1; + } + VNInfo *OtherValNo = I->second; + + // Otherwise, this *is* a copy from the RHS. If the other side has already + // been computed, return it. + if (OtherValNoAssignments[OtherValNo->id] >= 0) + return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id]; + + // Mark this value number as currently being computed, then ask what the + // ultimate value # of the other value is. + ThisValNoAssignments[VN] = -2; + unsigned UltimateVN = + ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther, + OtherValNoAssignments, ThisValNoAssignments); + return ThisValNoAssignments[VN] = UltimateVN; +} + + +// Find out if we have something like +// A = X +// B = X +// if so, we can pretend this is actually +// A = X +// B = A +// which allows us to coalesce A and B. +// VNI is the definition of B. LR is the life range of A that includes +// the slot just before B. If we return true, we add "B = X" to DupCopies. +// This implies that A dominates B. +static bool RegistersDefinedFromSameValue(LiveIntervals &li, + const TargetRegisterInfo &tri, + CoalescerPair &CP, + VNInfo *VNI, + VNInfo *OtherVNI, + SmallVector &DupCopies) { + // FIXME: This is very conservative. For example, we don't handle + // physical registers. + + MachineInstr *MI = li.getInstructionFromIndex(VNI->def); + + if (!MI || CP.isPartial() || CP.isPhys()) + return false; + + unsigned A = CP.getDstReg(); + if (!TargetRegisterInfo::isVirtualRegister(A)) + return false; + + unsigned B = CP.getSrcReg(); + if (!TargetRegisterInfo::isVirtualRegister(B)) + return false; + + MachineInstr *OtherMI = li.getInstructionFromIndex(OtherVNI->def); + if (!OtherMI) + return false; + + if (MI->isImplicitDef()) { + DupCopies.push_back(MI); + return true; + } else { + if (!MI->isFullCopy()) + return false; + unsigned Src = MI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Src)) + return false; + if (!OtherMI->isFullCopy()) + return false; + unsigned OtherSrc = OtherMI->getOperand(1).getReg(); + if (!TargetRegisterInfo::isVirtualRegister(OtherSrc)) + return false; + + if (Src != OtherSrc) + return false; + + // If the copies use two different value numbers of X, we cannot merge + // A and B. + LiveInterval &SrcInt = li.getInterval(Src); + // getVNInfoBefore returns NULL for undef copies. In this case, the + // optimization is still safe. + if (SrcInt.getVNInfoBefore(OtherVNI->def) != + SrcInt.getVNInfoBefore(VNI->def)) + return false; + + DupCopies.push_back(MI); + return true; + } +} + /// joinIntervals - Attempt to join these two intervals. On failure, this /// returns false. bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { - return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); + // Handle physreg joins separately. + if (CP.isPhys()) + return joinReservedPhysReg(CP); + + if (NewCoalescer) + return joinVirtRegs(CP); + + LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); + DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS + << '\n'); + + // Compute the final value assignment, assuming that the live ranges can be + // coalesced. + SmallVector LHSValNoAssignments; + SmallVector RHSValNoAssignments; + DenseMap LHSValsDefinedFromRHS; + DenseMap RHSValsDefinedFromLHS; + SmallVector NewVNInfo; + + SmallVector DupCopies; + SmallVector DeadCopies; + + LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg()); + DEBUG(dbgs() << "\t\tLHS = " << PrintReg(CP.getDstReg(), TRI) << ' ' << LHS + << '\n'); + + // Loop over the value numbers of the LHS, seeing if any are defined from + // the RHS. + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || VNI->isPHIDef()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); + assert(MI && "Missing def"); + if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy? + continue; + + // Figure out the value # from the RHS. + VNInfo *OtherVNI = RHS.getVNInfoBefore(VNI->def); + // The copy could be to an aliased physreg. + if (!OtherVNI) + continue; + + // DstReg is known to be a register in the LHS interval. If the src is + // from the RHS interval, we can use its value #. + if (CP.isCoalescable(MI)) + DeadCopies.push_back(MI); + else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI, + DupCopies)) + continue; + + LHSValsDefinedFromRHS[VNI] = OtherVNI; + } + + // Loop over the value numbers of the RHS, seeing if any are defined from + // the LHS. + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + if (VNI->isUnused() || VNI->isPHIDef()) + continue; + MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); + assert(MI && "Missing def"); + if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy? + continue; + + // Figure out the value # from the LHS. + VNInfo *OtherVNI = LHS.getVNInfoBefore(VNI->def); + // The copy could be to an aliased physreg. + if (!OtherVNI) + continue; + + // DstReg is known to be a register in the RHS interval. If the src is + // from the LHS interval, we can use its value #. + if (CP.isCoalescable(MI)) + DeadCopies.push_back(MI); + else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI, + DupCopies)) + continue; + + RHSValsDefinedFromLHS[VNI] = OtherVNI; + } + + LHSValNoAssignments.resize(LHS.getNumValNums(), -1); + RHSValNoAssignments.resize(RHS.getNumValNums(), -1); + NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); + + for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + ComputeUltimateVN(VNI, NewVNInfo, + LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, + LHSValNoAssignments, RHSValNoAssignments); + } + for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); + i != e; ++i) { + VNInfo *VNI = *i; + unsigned VN = VNI->id; + if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) + continue; + // If this value number isn't a copy from the LHS, it's a new number. + if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { + NewVNInfo.push_back(VNI); + RHSValNoAssignments[VN] = NewVNInfo.size()-1; + continue; + } + + ComputeUltimateVN(VNI, NewVNInfo, + RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, + RHSValNoAssignments, LHSValNoAssignments); + } + + // Armed with the mappings of LHS/RHS values to ultimate values, walk the + // interval lists to see if these intervals are coalescable. + LiveInterval::const_iterator I = LHS.begin(); + LiveInterval::const_iterator IE = LHS.end(); + LiveInterval::const_iterator J = RHS.begin(); + LiveInterval::const_iterator JE = RHS.end(); + + // Collect interval end points that will no longer be kills. + SmallVector LHSOldKills; + SmallVector RHSOldKills; + + // Skip ahead until the first place of potential sharing. + if (I != IE && J != JE) { + if (I->start < J->start) { + I = std::upper_bound(I, IE, J->start); + if (I != LHS.begin()) --I; + } else if (J->start < I->start) { + J = std::upper_bound(J, JE, I->start); + if (J != RHS.begin()) --J; + } + } + + while (I != IE && J != JE) { + // Determine if these two live ranges overlap. + // If so, check value # info to determine if they are really different. + if (I->end > J->start && J->end > I->start) { + // If the live range overlap will map to the same value number in the + // result liverange, we can still coalesce them. If not, we can't. + if (LHSValNoAssignments[I->valno->id] != + RHSValNoAssignments[J->valno->id]) + return false; + + // Extended live ranges should no longer be killed. + if (!I->end.isBlock() && I->end < J->end) + if (MachineInstr *MI = LIS->getInstructionFromIndex(I->end)) + LHSOldKills.push_back(MI); + if (!J->end.isBlock() && J->end < I->end) + if (MachineInstr *MI = LIS->getInstructionFromIndex(J->end)) + RHSOldKills.push_back(MI); + } + + if (I->end < J->end) + ++I; + else + ++J; + } + + // Clear kill flags where live ranges are extended. + while (!LHSOldKills.empty()) + LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI); + while (!RHSOldKills.empty()) + RHSOldKills.pop_back_val()->clearRegisterKills(RHS.reg, TRI); + + if (LHSValNoAssignments.empty()) + LHSValNoAssignments.push_back(-1); + if (RHSValNoAssignments.empty()) + RHSValNoAssignments.push_back(-1); + + // Now erase all the redundant copies. + for (unsigned i = 0, e = DeadCopies.size(); i != e; ++i) { + MachineInstr *MI = DeadCopies[i]; + if (!ErasedInstrs.insert(MI)) + continue; + DEBUG(dbgs() << "\t\terased:\t" << LIS->getInstructionIndex(MI) + << '\t' << *MI); + LIS->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } + + SmallVector SourceRegisters; + for (SmallVector::iterator I = DupCopies.begin(), + E = DupCopies.end(); I != E; ++I) { + MachineInstr *MI = *I; + if (!ErasedInstrs.insert(MI)) + continue; + + // If MI is a copy, then we have pretended that the assignment to B in + // A = X + // B = X + // was actually a copy from A. Now that we decided to coalesce A and B, + // transform the code into + // A = X + // In the case of the implicit_def, we just have to remove it. + if (!MI->isImplicitDef()) { + unsigned Src = MI->getOperand(1).getReg(); + SourceRegisters.push_back(Src); + } + LIS->RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + } + + // If B = X was the last use of X in a liverange, we have to shrink it now + // that B = X is gone. + for (SmallVector::iterator I = SourceRegisters.begin(), + E = SourceRegisters.end(); I != E; ++I) { + LIS->shrinkToUses(&LIS->getInterval(*I)); + } + + // If we get here, we know that we can coalesce the live ranges. Ask the + // intervals to coalesce themselves now. + LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, + MRI); + return true; } namespace { diff --git a/test/CodeGen/ARM/coalesce-subregs.ll b/test/CodeGen/ARM/coalesce-subregs.ll index 238ba24a797..69fc26c60c1 100644 --- a/test/CodeGen/ARM/coalesce-subregs.ll +++ b/test/CodeGen/ARM/coalesce-subregs.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mcpu=cortex-a9 -verify-coalescing -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -new-coalescer -mcpu=cortex-a9 -verify-coalescing -verify-machineinstrs | FileCheck %s target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32" target triple = "thumbv7-apple-ios0.0.0" diff --git a/test/CodeGen/X86/pr14098.ll b/test/CodeGen/X86/pr14098.ll new file mode 100644 index 00000000000..6ce2449ab6a --- /dev/null +++ b/test/CodeGen/X86/pr14098.ll @@ -0,0 +1,23 @@ +; RUN: llc -mtriple i386-unknown-linux-gnu -relocation-model=pic -verify-machineinstrs < %s +; We used to crash on this. + +declare void @foo() +declare void @foo3(i1 %x) +define void @bar(i1 %a1, i16 %a2) nounwind align 2 { +bb0: + %a3 = trunc i16 %a2 to i8 + %a4 = lshr i16 %a2, 8 + %a5 = trunc i16 %a4 to i8 + br i1 %a1, label %bb1, label %bb2 +bb1: + br label %bb2 +bb2: + %a6 = phi i8 [ 3, %bb0 ], [ %a5, %bb1 ] + %a7 = phi i8 [ 9, %bb0 ], [ %a3, %bb1 ] + %a8 = icmp eq i8 %a6, 1 + call void @foo() + %a9 = icmp eq i8 %a7, 0 + call void @foo3(i1 %a9) + call void @foo3(i1 %a8) + ret void +}