diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index b8cae4a59ed..5b7d22b88f9 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -1209,10 +1209,10 @@ bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { } LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); - if (CP.isFlipped() && JoinVInt.containsOneValue()) + if (JoinVInt.containsOneValue()) return true; - DEBUG(dbgs() << "\tCannot join defs into reserved register.\n"); + DEBUG(dbgs() << "\tCannot join complex intervals into reserved register.\n"); return false; } @@ -1431,8 +1431,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); - assert(CP.isFlipped() && RHS.containsOneValue() && - "Invalid join with reserved register"); + assert(RHS.containsOneValue() && "Invalid join with reserved register"); // Optimization for reserved registers like ESP. We can only merge with a // reserved physreg if RHS has a single value that is a copy of CP.DstReg(). @@ -1453,7 +1452,50 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { // defs are there. // Delete the identity copy. - MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg); + MachineInstr *CopyMI; + if (CP.isFlipped()) { + CopyMI = MRI->getVRegDef(RHS.reg); + } else { + if (!MRI->hasOneNonDBGUse(RHS.reg)) { + DEBUG(dbgs() << "\t\tMultiple vreg uses!\n"); + return false; + } + + MachineInstr *DestMI = MRI->getVRegDef(RHS.reg); + CopyMI = &*MRI->use_instr_nodbg_begin(RHS.reg); + const SlotIndex CopyRegIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); + const SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot(); + + // We checked above that there are no interfering defs of the physical + // register. However, for this case, where we intend to move up the def of + // the physical register, we also need to check for interfering uses. 
+ SlotIndexes *Indexes = LIS->getSlotIndexes(); + for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx); + SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) { + MachineInstr *MI = LIS->getInstructionFromIndex(SI); + if (MI->readsRegister(CP.getDstReg(), TRI)) { + DEBUG(dbgs() << "\t\tInterference (read): " << *MI); + return false; + } + } + + // We're going to remove the copy which defines a physical reserved + // register, so remove its valno, etc. + for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI) { + LiveRange &LR = LIS->getRegUnit(*UI); + VNInfo *OrigRegVNI = LR.getVNInfoAt(CopyRegIdx); + if (!OrigRegVNI) + continue; + + DEBUG(dbgs() << "\t\tRemoving: " << CopyRegIdx << " from " << LR << "\n"); + LR.removeSegment(CopyRegIdx, CopyRegIdx.getDeadSlot()); + LR.removeValNo(OrigRegVNI); + + // Create a new dead def at the new def location. + LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator()); + } + } + LIS->RemoveMachineInstrFromMaps(CopyMI); CopyMI->eraseFromParent(); diff --git a/test/CodeGen/ARM/dyn-stackalloc.ll b/test/CodeGen/ARM/dyn-stackalloc.ll index 4ac5b8a31e5..05c143d258a 100644 --- a/test/CodeGen/ARM/dyn-stackalloc.ll +++ b/test/CodeGen/ARM/dyn-stackalloc.ll @@ -1,4 +1,4 @@ -; RUN: llc -mtriple=arm-eabi %s -o /dev/null +; RUN: llc -mcpu=generic -mtriple=arm-eabi -verify-machineinstrs < %s | FileCheck %s %struct.comment = type { i8**, i32*, i32, i8* } %struct.info = type { i32, i32, i32, i32, i32, i32, i32, i8* } @@ -7,6 +7,18 @@ @str215 = external global [2 x i8] define void @t1(%struct.state* %v) { + +; Make sure we generate: +; sub sp, sp, r1 +; instead of: +; sub r1, sp, r1 +; mov sp, r1 + +; CHECK-LABEL: @t1 +; CHECK: bic [[REG1:r[0-9]+]], +; CHECK-NOT: sub r{{[0-9]+}}, sp, [[REG1]] +; CHECK: sub sp, sp, [[REG1]] + %tmp6 = load i32* null %tmp8 = alloca float, i32 %tmp6 store i32 1, i32* null diff --git a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll index 
6c8e3b5a8fd..e6202f92bd2 100644 --- a/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll +++ b/test/CodeGen/X86/2009-02-12-DebugInfoVLA.ll @@ -1,9 +1,19 @@ ; RUN: llc < %s -; RUN: llc < %s -march=x86-64 +; RUN: llc < %s -march=x86-64 -verify-machineinstrs | FileCheck %s ; PR3538 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128" target triple = "i386-apple-darwin9" define signext i8 @foo(i8* %s1) nounwind ssp { + +; Make sure we generate: +; movq -40(%rbp), %rsp +; instead of: +; movq -40(%rbp), %rax +; movq %rax, %rsp + +; CHECK-LABEL: @foo +; CHECK: movq -40(%rbp), %rsp + entry: %s1_addr = alloca i8* ; [#uses=2] %retval = alloca i32 ; [#uses=2]