diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index d2087f9beea..c07970d69bf 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -55,6 +55,7 @@ STATISTIC(numExtends , "Number of copies extended"); STATISTIC(NumReMats , "Number of instructions re-materialized"); STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); STATISTIC(numAborts , "Number of times interval joining aborted"); +STATISTIC(NumInflated , "Number of register classes inflated"); static cl::opt EnableJoining("join-liveintervals", @@ -1852,7 +1853,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { // Perform a final pass over the instructions and compute spill weights // and remove identity moves. - SmallVector DeadDefs; + SmallVector DeadDefs, InflateRegs; for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end(); mbbi != mbbe; ++mbbi) { MachineBasicBlock* mbb = mbbi; @@ -1864,6 +1865,16 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { bool DoDelete = true; assert(MI->isCopyLike() && "Unrecognized copy instruction"); unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); + unsigned DstReg = MI->getOperand(0).getReg(); + + // Collect candidates for register class inflation. 
+ if (TargetRegisterInfo::isVirtualRegister(SrcReg) && + RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg))) + InflateRegs.push_back(SrcReg); + if (TargetRegisterInfo::isVirtualRegister(DstReg) && + RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg))) + InflateRegs.push_back(DstReg); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && MI->getNumOperands() > 2) // Do not delete extract_subreg, insert_subreg of physical @@ -1905,8 +1916,12 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { unsigned Reg = MO.getReg(); if (!Reg) continue; - if (TargetRegisterInfo::isVirtualRegister(Reg)) + if (TargetRegisterInfo::isVirtualRegister(Reg)) { DeadDefs.push_back(Reg); + // Remat may also enable register class inflation. + if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg))) + InflateRegs.push_back(Reg); + } if (MO.isDead()) continue; if (TargetRegisterInfo::isPhysicalRegister(Reg) || @@ -1954,6 +1969,24 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { } } + // After deleting a lot of copies, register classes may be less constrained. + // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 -> + // DPR inflation. 
+ array_pod_sort(InflateRegs.begin(), InflateRegs.end()); + InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()), + InflateRegs.end()); + DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n"); + for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) { + unsigned Reg = InflateRegs[i]; + if (MRI->reg_nodbg_empty(Reg)) + continue; + if (MRI->recomputeRegClass(Reg, *TM)) { + DEBUG(dbgs() << PrintReg(Reg) << " inflated to " + << MRI->getRegClass(Reg)->getName() << '\n'); + ++NumInflated; + } + } + DEBUG(dump()); DEBUG(LDV->dump()); if (VerifyCoalescing) diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll index 51efe51bf15..45c322dce8b 100644 --- a/test/CodeGen/ARM/fabss.ll +++ b/test/CodeGen/ARM/fabss.ll @@ -22,6 +22,8 @@ declare float @fabsf(float) ; NFP0: vabs.f32 s1, s1 ; CORTEXA8: test: -; CORTEXA8: vabs.f32 d1, d1 +; CORTEXA8: vadd.f32 [[D1:d[0-9]+]] +; CORTEXA8: vabs.f32 {{d[0-9]+}}, [[D1]] + ; CORTEXA9: test: ; CORTEXA9: vabs.f32 s{{.}}, s{{.}} diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll index 86c06f1ddd9..7002cecf364 100644 --- a/test/CodeGen/ARM/fp_convert.ll +++ b/test/CodeGen/ARM/fp_convert.ll @@ -7,7 +7,8 @@ define i32 @test1(float %a, float %b) { ; VFP2: test1: ; VFP2: vcvt.s32.f32 s{{.}}, s{{.}} ; NEON: test1: -; NEON: vcvt.s32.f32 d0, d0 +; NEON: vadd.f32 [[D0:d[0-9]+]] +; NEON: vcvt.s32.f32 d0, [[D0]] entry: %0 = fadd float %a, %b %1 = fptosi float %0 to i32 @@ -18,7 +19,8 @@ define i32 @test2(float %a, float %b) { ; VFP2: test2: ; VFP2: vcvt.u32.f32 s{{.}}, s{{.}} ; NEON: test2: -; NEON: vcvt.u32.f32 d0, d0 +; NEON: vadd.f32 [[D0:d[0-9]+]] +; NEON: vcvt.u32.f32 d0, [[D0]] entry: %0 = fadd float %a, %b %1 = fptoui float %0 to i32