From 4a74b3b933e2944ff313dc5d24da6f9e8ec4c1c4 Mon Sep 17 00:00:00 2001
From: Jakob Stoklund Olesen <stoklund@2pi.dk>
Date: Tue, 9 Aug 2011 18:19:41 +0000
Subject: [PATCH] Inflate register classes after coalescing.

Coalescing can remove copy-like instructions with sub-register operands
that constrained the register class.  Examples are:

  x86: GR32_ABCD:sub_8bit_hi -> GR32
  arm: DPR_VFP2:ssub0 -> DPR

Recompute the register class of any virtual registers that are used by
less instructions after coalescing.

This affects code generation for the Cortex-A8 where we use NEON
instructions for f32 operations, c.f. fp_convert.ll:

  vadd.f32  d16, d1, d0
  vcvt.s32.f32  d0, d16

The register allocator is now free to use d16 for the temporary, and
that comes first in the allocation order because it doesn't interfere
with any s-registers.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@137133 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/CodeGen/RegisterCoalescer.cpp | 37 +++++++++++++++++++++++++++++--
 test/CodeGen/ARM/fabss.ll         |  4 +++-
 test/CodeGen/ARM/fp_convert.ll    |  6 +++--
 3 files changed, 42 insertions(+), 5 deletions(-)
diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp
index d2087f9beea..c07970d69bf 100644
--- a/lib/CodeGen/RegisterCoalescer.cpp
+++ b/lib/CodeGen/RegisterCoalescer.cpp
@@ -55,6 +55,7 @@ STATISTIC(numExtends  , "Number of copies extended");
 STATISTIC(NumReMats   , "Number of instructions re-materialized");
 STATISTIC(numPeep     , "Number of identity moves eliminated after coalescing");
 STATISTIC(numAborts   , "Number of times interval joining aborted");
+STATISTIC(NumInflated , "Number of register classes inflated");
 
 static cl::opt<bool>
 EnableJoining("join-liveintervals",
@@ -1852,7 +1853,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
 
   // Perform a final pass over the instructions and compute spill weights
   // and remove identity moves.
-  SmallVector<unsigned, 4> DeadDefs;
+  SmallVector<unsigned, 4> DeadDefs, InflateRegs;
   for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end();
        mbbi != mbbe; ++mbbi) {
     MachineBasicBlock* mbb = mbbi;
@@ -1864,6 +1865,16 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
         bool DoDelete = true;
         assert(MI->isCopyLike() && "Unrecognized copy instruction");
         unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg();
+        unsigned DstReg = MI->getOperand(0).getReg();
+
+        // Collect candidates for register class inflation.
+        if (TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+            RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg)))
+          InflateRegs.push_back(SrcReg);
+        if (TargetRegisterInfo::isVirtualRegister(DstReg) &&
+            RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg)))
+          InflateRegs.push_back(DstReg);
+
         if (TargetRegisterInfo::isPhysicalRegister(SrcReg) &&
             MI->getNumOperands() > 2)
           // Do not delete extract_subreg, insert_subreg of physical
@@ -1905,8 +1916,12 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
           unsigned Reg = MO.getReg();
           if (!Reg)
             continue;
-          if (TargetRegisterInfo::isVirtualRegister(Reg))
+          if (TargetRegisterInfo::isVirtualRegister(Reg)) {
             DeadDefs.push_back(Reg);
+            // Remat may also enable register class inflation.
+            if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)))
+              InflateRegs.push_back(Reg);
+          }
           if (MO.isDead())
             continue;
           if (TargetRegisterInfo::isPhysicalRegister(Reg) ||
@@ -1954,6 +1969,24 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
     }
   }
 
+  // After deleting a lot of copies, register classes may be less constrained.
+  // Removing sub-register opreands may alow GR32_ABCD -> GR32 and DPR_VFP2 ->
+  // DPR inflation.
+  array_pod_sort(InflateRegs.begin(), InflateRegs.end());
+  InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
+                    InflateRegs.end());
+  DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n");
+  for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
+    unsigned Reg = InflateRegs[i];
+    if (MRI->reg_nodbg_empty(Reg))
+      continue;
+    if (MRI->recomputeRegClass(Reg, *TM)) {
+      DEBUG(dbgs() << PrintReg(Reg) << " inflated to "
+                   << MRI->getRegClass(Reg)->getName() << '\n');
+      ++NumInflated;
+    }
+  }
+
   DEBUG(dump());
   DEBUG(LDV->dump());
   if (VerifyCoalescing)
diff --git a/test/CodeGen/ARM/fabss.ll b/test/CodeGen/ARM/fabss.ll
index 51efe51bf15..45c322dce8b 100644
--- a/test/CodeGen/ARM/fabss.ll
+++ b/test/CodeGen/ARM/fabss.ll
@@ -22,6 +22,8 @@ declare float @fabsf(float)
 ; NFP0: 	vabs.f32	s1, s1
 
 ; CORTEXA8: test:
-; CORTEXA8: 	vabs.f32	d1, d1
+; CORTEXA8:     vadd.f32        [[D1:d[0-9]+]]
+; CORTEXA8: 	vabs.f32	{{d[0-9]+}}, [[D1]]
+
 ; CORTEXA9: test:
 ; CORTEXA9: 	vabs.f32	s{{.}}, s{{.}}
diff --git a/test/CodeGen/ARM/fp_convert.ll b/test/CodeGen/ARM/fp_convert.ll
index 86c06f1ddd9..7002cecf364 100644
--- a/test/CodeGen/ARM/fp_convert.ll
+++ b/test/CodeGen/ARM/fp_convert.ll
@@ -7,7 +7,8 @@ define i32 @test1(float %a, float %b) {
 ; VFP2: test1:
 ; VFP2: vcvt.s32.f32 s{{.}}, s{{.}}
 ; NEON: test1:
-; NEON: vcvt.s32.f32 d0, d0
+; NEON: vadd.f32 [[D0:d[0-9]+]]
+; NEON: vcvt.s32.f32 d0, [[D0]]
 entry:
         %0 = fadd float %a, %b
         %1 = fptosi float %0 to i32
@@ -18,7 +19,8 @@ define i32 @test2(float %a, float %b) {
 ; VFP2: test2:
 ; VFP2: vcvt.u32.f32 s{{.}}, s{{.}}
 ; NEON: test2:
-; NEON: vcvt.u32.f32 d0, d0
+; NEON: vadd.f32 [[D0:d[0-9]+]]
+; NEON: vcvt.u32.f32 d0, [[D0]]
 entry:
         %0 = fadd float %a, %b
         %1 = fptoui float %0 to i32