[MachineLICM] First steps of sinking GEPs near calls.

Specifically, if there are copy-like instructions in the loop preheader,
they are moved into the loop close to their uses. This reduces the live
intervals of the values and can avoid register spills.

This is working towards a fix for http://llvm.org/PR22230.
Review: http://reviews.llvm.org/D7259

Next steps:
- Find a better cost model (which non-copy instructions should be sunk?)
- Make this dependent on register pressure

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@232262 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Daniel Jasper 2015-03-14 10:58:38 +00:00
parent e282854e63
commit 439cc2c5de
2 changed files with 123 additions and 0 deletions

View File

@ -54,6 +54,12 @@ HoistCheapInsts("hoist-cheap-insts",
cl::desc("MachineLICM should hoist even cheap instructions"),
cl::init(false), cl::Hidden);
static cl::opt<bool>
SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
cl::desc("MachineLICM should sink instructions into "
"loops to avoid register spills"),
cl::init(false), cl::Hidden);
STATISTIC(NumHoisted,
"Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
@ -243,6 +249,11 @@ namespace {
void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
/// SinkIntoLoop - Sink instructions into loops if profitable. This
/// especially tries to prevent register spills caused by register pressure
/// if there is little to no overhead moving instructions into loops.
void SinkIntoLoop();
/// getRegisterClassIDAndCost - For a given MI, register, and the operand
/// index, return the ID and cost of its representative register class by
/// reference.
@ -381,6 +392,9 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
FirstInLoop = true;
HoistOutOfLoop(N);
CSEMap.clear();
if (SinkInstsToAvoidSpills)
SinkIntoLoop();
}
}
@ -771,6 +785,53 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
}
}
// SinkIntoLoop - Move loop-invariant, copy-like instructions from the loop
// preheader into the loop body, next to their uses. Shrinking the live
// ranges of these values reduces register pressure inside the loop and can
// avoid spills (working towards PR22230). Enabled only under the
// -sink-insts-to-avoid-spills flag.
void MachineLICM::SinkIntoLoop() {
MachineBasicBlock *Preheader = getCurPreheader();
// No unique preheader: nothing to sink from.
if (!Preheader)
return;
// Phase 1: scan the preheader and collect sink candidates. An instruction
// qualifies only if it is loop invariant (so placing it inside the loop is
// legal on every iteration) and none of its results feed a PHI inside the
// loop, which would pin the value at the loop boundary.
SmallVector<MachineInstr *, 8> Candidates;
for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
I != Preheader->instr_end(); ++I) {
// We need to ensure that we can safely move this instruction into the loop.
// As such, it must not have side-effects, e.g. such as a call has.
if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(I))
Candidates.push_back(I);
}
// Phase 2: for each candidate, find a single block inside the loop that
// dominates all of its users, and splice the instruction there.
for (MachineInstr *I : Candidates) {
// Only handle the simple case: operand 0 is the (sole) register def.
const MachineOperand &MO = I->getOperand(0);
if (!MO.isDef() || !MO.isReg() || !MO.getReg())
continue;
// Require a single definition so moving this instruction cannot change
// which def reaches any use.
if (!MRI->hasOneDef(MO.getReg()))
continue;
bool CanSink = true;
// B accumulates the nearest common dominator of all use blocks; this is
// the deepest block where the sunk instruction still reaches every user.
MachineBasicBlock *B = nullptr;
for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
// FIXME: Come up with a proper cost model that estimates whether sinking
// the instruction (and thus possibly executing it on every loop
// iteration) is more expensive than a register.
// For now assumes that copies are cheap and thus almost always worth it.
if (!MI.isCopy()) {
CanSink = false;
break;
}
if (!B) {
B = MI.getParent();
continue;
}
B = DT->findNearestCommonDominator(B, MI.getParent());
if (!B) {
// Uses have no common dominator; there is no single legal sink point.
CanSink = false;
break;
}
}
// Skip if any use blocked sinking, no users were found, or the best
// position is the preheader itself (no improvement).
if (!CanSink || !B || B == Preheader)
continue;
// Move the instruction past B's PHIs; defs may not appear among PHIs.
B->splice(B->getFirstNonPHI(), Preheader, I);
}
}
// isOperandKill - Return true if this use operand ends the live range of its
// register: either it carries an explicit kill flag, or the register has
// exactly one non-debug use in the function (so this use must be the last).
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
  // An explicit kill flag settles it immediately.
  if (MO.isKill())
    return true;
  // Otherwise, a register with a single non-debug use dies at that use.
  return MRI->hasOneNonDBGUse(MO.getReg());
}

View File

@ -0,0 +1,62 @@
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefix=CHECK
; RUN: llc < %s -mtriple=x86_64-linux -sink-insts-to-avoid-spills | FileCheck %s -check-prefix=SINK
; Ensure that we sink copy-like instructions into loops to avoid register
; spills.
; CHECK: Spill
; SINK-NOT: Spill
%struct.A = type { i32, i32, i32, i32, i32, i32 }
; A loop that dispatches on each input byte to one of six calls, each taking
; the address of a different %struct.A field. All six GEPs are computed in
; the entry block, so without sinking all six pointers are live across every
; call in the loop, which is what forces the spills checked for above.
define void @_Z1fPhP1A(i8* nocapture readonly %input, %struct.A* %a) {
; Six loop-invariant field addresses, hoisted to the entry block. These are
; the copy-like instructions -sink-insts-to-avoid-spills should move down
; into the switch arms that use them.
%1 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 0
%2 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 1
%3 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 2
%4 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 3
%5 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 4
%6 = getelementptr inbounds %struct.A, %struct.A* %a, i64 0, i32 5
br label %.backedge
.backedge:
; Walk the input one byte at a time and dispatch on its value.
%.0 = phi i8* [ %input, %0 ], [ %7, %.backedge.backedge ]
%7 = getelementptr inbounds i8, i8* %.0, i64 1
%8 = load i8, i8* %7, align 1
switch i8 %8, label %.backedge.backedge [
i8 0, label %9
i8 10, label %10
i8 20, label %11
i8 30, label %12
i8 40, label %13
i8 50, label %14
]
; Each arm uses exactly one of the six field pointers, so each pointer has a
; single (copy-like) use the pass can sink next to.
; <label>:9
tail call void @_Z6assignPj(i32* %1)
br label %.backedge.backedge
; <label>:10
tail call void @_Z6assignPj(i32* %2)
br label %.backedge.backedge
.backedge.backedge:
br label %.backedge
; <label>:11
tail call void @_Z6assignPj(i32* %3)
br label %.backedge.backedge
; <label>:12
tail call void @_Z6assignPj(i32* %4)
br label %.backedge.backedge
; <label>:13
tail call void @_Z6assignPj(i32* %5)
br label %.backedge.backedge
; <label>:14
tail call void @_Z6assignPj(i32* %6)
br label %.backedge.backedge
}
declare void @_Z6assignPj(i32*)