Collapse DomainValues across loop back-edges.

During the initial RPO traversal of the basic blocks, remember the blocks
whose incoming state is incomplete because some of their predecessors
(the sources of back-edges) haven't been visited yet.

After the initial RPO traversal, revisit all those loop headers so the
incoming DomainValues on the back-edges can be properly collapsed.

This properly fixes execution domains in software-pipelined code, such as
the included test case.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@144151 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Jakob Stoklund Olesen
2011-11-09 01:06:56 +00:00
parent dbc372f47e
commit f4c4768fb2
2 changed files with 80 additions and 8 deletions
+35 -8
View File
@@ -160,7 +160,7 @@ private:
void collapse(DomainValue *dv, unsigned domain);
bool merge(DomainValue *A, DomainValue *B);
void enterBasicBlock(MachineBasicBlock*);
bool enterBasicBlock(MachineBasicBlock*);
void leaveBasicBlock(MachineBasicBlock*);
void visitInstr(MachineInstr*);
void visitGenericInstr(MachineInstr*);
@@ -317,7 +317,13 @@ bool ExeDepsFix::merge(DomainValue *A, DomainValue *B) {
return true;
}
void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// enterBasicBlock - Set up LiveRegs by merging predecessor live-out values.
// Return true if some predecessor hasn't been processed yet (like on a loop
// back-edge).
bool ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
// Detect back-edges from predecessors we haven't processed yet.
bool seenBackEdge = false;
// Try to coalesce live-out registers from predecessors.
for (MachineBasicBlock::livein_iterator i = MBB->livein_begin(),
e = MBB->livein_end(); i != e; ++i) {
@@ -326,7 +332,12 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
for (MachineBasicBlock::const_pred_iterator pi = MBB->pred_begin(),
pe = MBB->pred_end(); pi != pe; ++pi) {
LiveOutMap::const_iterator fi = LiveOuts.find(*pi);
if (fi == LiveOuts.end()) continue;
if (fi == LiveOuts.end()) {
seenBackEdge = true;
continue;
}
if (!fi->second)
continue;
DomainValue *pdv = resolve(fi->second[rx]);
if (!pdv) continue;
if (!LiveRegs || !LiveRegs[rx]) {
@@ -350,12 +361,19 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) {
force(rx, pdv->getFirstDomain());
}
}
return seenBackEdge;
}
void ExeDepsFix::leaveBasicBlock(MachineBasicBlock *MBB) {
// Save live registers at end of MBB - used by enterBasicBlock().
if (LiveRegs)
LiveOuts.insert(std::make_pair(MBB, LiveRegs));
// Also use LiveOuts as a visited set to detect back-edges.
if (!LiveOuts.insert(std::make_pair(MBB, LiveRegs)).second && LiveRegs) {
// Insertion failed, this must be the second pass.
// Release all the DomainValues instead of keeping them.
for (unsigned i = 0, e = NumRegs; i != e; ++i)
release(LiveRegs[i]);
delete[] LiveRegs;
}
LiveRegs = 0;
}
@@ -545,23 +563,32 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) {
MachineBasicBlock *Entry = MF->begin();
ReversePostOrderTraversal<MachineBasicBlock*> RPOT(Entry);
SmallVector<MachineBasicBlock*, 16> Loops;
for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
MachineBasicBlock *MBB = *MBBI;
enterBasicBlock(MBB);
if (enterBasicBlock(MBB))
Loops.push_back(MBB);
for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
++I)
visitInstr(I);
leaveBasicBlock(MBB);
}
// Visit all the loop blocks again in order to merge DomainValues from
// back-edges.
for (unsigned i = 0, e = Loops.size(); i != e; ++i) {
MachineBasicBlock *MBB = Loops[i];
enterBasicBlock(MBB);
leaveBasicBlock(MBB);
}
// Clear the LiveOuts vectors and collapse any remaining DomainValues.
for (ReversePostOrderTraversal<MachineBasicBlock*>::rpo_iterator
MBBI = RPOT.begin(), MBBE = RPOT.end(); MBBI != MBBE; ++MBBI) {
LiveOutMap::const_iterator FI = LiveOuts.find(*MBBI);
if (FI == LiveOuts.end())
if (FI == LiveOuts.end() || !FI->second)
continue;
assert(FI->second && "Null entry");
for (unsigned i = 0, e = NumRegs; i != e; ++i)
if (FI->second[i])
release(FI->second[i]);
+45
View File
@@ -0,0 +1,45 @@
; RUN: llc < %s | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.7"
; CHECK: f
;
; This function contains load, store, and 'and' operations, all of which can
; execute in any domain. The only domain-specific operation is the
; %add = shl ... operation on <4 x i32>, which is integer-only.
;
; The paddd instruction can only influence the other operations through the loop
; back-edge. Check that everything is still moved into the integer domain.
; @f: a single-block loop that repeats until the decremented counter %dec
; reaches zero. Each iteration stores the masked vector carried in from the
; previous iteration and then loads and shifts the next one.
define void @f(<4 x i32>* nocapture %p, i32 %n) nounwind uwtable ssp {
entry:
br label %while.body
; Materialize a zeroinitializer and a constant-pool load in the integer domain.
; The order is not important.
; CHECK: pxor
; CHECK: movdqa
; The instructions in the loop must all be integer domain as well.
; CHECK: while.body
; CHECK: pand
; CHECK: movdqa
; CHECK: movdqa
; Finally, the controlling integer-only instruction.
; CHECK: paddd
while.body:
; Loop-carried values: the current pointer, the remaining count, and the
; vector produced by the previous iteration.
%p.addr.04 = phi <4 x i32>* [ %incdec.ptr, %while.body ], [ %p, %entry ]
%n.addr.03 = phi i32 [ %dec, %while.body ], [ %n, %entry ]
; %x.02 receives %add only through the back-edge from %while.body; on entry
; it is zeroinitializer.
%x.02 = phi <4 x i32> [ %add, %while.body ], [ zeroinitializer, %entry ]
%dec = add nsw i32 %n.addr.03, -1
%and = and <4 x i32> %x.02, <i32 127, i32 127, i32 127, i32 127>
%incdec.ptr = getelementptr inbounds <4 x i32>* %p.addr.04, i64 1
store <4 x i32> %and, <4 x i32>* %p.addr.04, align 16
%0 = load <4 x i32>* %incdec.ptr, align 16
; Shift each lane left by one; the test expects this to show up as the
; integer-only paddd.
%add = shl <4 x i32> %0, <i32 1, i32 1, i32 1, i32 1>
; Exit once the counter reaches zero, otherwise take the back-edge.
%tobool = icmp eq i32 %dec, 0
br i1 %tobool, label %while.end, label %while.body
while.end:
ret void
}