Implement smarter algorithm for choosing which blocks to tail-merge.

See test/CodeGen/X86/test-pic-jtbl.ll for a case where it works well;
shaves another 10K off our favorite benchmark.  I was hesitant about
this because of compile speed, but seems to do OK on a bootstrap.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@37392 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Dale Johannesen 2007-06-01 23:02:45 +00:00
parent e770787be1
commit a5a2117a46
2 changed files with 45 additions and 31 deletions

View File

@ -466,43 +466,58 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
continue;
}
// Look through all the blocks that have the same hash as this one, and
// find the one that has the largest number of instructions in common.
// Since instructions may get combined later (e.g. single stores into
// Look through all the pairs of blocks that have the same hash as this
// one, and find the pair that has the largest number of instructions in
// common.
// Since instructions may get combined later (e.g. single stores into
// store multiple) this measure is not particularly accurate.
MachineBasicBlock::iterator BBI1, BBI2;
MachineBasicBlock::iterator BBI1, BBI2;
unsigned FoundMatch = ~0U;
unsigned FoundI = ~0U, FoundJ = ~0U;
unsigned maxCommonTailLength = 0U;
for (int i = MergePotentials.size()-2;
for (int i = MergePotentials.size()-1;
i != -1 && MergePotentials[i].first == CurHash; --i) {
MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
unsigned CommonTailLen = ComputeCommonTailLength(CurMBB,
MergePotentials[i].second,
TrialBBI1, TrialBBI2);
if (CommonTailLen >= minCommonTailLength &&
CommonTailLen >= maxCommonTailLength) {
FoundMatch = i;
maxCommonTailLength = CommonTailLen;
BBI1 = TrialBBI1;
BBI2 = TrialBBI2;
for (int j = i-1;
j != -1 && MergePotentials[j].first == CurHash; --j) {
MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
unsigned CommonTailLen = ComputeCommonTailLength(
MergePotentials[i].second,
MergePotentials[j].second,
TrialBBI1, TrialBBI2);
if (CommonTailLen >= minCommonTailLength &&
CommonTailLen > maxCommonTailLength) {
FoundI = i;
FoundJ = j;
maxCommonTailLength = CommonTailLen;
BBI1 = TrialBBI1;
BBI2 = TrialBBI2;
}
}
}
// If we didn't find anything that has at least minCommonTailLength
// instructions matching this one, bail out.
if (FoundMatch == ~0U) {
// Put the unconditional branch back, if we need one.
if (SuccBB && CurMBB != PredBB)
FixTail(CurMBB, SuccBB, TII);
MergePotentials.pop_back();
// If we didn't find any pair that has at least minCommonTailLength
// instructions in common, bail out. All entries with this
// hash code can go away now.
if (FoundI == ~0U) {
for (int i = MergePotentials.size()-1;
i != -1 && MergePotentials[i].first == CurHash; --i) {
// Put the unconditional branch back, if we need one.
CurMBB = MergePotentials[i].second;
if (SuccBB && CurMBB != PredBB)
FixTail(CurMBB, SuccBB, TII);
MergePotentials.pop_back();
}
continue;
}
// Otherwise, move the matching block to the right position.
if (FoundMatch != MergePotentials.size()-2)
std::swap(MergePotentials[FoundMatch], *(MergePotentials.end()-2));
// Otherwise, move the block(s) to the right position(s). So that
// BBI1/2 will be valid, the last must be I and the next-to-last J.
if (FoundI != MergePotentials.size()-1)
std::swap(MergePotentials[FoundI], *(MergePotentials.end()-1));
if (FoundJ != MergePotentials.size()-2)
std::swap(MergePotentials[FoundJ], *(MergePotentials.end()-2));
CurMBB = (MergePotentials.end()-1)->second;
MachineBasicBlock *MBB2 = (MergePotentials.end()-2)->second;
// If neither block is the entire common tail, split the tail of one block

View File

@ -2,10 +2,9 @@
; RUN: -o %t -f
; RUN: grep _GLOBAL_OFFSET_TABLE_ %t
; RUN: grep piclabel %t | wc -l | grep 3
; RUN: grep PLT %t | wc -l | grep 11
; RUN: grep PLT %t | wc -l | grep 6
; RUN: grep GOTOFF %t | wc -l | grep 1
; RUN: grep JTI %t | wc -l | grep 13
; Improved tail merging could reduce the number of PLT's and JTI's further.
; RUN: grep JTI %t | wc -l | grep 8
define void @bar(i32 %n.u) {
entry: