- Clean up some crappy code which deals with coalescing of copies which look at

extract_subreg / insert_subreg, etc.
- Add support for more aggressive insert_subreg coalescing.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@101971 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2010-04-21 00:44:22 +00:00
parent d217cfcf46
commit 30fdb5c2ac
7 changed files with 88 additions and 59 deletions

View File

@ -111,6 +111,12 @@ namespace llvm {
double getScaledIntervalSize(LiveInterval& I) {
return (1000.0 * I.getSize()) / indexes_->getIndexesLength();
}
/// getFuncInstructionCount - Return the number of instructions in the
/// current function.
unsigned getFuncInstructionCount() {
return indexes_->getFunctionSize();
}
/// getApproximateInstructionCount - computes an estimate of the number
/// of instructions in a given LiveInterval.

View File

@ -1168,20 +1168,44 @@ SimpleRegisterCoalescing::isWinToJoinVRWithDstPhysReg(MachineInstr *CopyMI,
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
/// two virtual registers from different register classes.
bool
SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned LargeReg,
unsigned SmallReg,
unsigned Threshold) {
// Then make sure the intervals are *short*.
LiveInterval &LargeInt = li_->getInterval(LargeReg);
LiveInterval &SmallInt = li_->getInterval(SmallReg);
unsigned LargeSize = li_->getApproximateInstructionCount(LargeInt);
unsigned SmallSize = li_->getApproximateInstructionCount(SmallInt);
if (LargeSize > Threshold) {
unsigned SmallUses = std::distance(mri_->use_nodbg_begin(SmallReg),
mri_->use_nodbg_end());
unsigned LargeUses = std::distance(mri_->use_nodbg_begin(LargeReg),
mri_->use_nodbg_end());
if (SmallUses*LargeSize < LargeUses*SmallSize)
SimpleRegisterCoalescing::isWinToJoinCrossClass(unsigned SrcReg,
unsigned DstReg,
const TargetRegisterClass *SrcRC,
const TargetRegisterClass *DstRC,
const TargetRegisterClass *NewRC) {
unsigned NewRCCount = allocatableRCRegs_[NewRC].count();
// This heuristics is good enough in practice, but it's obviously not *right*.
// 4 is a magic number that works well enough for x86, ARM, etc. It filter
// out all but the most restrictive register classes.
if (NewRCCount > 4 ||
// Early exit if the function is fairly small, coalesce aggressively if
// that's the case. For really special register classes with 3 or
// fewer registers, be a bit more careful.
(li_->getFuncInstructionCount() / NewRCCount) < 8)
return true;
LiveInterval &SrcInt = li_->getInterval(SrcReg);
LiveInterval &DstInt = li_->getInterval(DstReg);
unsigned SrcSize = li_->getApproximateInstructionCount(SrcInt);
unsigned DstSize = li_->getApproximateInstructionCount(DstInt);
if (SrcSize <= NewRCCount && DstSize <= NewRCCount)
return true;
// Estimate *register use density*. If it doubles or more, abort.
unsigned SrcUses = std::distance(mri_->use_nodbg_begin(SrcReg),
mri_->use_nodbg_end());
unsigned DstUses = std::distance(mri_->use_nodbg_begin(DstReg),
mri_->use_nodbg_end());
float NewDensity = ((float)(SrcUses + DstUses) / (SrcSize + DstSize)) /
NewRCCount;
if (SrcRC != NewRC && SrcSize > NewRCCount) {
unsigned SrcRCCount = allocatableRCRegs_[SrcRC].count();
float Density = ((float)SrcUses / SrcSize) / SrcRCCount;
if (NewDensity > Density * 2.0f)
return false;
}
if (DstRC != NewRC && DstSize > NewRCCount) {
unsigned DstRCCount = allocatableRCRegs_[DstRC].count();
float Density = ((float)DstUses / DstSize) / DstRCCount;
if (NewDensity > Density * 2.0f)
return false;
}
return true;
@ -1517,10 +1541,11 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
return false; // Not coalescable
}
unsigned LargeReg = isExtSubReg ? SrcReg : DstReg;
unsigned SmallReg = isExtSubReg ? DstReg : SrcReg;
unsigned Limit= allocatableRCRegs_[mri_->getRegClass(SmallReg)].count();
if (!isWinToJoinCrossClass(LargeReg, SmallReg, Limit)) {
if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
DEBUG(dbgs() << "\tAvoid coalescing to constrainted register class: "
<< SrcRC->getName() << "/"
<< DstRC->getName() << " -> "
<< NewRC->getName() << ".\n");
Again = true; // May be possible to coalesce later.
return false;
}
@ -1568,49 +1593,40 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
}
}
unsigned LargeReg = SrcReg;
unsigned SmallReg = DstReg;
// Now determine the register class of the joined register.
if (isExtSubReg) {
if (SubIdx && DstRC && DstRC->isASubClass()) {
// This is a move to a sub-register class. However, the source is a
// sub-register of a larger register class. We don't know what should
// the register class be. FIXME.
Again = true;
return false;
if (!SrcIsPhys && !DstIsPhys) {
if (isExtSubReg) {
NewRC =
SubIdx ? tri_->getMatchingSuperRegClass(SrcRC, DstRC, SubIdx) : SrcRC;
} else if (isInsSubReg) {
NewRC =
SubIdx ? tri_->getMatchingSuperRegClass(DstRC, SrcRC, SubIdx) : DstRC;
} else {
NewRC = getCommonSubClass(SrcRC, DstRC);
}
if (!DstIsPhys && !SrcIsPhys)
NewRC = SrcRC;
} else if (!SrcIsPhys && !DstIsPhys) {
NewRC = getCommonSubClass(SrcRC, DstRC);
if (!NewRC) {
DEBUG(dbgs() << "\tDisjoint regclasses: "
<< SrcRC->getName() << ", "
<< DstRC->getName() << ".\n");
return false; // Not coalescable.
}
if (DstRC->getSize() > SrcRC->getSize())
std::swap(LargeReg, SmallReg);
}
// If we are joining two virtual registers and the resulting register
// class is more restrictive (fewer register, smaller size). Check if it's
// worth doing the merge.
if (!SrcIsPhys && !DstIsPhys &&
(isExtSubReg || DstRC->isASubClass()) &&
!isWinToJoinCrossClass(LargeReg, SmallReg,
allocatableRCRegs_[NewRC].count())) {
DEBUG(dbgs() << "\tSrc/Dest are different register classes: "
<< SrcRC->getName() << "/"
<< DstRC->getName() << " -> "
<< NewRC->getName() << ".\n");
// Allow the coalescer to try again in case either side gets coalesced to
// a physical register that's compatible with the other side. e.g.
// r1024 = MOV32to32_ r1025
// But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
Again = true; // May be possible to coalesce later.
return false;
// If we are joining two virtual registers and the resulting register
// class is more restrictive (fewer register, smaller size). Check if it's
// worth doing the merge.
if (!isWinToJoinCrossClass(SrcReg, DstReg, SrcRC, DstRC, NewRC)) {
DEBUG(dbgs() << "\tAvoid coalescing to constrainted register class: "
<< SrcRC->getName() << "/"
<< DstRC->getName() << " -> "
<< NewRC->getName() << ".\n");
// Allow the coalescer to try again in case either side gets coalesced to
// a physical register that's compatible with the other side. e.g.
// r1024 = MOV32to32_ r1025
// But later r1024 is assigned EAX then r1025 may be coalesced with EAX.
Again = true; // May be possible to coalesce later.
return false;
}
}
}

View File

@ -179,8 +179,11 @@ namespace llvm {
/// isWinToJoinCrossClass - Return true if it's profitable to coalesce
/// two virtual registers from different register classes.
bool isWinToJoinCrossClass(unsigned LargeReg, unsigned SmallReg,
unsigned Threshold);
bool isWinToJoinCrossClass(unsigned SrcReg,
unsigned DstReg,
const TargetRegisterClass *SrcRC,
const TargetRegisterClass *DstRC,
const TargetRegisterClass *NewRC);
/// HasIncompatibleSubRegDefUse - If we are trying to coalesce a virtual
/// register with a physical register, check if any of the virtual register

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 3
; RUN: llc < %s -mtriple=thumbv7-apple-darwin9 -mcpu=cortex-a8 | grep vmov.f32 | count 1
define arm_apcscc void @fht(float* nocapture %fz, i16 signext %n) nounwind {
entry:

View File

@ -1,4 +1,5 @@
; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | grep {andl.*7.*edi}
; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin -stats |& grep asm-printer | grep 40
; RUN: llc < %s -relocation-model=pic -disable-fp-elim -mtriple=i386-apple-darwin | FileCheck %s
%struct.XXDActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
%struct.XXDAlphaTest = type { float, i16, i8, i8 }
@ -61,11 +62,15 @@
define void @t(%struct.XXDState* %gldst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._XXVMConstants* %cnstn, %struct.YYToken* %pstrm, %struct.XXVMVPContext* %vmctx, %struct.XXVMTextures* %txtrs, %struct.XXVMVPStack* %vpstk, <4 x float>* %atr0, <4 x float>* %atr1, <4 x float>* %atr2, <4 x float>* %atr3, <4 x float>* %vtx0, <4 x float>* %vtx1, <4 x float>* %vtx2, <4 x float>* %vtx3, [4 x <4 x float>]* %tmpGbl, i32* %oldMsk, <4 x i32>* %adrGbl, i64 %key_token) nounwind {
entry:
; CHECK: t:
; CHECK: xorl %ecx, %ecx
%0 = trunc i64 %key_token to i32 ; <i32> [#uses=1]
%1 = getelementptr %struct.YYToken* %pstrm, i32 %0 ; <%struct.YYToken*> [#uses=5]
br label %bb1132
bb51: ; preds = %bb1132
; CHECK: .align 4
; CHECK: andl $7
%2 = getelementptr %struct.YYToken* %1, i32 %operation.0.rec, i32 0, i32 0 ; <i16*> [#uses=1]
%3 = load i16* %2, align 1 ; <i16> [#uses=3]
%4 = lshr i16 %3, 6 ; <i16> [#uses=1]

View File

@ -149,7 +149,6 @@ entry:
bb.nph: ; preds = %entry
; X86-64: movq _map_4_to_16@GOTPCREL(%rip)
; X86-64: movq _map_4_to_16@GOTPCREL(%rip)
; X86-64: .align 4
%tmp5 = zext i32 undef to i64 ; <i64> [#uses=1]
%tmp6 = add i64 %tmp5, 1 ; <i64> [#uses=1]

View File

@ -1,6 +1,6 @@
; RUN: llc < %s -mtriple=x86_64-apple-darwin10 -relocation-model=pic -disable-fp-elim -color-ss-with-regs -stats -info-output-file - > %t
; RUN: grep asm-printer %t | grep 156
; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 4
; RUN: grep asm-printer %t | grep 166
; RUN: grep stackcoloring %t | grep "stack slot refs replaced with reg refs" | grep 5
type { [62 x %struct.Bitvec*] } ; type %0
type { i8* } ; type %1