Teach two-address pass to do some coalescing while eliminating REG_SEQUENCE

instructions.

e.g.
%reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
%reg1027<def> = EXTRACT_SUBREG %reg1026, 6
%reg1028<def> = EXTRACT_SUBREG %reg1026<kill>, 5
...
%reg1029<def> = REG_SEQUENCE %reg1028<kill>, 5, %reg1027<kill>, 6, %reg1028, 7, %reg1027, 8, %reg1028, 9, %reg1027, 10, %reg1030<kill>, 11, %reg1032<kill>, 12

After REG_SEQUENCE is eliminated, we are left with:

%reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
%reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
%reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5

The regular coalescer will not be able to coalesce reg1026 and reg1029 because it doesn't
know how to combine sub-register indices 5 and 6. Now the two-address pass will ask the
target whether sub-registers 5 and 6 of reg1026 can be combined into a larger
sub-register (or combined to be reg1026 itself, as is the case here). If that is possible,
it will replace references to reg1026 with reg1029 plus the larger sub-register
index.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@103835 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2010-05-14 23:21:14 +00:00
parent fd72617688
commit b990a2f249
7 changed files with 204 additions and 3 deletions

View File

@ -28,6 +28,7 @@ class BitVector;
class MachineFunction;
class MachineMove;
class RegScavenger;
template<class T> class SmallVectorImpl;
/// TargetRegisterDesc - This record contains all of the information known about
/// a particular register. The AliasSet field (if not null) contains a pointer
@ -479,6 +480,17 @@ public:
return 0;
}
/// canCombinedSubRegIndex - Target hook asking whether the sub-register
/// indices listed in SubIndices, taken from a register of class RC, can be
/// merged into a single index that names one larger sub-register. A target
/// that can do so returns true and stores the merged index in NewSubIdx;
/// that index may be zero when the listed sub-registers together form the
/// whole register.
///
/// This default implementation never combines anything: it returns false
/// and leaves NewSubIdx untouched.
virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC,
                                    SmallVectorImpl<unsigned> &SubIndices,
                                    unsigned &NewSubIdx) const {
  return false;
}
/// getMatchingSuperRegClass - Return a subclass of the specified register
/// class A so that each register in it has a sub-register of the
/// specified sub-register index which is in the specified register class B.

View File

@ -1166,6 +1166,7 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
llvm_unreachable(0);
}
SmallVector<unsigned, 4> RealSrcs;
SmallSet<unsigned, 4> Seen;
for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) {
unsigned SrcReg = MI->getOperand(i).getReg();
@ -1176,6 +1177,16 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
}
MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
if (DefMI->isImplicitDef()) {
DefMI->eraseFromParent();
continue;
}
// Remember EXTRACT_SUBREG sources. These might be candidates for
// coalescing.
if (DefMI->isExtractSubreg())
RealSrcs.push_back(DefMI->getOperand(1).getReg());
if (!Seen.insert(SrcReg) || MI->getParent() != DefMI->getParent()) {
// REG_SEQUENCE cannot have duplicated operands, add a copy.
// Also add a copy if the source is live-in to the block. We don't want
@ -1216,6 +1227,44 @@ bool TwoAddressInstructionPass::EliminateRegSequences() {
DEBUG(dbgs() << "Eliminated: " << *MI);
MI->eraseFromParent();
// Try coalescing some EXTRACT_SUBREG instructions.
Seen.clear();
for (unsigned i = 0, e = RealSrcs.size(); i != e; ++i) {
unsigned SrcReg = RealSrcs[i];
if (!Seen.insert(SrcReg))
continue;
// If there are no other uses than extract_subreg which feed into
// the reg_sequence, then we might be able to coalesce them.
bool CanCoalesce = true;
SmallVector<unsigned, 4> SubIndices;
for (MachineRegisterInfo::use_nodbg_iterator
UI = MRI->use_nodbg_begin(SrcReg),
UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
MachineInstr *UseMI = &*UI;
if (!UseMI->isExtractSubreg() ||
UseMI->getOperand(0).getReg() != DstReg) {
CanCoalesce = false;
break;
}
SubIndices.push_back(UseMI->getOperand(2).getImm());
}
if (!CanCoalesce)
continue;
// %reg1026<def> = VLDMQ %reg1025<kill>, 260, pred:14, pred:%reg0
// %reg1029:6<def> = EXTRACT_SUBREG %reg1026, 6
// %reg1029:5<def> = EXTRACT_SUBREG %reg1026<kill>, 5
// Since D subregs 5, 6 can combine to a Q register, we can coalesce
// reg1026 to reg1029.
std::sort(SubIndices.begin(), SubIndices.end());
unsigned NewSubIdx = 0;
if (TRI->canCombinedSubRegIndex(MRI->getRegClass(SrcReg), SubIndices,
NewSubIdx))
UpdateRegSequenceSrcs(SrcReg, DstReg, NewSubIdx, MRI);
}
}
RegSequences.clear();

View File

@ -351,6 +351,123 @@ ARMBaseRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return 0;
}
/// canCombinedSubRegIndex - ARM implementation of the sub-register combining
/// hook. Given the register class RC and a list of sub-register indices,
/// decide whether those indices together name one larger sub-register (or
/// the whole register).
///
/// \param RC         Register class of the register the indices refer to.
/// \param SubIndices Sub-register indices to combine. Each case below matches
///                   the entries positionally against consecutive index
///                   values, so the list must be in ascending order.
/// \param NewSubIdx  Set to the combined sub-register index when true is
///                   returned; 0 means the indices cover the whole register.
///                   It is left untouched when RC is too small to combine,
///                   and is reset to 0 on every other failing path.
/// \returns true if the indices can be combined, false otherwise.
bool
ARMBaseRegisterInfo::canCombinedSubRegIndex(const TargetRegisterClass *RC,
                                          SmallVectorImpl<unsigned> &SubIndices,
                                          unsigned &NewSubIdx) const {
  // RC->getSize() is scaled by 8 so that the checks below can compare against
  // 128 / 256 / 512 for the Q / QQ / QQQQ cases.
  unsigned Size = RC->getSize() * 8;
  // The smallest combination handled here is two S sub-registers forming a
  // D register (64 bits), so any narrower class has nothing to combine.
  if (Size < 64)
    return false;

  NewSubIdx = 0;  // Whole register.
  unsigned NumRegs = SubIndices.size();
  if (NumRegs == 8) {
    // 8 D registers -> 1 QQQQ register.
    return (Size == 512 &&
            SubIndices[0] == ARM::DSUBREG_0 &&
            SubIndices[1] == ARM::DSUBREG_1 &&
            SubIndices[2] == ARM::DSUBREG_2 &&
            SubIndices[3] == ARM::DSUBREG_3 &&
            SubIndices[4] == ARM::DSUBREG_4 &&
            SubIndices[5] == ARM::DSUBREG_5 &&
            SubIndices[6] == ARM::DSUBREG_6 &&
            SubIndices[7] == ARM::DSUBREG_7);
  } else if (NumRegs == 4) {
    if (SubIndices[0] == ARM::QSUBREG_0) {
      // 4 Q registers -> 1 QQQQ register.
      return (Size == 512 &&
              SubIndices[1] == ARM::QSUBREG_1 &&
              SubIndices[2] == ARM::QSUBREG_2 &&
              SubIndices[3] == ARM::QSUBREG_3);
    } else if (SubIndices[0] == ARM::DSUBREG_0) {
      // 4 D registers -> 1 QQ register.
      if (Size >= 256 &&
          SubIndices[1] == ARM::DSUBREG_1 &&
          SubIndices[2] == ARM::DSUBREG_2 &&
          SubIndices[3] == ARM::DSUBREG_3) {
        // Only a QQQQ register has a distinct QQ sub-register; for a QQ
        // register the four D's are the whole register (index 0).
        if (Size == 512)
          NewSubIdx = ARM::QQSUBREG_0;
        return true;
      }
    } else if (SubIndices[0] == ARM::DSUBREG_4) {
      // 4 D registers -> 1 QQ register (2nd).
      if (Size == 512 &&
          SubIndices[1] == ARM::DSUBREG_5 &&
          SubIndices[2] == ARM::DSUBREG_6 &&
          SubIndices[3] == ARM::DSUBREG_7) {
        NewSubIdx = ARM::QQSUBREG_1;
        return true;
      }
    } else if (SubIndices[0] == ARM::SSUBREG_0) {
      // 4 S registers -> 1 Q register.
      if (Size >= 128 &&
          SubIndices[1] == ARM::SSUBREG_1 &&
          SubIndices[2] == ARM::SSUBREG_2 &&
          SubIndices[3] == ARM::SSUBREG_3) {
        if (Size >= 256)
          NewSubIdx = ARM::QSUBREG_0;
        return true;
      }
    }
  } else if (NumRegs == 2) {
    if (SubIndices[0] == ARM::QSUBREG_0) {
      // 2 Q registers -> 1 QQ register.
      if (Size >= 256 && SubIndices[1] == ARM::QSUBREG_1) {
        if (Size == 512)
          NewSubIdx = ARM::QQSUBREG_0;
        return true;
      }
    } else if (SubIndices[0] == ARM::QSUBREG_2) {
      // 2 Q registers -> 1 QQ register (2nd).
      if (Size == 512 && SubIndices[1] == ARM::QSUBREG_3) {
        NewSubIdx = ARM::QQSUBREG_1;
        return true;
      }
    } else if (SubIndices[0] == ARM::DSUBREG_0) {
      // 2 D registers -> 1 Q register.
      if (Size >= 128 && SubIndices[1] == ARM::DSUBREG_1) {
        if (Size >= 256)
          NewSubIdx = ARM::QSUBREG_0;
        return true;
      }
    } else if (SubIndices[0] == ARM::DSUBREG_2) {
      // 2 D registers -> 1 Q register (2nd).
      if (Size >= 256 && SubIndices[1] == ARM::DSUBREG_3) {
        NewSubIdx = ARM::QSUBREG_1;
        return true;
      }
    } else if (SubIndices[0] == ARM::DSUBREG_4) {
      // 2 D registers -> 1 Q register (3rd).
      if (Size == 512 && SubIndices[1] == ARM::DSUBREG_5) {
        NewSubIdx = ARM::QSUBREG_2;
        return true;
      }
    } else if (SubIndices[0] == ARM::DSUBREG_6) {
      // 2 D registers -> 1 Q register (4th).
      if (Size == 512 && SubIndices[1] == ARM::DSUBREG_7) {
        NewSubIdx = ARM::QSUBREG_3;
        return true;
      }
    } else if (SubIndices[0] == ARM::SSUBREG_0) {
      // 2 S registers -> 1 D register. No extra size check is needed: the
      // guard at the top already rejects classes narrower than a D register.
      if (SubIndices[1] == ARM::SSUBREG_1) {
        if (Size >= 128)
          NewSubIdx = ARM::DSUBREG_0;
        return true;
      }
    } else if (SubIndices[0] == ARM::SSUBREG_2) {
      // 2 S registers -> 1 D register (2nd).
      if (Size >= 128 && SubIndices[1] == ARM::SSUBREG_3) {
        NewSubIdx = ARM::DSUBREG_1;
        return true;
      }
    }
  }
  // Any other count or combination of indices cannot be merged.
  return false;
}
const TargetRegisterClass *
ARMBaseRegisterInfo::getPointerRegClass(unsigned Kind) const {
return ARM::GPRRegisterClass;

View File

@ -81,6 +81,15 @@ public:
getMatchingSuperRegClass(const TargetRegisterClass *A,
const TargetRegisterClass *B, unsigned Idx) const;
/// canCombinedSubRegIndex - Given a register class and a list of sub-register
/// indices, return true if it's possible to combine the sub-register indices
/// into one that corresponds to a larger sub-register. Return the new sub-
/// register index by reference. Note the new index may be zero if the given
/// sub-registers combine to form the whole register.
virtual bool canCombinedSubRegIndex(const TargetRegisterClass *RC,
SmallVectorImpl<unsigned> &SubIndices,
unsigned &NewSubIdx) const;
const TargetRegisterClass *getPointerRegClass(unsigned Kind = 0) const;
std::pair<TargetRegisterClass::iterator,TargetRegisterClass::iterator>

View File

@ -31,7 +31,8 @@ namespace ARM {
SSUBREG_0 = 1, SSUBREG_1 = 2, SSUBREG_2 = 3, SSUBREG_3 = 4,
DSUBREG_0 = 5, DSUBREG_1 = 6, DSUBREG_2 = 7, DSUBREG_3 = 8,
DSUBREG_4 = 9, DSUBREG_5 = 10, DSUBREG_6 = 11, DSUBREG_7 = 12,
QSUBREG_0 = 13, QSUBREG_1 = 14, QSUBREG_2 = 15, QSUBREG_3 = 16
QSUBREG_0 = 13, QSUBREG_1 = 14, QSUBREG_2 = 15, QSUBREG_3 = 16,
QQSUBREG_0= 17, QQSUBREG_1= 18
};
}

View File

@ -465,6 +465,10 @@ def arm_qsubreg_1 : PatLeaf<(i32 14)>;
def arm_qsubreg_2 : PatLeaf<(i32 15)>;
def arm_qsubreg_3 : PatLeaf<(i32 16)>;
// PatLeaf wrappers for the i32 constants 17 and 18, named after the two QQ
// sub-register indices.
def arm_qqsubreg_0 : PatLeaf<(i32 17)>;
def arm_qqsubreg_1 : PatLeaf<(i32 18)>;
// S sub-registers of D registers.
def : SubRegSet<1, [D0, D1, D2, D3, D4, D5, D6, D7,
D8, D9, D10, D11, D12, D13, D14, D15],
@ -552,3 +556,10 @@ def : SubRegSet<15, [QQQQ0, QQQQ1, QQQQ2, QQQQ3],
[Q2, Q6, Q10, Q14]>;
def : SubRegSet<16, [QQQQ0, QQQQ1, QQQQ2, QQQQ3],
[Q3, Q7, Q11, Q15]>;
// QQ sub-registers of QQQQ registers.
// Index 17 pairs QQQQ0..QQQQ3 with QQ0, QQ2, QQ4, QQ6 and index 18 pairs them
// with QQ1, QQ3, QQ5, QQ7, so each QQQQ register gets two QQ sub-registers.
def : SubRegSet<17, [QQQQ0, QQQQ1, QQQQ2, QQQQ3],
[QQ0, QQ2, QQ4, QQ6]>;
def : SubRegSet<18, [QQQQ0, QQQQ1, QQQQ2, QQQQ3],
[QQ1, QQ3, QQ5, QQ7]>;

View File

@ -414,7 +414,9 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
return false;
LastSrcReg = VirtReg;
const TargetRegisterClass *RC = MRI->getRegClass(VirtReg);
if (RC != ARM::QPRRegisterClass && RC != ARM::QQPRRegisterClass)
if (RC != ARM::QPRRegisterClass &&
RC != ARM::QQPRRegisterClass &&
RC != ARM::QQQQPRRegisterClass)
return false;
unsigned SubIdx = DefMI->getOperand(2).getImm();
if (LastSubIdx) {
@ -432,7 +434,7 @@ NEONPreAllocPass::FormsRegSequence(MachineInstr *MI,
// FIXME: Update the uses of EXTRACT_SUBREG from REG_SEQUENCE is
// currently required for correctness. e.g.
// %reg1041;<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
// %reg1041;<def> = REG_SEQUENCE %reg1040<kill>, 5, %reg1035<kill>, 6
// %reg1042<def> = EXTRACT_SUBREG %reg1041, 6
// %reg1043<def> = EXTRACT_SUBREG %reg1041, 5
// VST1q16 %reg1025<kill>, 0, %reg1043<kill>, %reg1042<kill>,