[PBQP] Unique allowed-sets for nodes in the PBQP graph and use pairs of these

sets as keys into a cache of interference matrix values in the Interference
constraint adder.

Creating interference matrices was one of the large remaining time-sinks in
PBQP. Caching them reduces the total compile time (when using PBQP) on the
nightly test suite by ~10%.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@220688 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Lang Hames 2014-10-27 17:44:25 +00:00
parent 86ec9c4081
commit 57902cc070
5 changed files with 247 additions and 86 deletions

View File

@ -28,7 +28,7 @@ namespace PBQP {
template <typename ValueT>
class ValuePool {
public:
typedef std::shared_ptr<ValueT> PoolRef;
typedef std::shared_ptr<const ValueT> PoolRef;
private:
@ -38,7 +38,6 @@ private:
PoolEntry(ValuePool &Pool, ValueKeyT Value)
: Pool(Pool), Value(std::move(Value)) {}
~PoolEntry() { Pool.removeEntry(this); }
ValueT& getValue() { return Value; }
const ValueT& getValue() const { return Value; }
private:
ValuePool &Pool;

View File

@ -387,9 +387,29 @@ namespace PBQP {
return NId;
}
/// @brief Add a node, attaching its costs directly instead of routing them
///        through the cost allocator.
/// @param Costs Pointer to the cost vector for the new node (must be
///              convertible to VectorPtr).
/// @return Id of the newly added node.
///
/// Use this when the costs are already in their final, allocated form - the
/// typical case being costs duplicated from an existing node while a pooling
/// allocator is in use. Such costs have already been uniqued, so handing the
/// pointer straight to the new node avoids constructing and uniquing them a
/// second time.
template <typename OtherVectorPtrT>
NodeId addNodeBypassingCostAllocator(OtherVectorPtrT Costs) {
  NodeId NewNId = addConstructedNode(NodeEntry(Costs));

  // Let an attached solver (if there is one) know about the new node.
  if (Solver) {
    Solver->handleAddNode(NewNId);
  }

  return NewNId;
}
/// @brief Add an edge between the given nodes with the given costs.
/// @param N1Id First node.
/// @param N2Id Second node.
/// @param Costs Cost matrix for new edge.
/// @return Edge iterator for the added edge.
template <typename OtherVectorT>
EdgeId addEdge(NodeId N1Id, NodeId N2Id, OtherVectorT Costs) {
@ -404,6 +424,31 @@ namespace PBQP {
return EId;
}
/// @brief Add an edge bypassing the cost allocator.
/// @param N1Id First node.
/// @param N2Id Second node.
/// @param Costs Cost matrix for new edge.
/// @return Edge iterator for the added edge.
///
/// This method allows for fast addition of an edge whose costs don't need
/// to be passed through the cost allocator. The most common use case for
/// this is when duplicating costs from an existing edge (when using a
/// pooling allocator). These have already been uniqued, so we can avoid
/// re-constructing and re-uniquing them by attaching them directly to the
/// new edge.
template <typename OtherMatrixPtrT>
NodeId addEdgeBypassingCostAllocator(NodeId N1Id, NodeId N2Id,
OtherMatrixPtrT Costs) {
assert(getNodeCosts(N1Id).getLength() == Costs->getRows() &&
getNodeCosts(N2Id).getLength() == Costs->getCols() &&
"Matrix dimensions mismatch.");
// Get cost matrix from the problem domain.
EdgeId EId = addConstructedEdge(EdgeEntry(N1Id, N2Id, Costs));
if (Solver)
Solver->handleAddEdge(EId);
return EId;
}
/// @brief Returns true if the graph is empty.
bool empty() const { return NodeIdSet(*this).empty(); }
@ -431,10 +476,24 @@ namespace PBQP {
getNode(NId).Costs = AllocatedCosts;
}
/// @brief Get a node's cost vector (const version).
/// @brief Get the VectorPtr that holds a node's cost vector. Rarely needed -
///        reach for getNodeCosts unless the pointer itself is required.
/// @param NId Node id.
/// @return Reference to the node's VectorPtr.
///
/// The main use of this accessor is cheap duplication of costs that skips
/// the cost allocator (see addNodeBypassingCostAllocator). When only the
/// cost values are of interest, getNodeCosts is the better choice.
const VectorPtr& getNodeCostsPtr(NodeId NId) const {
  const auto &Entry = getNode(NId);
  return Entry.Costs;
}
/// @brief Get a node's cost vector.
/// @param NId Node id.
/// @return Node cost vector.
const Vector& getNodeCosts(NodeId NId) const { return *getNode(NId).Costs; }
const Vector& getNodeCosts(NodeId NId) const {
return *getNodeCostsPtr(NId);
}
NodeMetadata& getNodeMetadata(NodeId NId) {
return getNode(NId).Metadata;
@ -459,19 +518,31 @@ namespace PBQP {
getEdge(EId).Costs = AllocatedCosts;
}
/// @brief Get an edge's cost matrix (const version).
/// @brief Get the MatrixPtr that holds an edge's cost matrix. Rarely needed -
///        reach for getEdgeCosts unless the pointer itself is required.
/// @param EId Edge id.
/// @return Reference to the edge's MatrixPtr.
///
/// The main use of this accessor is cheap duplication of costs that skips
/// the cost allocator (see addEdgeBypassingCostAllocator). When only the
/// cost values are of interest, getEdgeCosts is the better choice.
const MatrixPtr& getEdgeCostsPtr(EdgeId EId) const {
  const auto &Entry = getEdge(EId);
  return Entry.Costs;
}
/// @brief Get an edge's cost matrix.
/// @param EId Edge id.
/// @return Edge cost matrix (the object the edge's MatrixPtr points at).
const Matrix& getEdgeCosts(EdgeId EId) const {
  // Same shape as getNodeCosts: dereference the stored cost pointer.
  return *getEdgeCostsPtr(EId);
}
EdgeMetadata& getEdgeMetadata(EdgeId NId) {
return getEdge(NId).Metadata;
EdgeMetadata& getEdgeMetadata(EdgeId EId) {
return getEdge(EId).Metadata;
}
const EdgeMetadata& getEdgeMetadata(EdgeId NId) const {
return getEdge(NId).Metadata;
const EdgeMetadata& getEdgeMetadata(EdgeId EId) const {
return getEdge(EId).Metadata;
}
/// @brief Get the first node connected to this edge.

View File

@ -73,9 +73,109 @@ private:
std::unique_ptr<bool[]> UnsafeCols;
};
/// \brief Holds a vector of the allowed physical regs for a vreg.
///
/// The register numbers live in a heap array of exactly size() elements that
/// this object owns. Copying duplicates the array; moving hands the array
/// over and leaves the source empty (size() == 0), so a moved-from object can
/// still be safely copied, compared, assigned to or destroyed.
class AllowedRegVector {
  friend hash_code hash_value(const AllowedRegVector &);
public:

  /// Construct an empty vector (no allowed registers, no storage).
  AllowedRegVector() : NumOpts(0), Opts(nullptr) {}

  /// Construct from a list of register numbers, copying them in order.
  /// Intentionally not 'explicit': this lets a std::vector<unsigned> be
  /// passed wherever an AllowedRegVector is expected.
  AllowedRegVector(const std::vector<unsigned> &OptVec)
    : NumOpts(OptVec.size()), Opts(new unsigned[NumOpts]) {
    std::copy(OptVec.begin(), OptVec.end(), Opts.get());
  }

  /// Deep copy. NumOpts is declared before Opts, so it is already initialized
  /// when it is used to size the new array.
  AllowedRegVector(const AllowedRegVector &Other)
    : NumOpts(Other.NumOpts), Opts(new unsigned[NumOpts]) {
    std::copy(Other.Opts.get(), Other.Opts.get() + NumOpts, Opts.get());
  }

  /// Move: take over Other's array and reset Other to the empty state so its
  /// size never disagrees with its (now null) storage.
  AllowedRegVector(AllowedRegVector &&Other)
    : NumOpts(Other.NumOpts), Opts(std::move(Other.Opts)) {
    Other.NumOpts = 0;
  }

  /// Deep-copy assignment.
  AllowedRegVector& operator=(const AllowedRegVector &Other) {
    // Self-assignment must be a no-op: replacing our array first would free
    // the very elements we are about to copy from.
    if (this == &Other)
      return *this;
    // Fill the replacement array before touching our own state, so *this is
    // left unchanged if the allocation throws.
    std::unique_ptr<unsigned[]> NewOpts(new unsigned[Other.NumOpts]);
    std::copy(Other.Opts.get(), Other.Opts.get() + Other.NumOpts,
              NewOpts.get());
    NumOpts = Other.NumOpts;
    Opts = std::move(NewOpts);
    return *this;
  }

  /// Move assignment: take over Other's array and leave Other empty.
  AllowedRegVector& operator=(AllowedRegVector &&Other) {
    // Guard against self-move, which would otherwise zero our own size.
    if (this != &Other) {
      NumOpts = Other.NumOpts;
      Opts = std::move(Other.Opts);
      Other.NumOpts = 0;
    }
    return *this;
  }

  /// Number of allowed registers held.
  unsigned size() const { return NumOpts; }

  /// Register number at index I. No bounds check; I must be < size().
  unsigned operator[](size_t I) const { return Opts[I]; }

  /// Two vectors are equal when they have the same length and the same
  /// register numbers in the same order.
  bool operator==(const AllowedRegVector &Other) const {
    if (NumOpts != Other.NumOpts)
      return false;
    return std::equal(Opts.get(), Opts.get() + NumOpts, Other.Opts.get());
  }

  bool operator!=(const AllowedRegVector &Other) const {
    return !(*this == Other);
  }

private:
  unsigned NumOpts;                 // Number of elements in Opts.
  std::unique_ptr<unsigned[]> Opts; // Owned array of register numbers.
};
/// Hash an AllowedRegVector by combining its element count with a hash over
/// its register numbers (taken in order).
inline hash_code hash_value(const AllowedRegVector &OptRegs) {
  unsigned *const First = OptRegs.Opts.get();
  return hash_combine(OptRegs.NumOpts,
                      hash_combine_range(First, First + OptRegs.NumOpts));
}
/// \brief Holds graph-level metadata relevant to PBQP RA problems.
///
/// Bundles references to the function being allocated and its analyses, a
/// vreg -> node id map, and a ValuePool of AllowedRegVectors from which nodes
/// obtain their allowed-register sets.
class GraphMetadata {
private:
  typedef ValuePool<AllowedRegVector> AllowedRegVecPool;
public:

  /// Handle to a pooled AllowedRegVector.
  typedef typename AllowedRegVecPool::PoolRef AllowedRegVecRef;

  GraphMetadata(MachineFunction &MF,
                LiveIntervals &LIS,
                MachineBlockFrequencyInfo &MBFI)
    : MF(MF), LIS(LIS), MBFI(MBFI) {}

  // Stored by reference; this object does not own them.
  MachineFunction &MF;
  LiveIntervals &LIS;
  MachineBlockFrequencyInfo &MBFI;

  /// Record (or overwrite) the node id associated with VReg.
  void setNodeIdForVReg(unsigned VReg, GraphBase::NodeId NId) {
    VRegToNodeId[VReg] = NId;
  }

  /// Look up the node id recorded for VReg; yields
  /// GraphBase::invalidNodeId() when VReg has no entry.
  GraphBase::NodeId getNodeIdForVReg(unsigned VReg) const {
    auto Pos = VRegToNodeId.find(VReg);
    if (Pos != VRegToNodeId.end())
      return Pos->second;
    return GraphBase::invalidNodeId();
  }

  /// Drop any node id recorded for VReg.
  void eraseNodeIdForVReg(unsigned VReg) {
    VRegToNodeId.erase(VReg);
  }

  /// Hand Allowed to the pool and return the pool's handle for that value.
  AllowedRegVecRef getAllowedRegs(AllowedRegVector Allowed) {
    return AllowedRegVecs.getValue(std::move(Allowed));
  }

private:
  DenseMap<unsigned, GraphBase::NodeId> VRegToNodeId;
  AllowedRegVecPool AllowedRegVecs;
};
/// \brief Holds solver state and other metadata relevant to each PBQP RA node.
class NodeMetadata {
public:
typedef std::vector<unsigned> OptionToRegMap;
typedef AllowedRegVector AllowedRegVector;
typedef enum { Unprocessed,
OptimallyReducible,
@ -91,7 +191,7 @@ public:
NodeMetadata(const NodeMetadata &Other)
: RS(Other.RS), NumOpts(Other.NumOpts), DeniedOpts(Other.DeniedOpts),
OptUnsafeEdges(new unsigned[NumOpts]), VReg(Other.VReg),
OptionRegs(Other.OptionRegs) {
AllowedRegs(Other.AllowedRegs) {
std::copy(&Other.OptUnsafeEdges[0], &Other.OptUnsafeEdges[NumOpts],
&OptUnsafeEdges[0]);
}
@ -101,7 +201,7 @@ public:
NodeMetadata(NodeMetadata &&Other)
: RS(Other.RS), NumOpts(Other.NumOpts), DeniedOpts(Other.DeniedOpts),
OptUnsafeEdges(std::move(Other.OptUnsafeEdges)), VReg(Other.VReg),
OptionRegs(std::move(Other.OptionRegs)) {}
AllowedRegs(std::move(Other.AllowedRegs)) {}
// FIXME: Re-implementing default behavior to work around MSVC. Remove once
// MSVC synthesizes move constructors properly.
@ -113,7 +213,7 @@ public:
std::copy(Other.OptUnsafeEdges.get(), Other.OptUnsafeEdges.get() + NumOpts,
OptUnsafeEdges.get());
VReg = Other.VReg;
OptionRegs = Other.OptionRegs;
AllowedRegs = Other.AllowedRegs;
return *this;
}
@ -125,17 +225,17 @@ public:
DeniedOpts = Other.DeniedOpts;
OptUnsafeEdges = std::move(Other.OptUnsafeEdges);
VReg = Other.VReg;
OptionRegs = std::move(Other.OptionRegs);
AllowedRegs = std::move(Other.AllowedRegs);
return *this;
}
void setVReg(unsigned VReg) { this->VReg = VReg; }
unsigned getVReg() const { return VReg; }
void setOptionRegs(OptionToRegMap OptionRegs) {
this->OptionRegs = std::move(OptionRegs);
void setAllowedRegs(GraphMetadata::AllowedRegVecRef AllowedRegs) {
this->AllowedRegs = std::move(AllowedRegs);
}
const OptionToRegMap& getOptionRegs() const { return OptionRegs; }
const AllowedRegVector& getAllowedRegs() const { return *AllowedRegs; }
void setup(const Vector& Costs) {
NumOpts = Costs.getLength() - 1;
@ -173,7 +273,7 @@ private:
unsigned DeniedOpts;
std::unique_ptr<unsigned[]> OptUnsafeEdges;
unsigned VReg;
OptionToRegMap OptionRegs;
GraphMetadata::AllowedRegVecRef AllowedRegs;
};
class RegAllocSolverImpl {
@ -190,38 +290,8 @@ public:
typedef GraphBase::EdgeId EdgeId;
typedef RegAlloc::NodeMetadata NodeMetadata;
struct EdgeMetadata { };
// Graph-level metadata for the register-allocation problem: references to the
// function and its analyses, plus a vreg -> node id map.
// NOTE(review): this nested definition looks superseded by the
// RegAlloc::GraphMetadata typedef that follows it - confirm.
class GraphMetadata {
public:
GraphMetadata(MachineFunction &MF,
LiveIntervals &LIS,
MachineBlockFrequencyInfo &MBFI)
: MF(MF), LIS(LIS), MBFI(MBFI) {}
// Stored by reference; this object does not own them.
MachineFunction &MF;
LiveIntervals &LIS;
MachineBlockFrequencyInfo &MBFI;
// Record (or overwrite) the node id associated with VReg.
void setNodeIdForVReg(unsigned VReg, GraphBase::NodeId NId) {
VRegToNodeId[VReg] = NId;
}
// Look up the node id recorded for VReg; yields GraphBase::invalidNodeId()
// when VReg has no entry.
GraphBase::NodeId getNodeIdForVReg(unsigned VReg) const {
auto VRegItr = VRegToNodeId.find(VReg);
if (VRegItr == VRegToNodeId.end())
return GraphBase::invalidNodeId();
return VRegItr->second;
}
// Drop any node id recorded for VReg.
void eraseNodeIdForVReg(unsigned VReg) {
VRegToNodeId.erase(VReg);
}
private:
DenseMap<unsigned, NodeId> VRegToNodeId;
};
typedef RegAlloc::GraphMetadata GraphMetadata;
typedef PBQP::Graph<RegAllocSolverImpl> Graph;

View File

@ -166,6 +166,12 @@ public:
class Interference : public PBQPRAConstraint {
private:
private:
typedef const PBQP::RegAlloc::AllowedRegVector* AllowedRegVecPtr;
typedef std::pair<AllowedRegVecPtr, AllowedRegVecPtr> IMatrixKey;
typedef DenseMap<IMatrixKey, PBQPRAGraph::MatrixPtr> IMatrixCache;
// Holds (Interval, CurrentSegmentID, and NodeId). The first two are required
// for the fast interference graph construction algorithm. The last is there
// to save us from looking up node ids via the VRegToNode map in the graph
@ -226,8 +232,11 @@ public:
// number of registers, but rather the size of the largest clique in the
// graph. Still, we expect this to be better than N^2.
LiveIntervals &LIS = G.getMetadata().LIS;
const TargetRegisterInfo &TRI =
*G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
// Interference matrices are incredibly regular - they're only a function of
// the allowed sets, so we cache them to avoid the overhead of constructing
// and uniquing them.
IMatrixCache C;
typedef std::set<IntervalInfo, decltype(&lowestEndPoint)> IntervalSet;
typedef std::priority_queue<IntervalInfo, std::vector<IntervalInfo>,
@ -275,13 +284,11 @@ public:
// Check that we haven't already added this edge
// FIXME: findEdge is expensive in the worst case (O(max_clique(G))).
// It might be better to replace this with a local bit-matrix.
if (G.findEdge(NId, MId) != PBQP::GraphBase::invalidEdgeId())
if (G.findEdge(NId, MId) != PBQPRAGraph::invalidEdgeId())
continue;
// This is a new edge - add it to the graph.
const auto &NOpts = G.getNodeMetadata(NId).getOptionRegs();
const auto &MOpts = G.getNodeMetadata(MId).getOptionRegs();
G.addEdge(NId, MId, createInterferenceMatrix(TRI, NOpts, MOpts));
createInterferenceEdge(G, NId, MId, C);
}
// Finally, add Cur to the Active set.
@ -291,21 +298,35 @@ public:
private:
PBQPRAGraph::RawMatrix createInterferenceMatrix(
const TargetRegisterInfo &TRI,
const PBQPRAGraph::NodeMetadata::OptionToRegMap &NOpts,
const PBQPRAGraph::NodeMetadata::OptionToRegMap &MOpts) {
PBQPRAGraph::RawMatrix M(NOpts.size() + 1, MOpts.size() + 1, 0);
for (unsigned I = 0; I != NOpts.size(); ++I) {
unsigned PRegN = NOpts[I];
for (unsigned J = 0; J != MOpts.size(); ++J) {
unsigned PRegM = MOpts[J];
void createInterferenceEdge(PBQPRAGraph &G, PBQPRAGraph::NodeId NId,
PBQPRAGraph::NodeId MId, IMatrixCache &C) {
const TargetRegisterInfo &TRI =
*G.getMetadata().MF.getTarget().getSubtargetImpl()->getRegisterInfo();
const auto &NRegs = G.getNodeMetadata(NId).getAllowedRegs();
const auto &MRegs = G.getNodeMetadata(MId).getAllowedRegs();
// Try looking the edge costs up in the IMatrixCache first.
IMatrixKey K(&NRegs, &MRegs);
IMatrixCache::iterator I = C.find(K);
if (I != C.end()) {
G.addEdgeBypassingCostAllocator(NId, MId, I->second);
return;
}
PBQPRAGraph::RawMatrix M(NRegs.size() + 1, MRegs.size() + 1, 0);
for (unsigned I = 0; I != NRegs.size(); ++I) {
unsigned PRegN = NRegs[I];
for (unsigned J = 0; J != MRegs.size(); ++J) {
unsigned PRegM = MRegs[J];
if (TRI.regsOverlap(PRegN, PRegM))
M[I + 1][J + 1] = std::numeric_limits<PBQP::PBQPNum>::infinity();
}
}
return M;
PBQPRAGraph::EdgeId EId = G.addEdge(NId, MId, std::move(M));
C[K] = G.getEdgeCostsPtr(EId);
}
};
@ -341,8 +362,8 @@ public:
PBQPRAGraph::NodeId NId = G.getMetadata().getNodeIdForVReg(SrcReg);
const PBQPRAGraph::NodeMetadata::OptionToRegMap &Allowed =
G.getNodeMetadata(NId).getOptionRegs();
const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed =
G.getNodeMetadata(NId).getAllowedRegs();
unsigned PRegOpt = 0;
while (PRegOpt < Allowed.size() && Allowed[PRegOpt] != DstReg)
@ -356,10 +377,10 @@ public:
} else {
PBQPRAGraph::NodeId N1Id = G.getMetadata().getNodeIdForVReg(DstReg);
PBQPRAGraph::NodeId N2Id = G.getMetadata().getNodeIdForVReg(SrcReg);
const PBQPRAGraph::NodeMetadata::OptionToRegMap *Allowed1 =
&G.getNodeMetadata(N1Id).getOptionRegs();
const PBQPRAGraph::NodeMetadata::OptionToRegMap *Allowed2 =
&G.getNodeMetadata(N2Id).getOptionRegs();
const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed1 =
&G.getNodeMetadata(N1Id).getAllowedRegs();
const PBQPRAGraph::NodeMetadata::AllowedRegVector *Allowed2 =
&G.getNodeMetadata(N2Id).getAllowedRegs();
PBQPRAGraph::EdgeId EId = G.findEdge(N1Id, N2Id);
if (EId == G.invalidEdgeId()) {
@ -384,10 +405,10 @@ public:
private:
void addVirtRegCoalesce(
PBQPRAGraph::RawMatrix &CostMat,
const PBQPRAGraph::NodeMetadata::OptionToRegMap &Allowed1,
const PBQPRAGraph::NodeMetadata::OptionToRegMap &Allowed2,
PBQP::PBQPNum Benefit) {
PBQPRAGraph::RawMatrix &CostMat,
const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed1,
const PBQPRAGraph::NodeMetadata::AllowedRegVector &Allowed2,
PBQP::PBQPNum Benefit) {
assert(CostMat.getRows() == Allowed1.size() + 1 && "Size mismatch.");
assert(CostMat.getCols() == Allowed2.size() + 1 && "Size mismatch.");
for (unsigned I = 0; I != Allowed1.size(); ++I) {
@ -501,7 +522,8 @@ void RegAllocPBQP::initializeGraph(PBQPRAGraph &G) {
PBQPRAGraph::RawVector NodeCosts(VRegAllowed.size() + 1, 0);
PBQPRAGraph::NodeId NId = G.addNode(std::move(NodeCosts));
G.getNodeMetadata(NId).setVReg(VReg);
G.getNodeMetadata(NId).setOptionRegs(std::move(VRegAllowed));
G.getNodeMetadata(NId).setAllowedRegs(
G.getMetadata().getAllowedRegs(std::move(VRegAllowed)));
G.getMetadata().setNodeIdForVReg(VReg, NId);
}
}
@ -529,7 +551,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
unsigned AllocOption = Solution.getSelection(NId);
if (AllocOption != PBQP::RegAlloc::getSpillOptionIdx()) {
unsigned PReg = G.getNodeMetadata(NId).getOptionRegs()[AllocOption - 1];
unsigned PReg = G.getNodeMetadata(NId).getAllowedRegs()[AllocOption - 1];
DEBUG(dbgs() << "VREG " << PrintReg(VReg, &TRI) << " -> "
<< TRI.getName(PReg) << "\n");
assert(PReg != 0 && "Invalid preg selected.");
@ -563,7 +585,6 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAGraph &G,
return !AnotherRoundNeeded;
}
void RegAllocPBQP::finalizeAlloc(MachineFunction &MF,
LiveIntervals &LIS,
VirtRegMap &VRM) const {

View File

@ -174,10 +174,10 @@ bool A57ChainingConstraint::addIntraChainConstraint(PBQPRAGraph &G, unsigned Rd,
PBQPRAGraph::NodeId node1 = G.getMetadata().getNodeIdForVReg(Rd);
PBQPRAGraph::NodeId node2 = G.getMetadata().getNodeIdForVReg(Ra);
const PBQPRAGraph::NodeMetadata::OptionToRegMap *vRdAllowed =
&G.getNodeMetadata(node1).getOptionRegs();
const PBQPRAGraph::NodeMetadata::OptionToRegMap *vRaAllowed =
&G.getNodeMetadata(node2).getOptionRegs();
const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRdAllowed =
&G.getNodeMetadata(node1).getAllowedRegs();
const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRaAllowed =
&G.getNodeMetadata(node2).getAllowedRegs();
PBQPRAGraph::EdgeId edge = G.findEdge(node1, node2);
@ -268,12 +268,12 @@ void A57ChainingConstraint::addInterChainConstraint(PBQPRAGraph &G, unsigned Rd,
const LiveInterval &lr = LIs.getInterval(r);
if (ld.overlaps(lr)) {
const PBQPRAGraph::NodeMetadata::OptionToRegMap *vRdAllowed =
&G.getNodeMetadata(node1).getOptionRegs();
const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRdAllowed =
&G.getNodeMetadata(node1).getAllowedRegs();
PBQPRAGraph::NodeId node2 = G.getMetadata().getNodeIdForVReg(r);
const PBQPRAGraph::NodeMetadata::OptionToRegMap *vRrAllowed =
&G.getNodeMetadata(node2).getOptionRegs();
const PBQPRAGraph::NodeMetadata::AllowedRegVector *vRrAllowed =
&G.getNodeMetadata(node2).getAllowedRegs();
PBQPRAGraph::EdgeId edge = G.findEdge(node1, node2);
assert(edge != G.invalidEdgeId() &&