Fix PR3241: Currently, EmitCopyFromReg emits a copy from the physical register to a virtual register unless doing so requires an expensive cross-class copy. That means only "expensive to copy" register dependencies are treated as physical register dependencies.

Also future-proof the scheduler to handle "normal" physical register dependencies. That code is not exercised yet.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@62074 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng
2009-01-12 03:19:55 +00:00
parent e1762c9826
commit c29a56dedb
7 changed files with 114 additions and 66 deletions

View File

@ -485,7 +485,7 @@ namespace llvm {
protected: protected:
void AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO); void AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO);
void EmitCrossRCCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap); void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
/// ForceUnitLatencies - Return true if all scheduling edges should be given a /// ForceUnitLatencies - Return true if all scheduling edges should be given a
/// latency value of one. The default is to return false; schedulers may /// latency value of one. The default is to return false; schedulers may

View File

@ -36,7 +36,7 @@ void ScheduleDAG::EmitNoop() {
TII->insertNoop(*BB, BB->end()); TII->insertNoop(*BB, BB->end());
} }
void ScheduleDAG::EmitCrossRCCopy(SUnit *SU, void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
DenseMap<SUnit*, unsigned> &VRBaseMap) { DenseMap<SUnit*, unsigned> &VRBaseMap) {
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) { I != E; ++I) {
@ -49,12 +49,11 @@ void ScheduleDAG::EmitCrossRCCopy(SUnit *SU,
unsigned Reg = 0; unsigned Reg = 0;
for (SUnit::const_succ_iterator II = SU->Succs.begin(), for (SUnit::const_succ_iterator II = SU->Succs.begin(),
EE = SU->Succs.end(); II != EE; ++II) { EE = SU->Succs.end(); II != EE; ++II) {
if (I->getReg()) { if (II->getReg()) {
Reg = I->getReg(); Reg = II->getReg();
break; break;
} }
} }
assert(I->getReg() && "Unknown physical register!");
TII->copyRegToReg(*BB, BB->end(), Reg, VRI->second, TII->copyRegToReg(*BB, BB->end(), Reg, VRI->second,
SU->CopyDstRC, SU->CopySrcRC); SU->CopyDstRC, SU->CopySrcRC);
} else { } else {

View File

@ -28,7 +28,7 @@ using namespace llvm;
STATISTIC(NumUnfolds, "Number of nodes unfolded"); STATISTIC(NumUnfolds, "Number of nodes unfolded");
STATISTIC(NumDups, "Number of duplicated nodes"); STATISTIC(NumDups, "Number of duplicated nodes");
STATISTIC(NumCCCopies, "Number of cross class copies"); STATISTIC(NumPRCopies, "Number of physical copies");
static RegisterScheduler static RegisterScheduler
fastDAGScheduler("fast", "Fast suboptimal list scheduling", fastDAGScheduler("fast", "Fast suboptimal list scheduling",
@ -93,10 +93,10 @@ private:
void ReleasePred(SUnit *SU, SDep *PredEdge); void ReleasePred(SUnit *SU, SDep *PredEdge);
void ScheduleNodeBottomUp(SUnit*, unsigned); void ScheduleNodeBottomUp(SUnit*, unsigned);
SUnit *CopyAndMoveSuccessors(SUnit*); SUnit *CopyAndMoveSuccessors(SUnit*);
void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned, void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
const TargetRegisterClass*, const TargetRegisterClass*,
const TargetRegisterClass*, const TargetRegisterClass*,
SmallVector<SUnit*, 2>&); SmallVector<SUnit*, 2>&);
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
void ListScheduleBottomUp(); void ListScheduleBottomUp();
@ -361,17 +361,16 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
DelDeps.push_back(std::make_pair(SuccSU, D)); DelDeps.push_back(std::make_pair(SuccSU, D));
} }
} }
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
RemovePred(DelDeps[i].first, DelDeps[i].second); RemovePred(DelDeps[i].first, DelDeps[i].second);
}
++NumDups; ++NumDups;
return NewSU; return NewSU;
} }
/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies /// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// and move all scheduled successors of the given SUnit to the last copy. /// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGFast::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
const TargetRegisterClass *DestRC, const TargetRegisterClass *DestRC,
const TargetRegisterClass *SrcRC, const TargetRegisterClass *SrcRC,
SmallVector<SUnit*, 2> &Copies) { SmallVector<SUnit*, 2> &Copies) {
@ -408,7 +407,7 @@ void ScheduleDAGFast::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
Copies.push_back(CopyFromSU); Copies.push_back(CopyFromSU);
Copies.push_back(CopyToSU); Copies.push_back(CopyToSU);
++NumCCCopies; ++NumPRCopies;
} }
/// getPhysicalRegisterVT - Returns the ValueType of the physical register /// getPhysicalRegisterVT - Returns the ValueType of the physical register
@ -524,19 +523,22 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
assert(LRegs.size() == 1 && "Can't handle this yet!"); assert(LRegs.size() == 1 && "Can't handle this yet!");
unsigned Reg = LRegs[0]; unsigned Reg = LRegs[0];
SUnit *LRDef = LiveRegDefs[Reg]; SUnit *LRDef = LiveRegDefs[Reg];
SUnit *NewDef = CopyAndMoveSuccessors(LRDef); MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
TRI->getPhysicalRegisterRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
// If the cross copy register class is null, then it must be possible to
// copy the value directly. Do not try to duplicate the def.
SUnit *NewDef = 0;
if (DestRC)
NewDef = CopyAndMoveSuccessors(LRDef);
else
DestRC = RC;
if (!NewDef) { if (!NewDef) {
// Issue expensive cross register class copies. // Issue copies, these can be expensive cross register class copies.
MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
TRI->getPhysicalRegisterRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
if (!DestRC) {
assert(false && "Don't know how to copy this physical register!");
abort();
}
SmallVector<SUnit*, 2> Copies; SmallVector<SUnit*, 2> Copies;
InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
DOUT << "Adding an edge from SU # " << TrySU->NodeNum DOUT << "Adding an edge from SU # " << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n"; << " to SU #" << Copies.front()->NodeNum << "\n";
AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,

View File

@ -35,7 +35,7 @@ using namespace llvm;
STATISTIC(NumBacktracks, "Number of times scheduler backtracked"); STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
STATISTIC(NumUnfolds, "Number of nodes unfolded"); STATISTIC(NumUnfolds, "Number of nodes unfolded");
STATISTIC(NumDups, "Number of duplicated nodes"); STATISTIC(NumDups, "Number of duplicated nodes");
STATISTIC(NumCCCopies, "Number of cross class copies"); STATISTIC(NumPRCopies, "Number of physical register copies");
static RegisterScheduler static RegisterScheduler
burrListDAGScheduler("list-burr", burrListDAGScheduler("list-burr",
@ -121,10 +121,10 @@ private:
void UnscheduleNodeBottomUp(SUnit*); void UnscheduleNodeBottomUp(SUnit*);
void BacktrackBottomUp(SUnit*, unsigned, unsigned&); void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
SUnit *CopyAndMoveSuccessors(SUnit*); SUnit *CopyAndMoveSuccessors(SUnit*);
void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned, void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
const TargetRegisterClass*, const TargetRegisterClass*,
const TargetRegisterClass*, const TargetRegisterClass*,
SmallVector<SUnit*, 2>&); SmallVector<SUnit*, 2>&);
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
void ListScheduleTopDown(); void ListScheduleTopDown();
void ListScheduleBottomUp(); void ListScheduleBottomUp();
@ -517,11 +517,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
return NewSU; return NewSU;
} }
/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies /// InsertCopiesAndMoveSuccs - Insert register copies and move all
/// and move all scheduled successors of the given SUnit to the last copy. /// scheduled successors of the given SUnit to the last copy.
void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
const TargetRegisterClass *DestRC, const TargetRegisterClass *DestRC,
const TargetRegisterClass *SrcRC, const TargetRegisterClass *SrcRC,
SmallVector<SUnit*, 2> &Copies) { SmallVector<SUnit*, 2> &Copies) {
SUnit *CopyFromSU = CreateNewSUnit(NULL); SUnit *CopyFromSU = CreateNewSUnit(NULL);
CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopySrcRC = SrcRC;
@ -546,9 +546,8 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
DelDeps.push_back(std::make_pair(SuccSU, *I)); DelDeps.push_back(std::make_pair(SuccSU, *I));
} }
} }
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) { for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
RemovePred(DelDeps[i].first, DelDeps[i].second); RemovePred(DelDeps[i].first, DelDeps[i].second);
}
AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg)); AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0)); AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
@ -559,7 +558,7 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
Copies.push_back(CopyFromSU); Copies.push_back(CopyFromSU);
Copies.push_back(CopyToSU); Copies.push_back(CopyToSU);
++NumCCCopies; ++NumPRCopies;
} }
/// getPhysicalRegisterVT - Returns the ValueType of the physical register /// getPhysicalRegisterVT - Returns the ValueType of the physical register
@ -705,27 +704,32 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
} }
if (!CurSU) { if (!CurSU) {
// Can't backtrack. Try duplicating the nodes that produces these // Can't backtrack. If it's too expensive to copy the value, then try
// "expensive to copy" values to break the dependency. In case even // duplicate the nodes that produces these "too expensive to copy"
// that doesn't work, insert cross class copies. // values to break the dependency. In case even that doesn't work,
// insert cross class copies.
// If it's not too expensive, i.e. cost != -1, issue copies.
SUnit *TrySU = NotReady[0]; SUnit *TrySU = NotReady[0];
SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU]; SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
assert(LRegs.size() == 1 && "Can't handle this yet!"); assert(LRegs.size() == 1 && "Can't handle this yet!");
unsigned Reg = LRegs[0]; unsigned Reg = LRegs[0];
SUnit *LRDef = LiveRegDefs[Reg]; SUnit *LRDef = LiveRegDefs[Reg];
SUnit *NewDef = CopyAndMoveSuccessors(LRDef); MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
TRI->getPhysicalRegisterRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
// If the cross copy register class is null, then it must be possible to
// copy the value directly. Do not try to duplicate the def.
SUnit *NewDef = 0;
if (DestRC)
NewDef = CopyAndMoveSuccessors(LRDef);
else
DestRC = RC;
if (!NewDef) { if (!NewDef) {
// Issue expensive cross register class copies. // Issue copies, these can be expensive cross register class copies.
MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
const TargetRegisterClass *RC =
TRI->getPhysicalRegisterRegClass(Reg, VT);
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
if (!DestRC) {
assert(false && "Don't know how to copy this physical register!");
abort();
}
SmallVector<SUnit*, 2> Copies; SmallVector<SUnit*, 2> Copies;
InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies); InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
DOUT << "Adding an edge from SU #" << TrySU->NodeNum DOUT << "Adding an edge from SU #" << TrySU->NodeNum
<< " to SU #" << Copies.front()->NodeNum << "\n"; << " to SU #" << Copies.front()->NodeNum << "\n";
AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1, AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,

View File

@ -39,11 +39,11 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
/// CheckForPhysRegDependency - Check if the dependency between def and use of /// CheckForPhysRegDependency - Check if the dependency between def and use of
/// a specified operand is a physical register dependency. If so, returns the /// a specified operand is a physical register dependency. If so, returns the
/// register. /// register and the cost of copying the register.
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op, static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
const TargetRegisterInfo *TRI, const TargetRegisterInfo *TRI,
const TargetInstrInfo *TII, const TargetInstrInfo *TII,
unsigned &PhysReg) { unsigned &PhysReg, int &Cost) {
if (Op != 2 || User->getOpcode() != ISD::CopyToReg) if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
return; return;
@ -55,8 +55,12 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
if (Def->isMachineOpcode()) { if (Def->isMachineOpcode()) {
const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
if (ResNo >= II.getNumDefs() && if (ResNo >= II.getNumDefs() &&
II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
PhysReg = Reg; PhysReg = Reg;
const TargetRegisterClass *RC =
TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
Cost = RC->getCopyCost();
}
} }
} }
@ -179,10 +183,18 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
bool isChain = OpVT == MVT::Other; bool isChain = OpVT == MVT::Other;
unsigned PhysReg = 0; unsigned PhysReg = 0;
int Cost = 1;
// Determine if this is a physical register dependency. // Determine if this is a physical register dependency.
CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg); CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
assert((PhysReg == 0 || !isChain) && assert((PhysReg == 0 || !isChain) &&
"Chain dependence via physreg data?"); "Chain dependence via physreg data?");
// FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
// emits a copy from the physical register to a virtual register unless
// it requires a cross class copy (cost < 0). That means we are only
// treating "expensive to copy" register dependency as physical register
// dependency. This may change in the future though.
if (Cost >= 0)
PhysReg = 0;
SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data, SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data,
OpSU->Latency, PhysReg)); OpSU->Latency, PhysReg));
} }
@ -252,10 +264,12 @@ unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) {
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const { void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
if (SU->getNode()) if (!SU->getNode()) {
SU->getNode()->dump(DAG); cerr << "PHYS REG COPY\n";
else return;
cerr << "CROSS RC COPY "; }
SU->getNode()->dump(DAG);
cerr << "\n"; cerr << "\n";
SmallVector<SDNode *, 4> FlaggedNodes; SmallVector<SDNode *, 4> FlaggedNodes;
for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())

View File

@ -629,6 +629,12 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
// For pre-regalloc scheduling, create instructions corresponding to the // For pre-regalloc scheduling, create instructions corresponding to the
// SDNode and any flagged SDNodes and append them to the block. // SDNode and any flagged SDNodes and append them to the block.
if (!SU->getNode()) {
// Emit a copy.
EmitPhysRegCopy(SU, CopyVRBaseMap);
continue;
}
SmallVector<SDNode *, 4> FlaggedNodes; SmallVector<SDNode *, 4> FlaggedNodes;
for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode()) for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
FlaggedNodes.push_back(N); FlaggedNodes.push_back(N);
@ -636,10 +642,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, VRBaseMap); EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, VRBaseMap);
FlaggedNodes.pop_back(); FlaggedNodes.pop_back();
} }
if (!SU->getNode()) EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap);
EmitCrossRCCopy(SU, CopyVRBaseMap);
else
EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap);
} }
return BB; return BB;

View File

@ -0,0 +1,26 @@
; RUN: llvm-as < %s | llc -march=x86
; PR3244
@g_62 = external global i16 ; <i16*> [#uses=1]
@g_487 = external global i32 ; <i32*> [#uses=1]
; func_42 - Body of this regression test. The RUN line at the top of the file
; pipes the module through llvm-as and llc -march=x86 without checking the
; output, so the test passes as long as llc completes.
;
; The function loads two globals, truncates both to i8, calls @func_7,
; truncates its result to i8, and chains two i8 multiplies whose result is
; sign-extended and passed to @func_85. None of the four parameters is used.
; NOTE(review): presumably the i8 multiplies around a call are what create the
; physical register dependency the scheduler has to copy - confirm against the
; referenced PR.
define i32 @func_42(i32 %p_43, i32 %p_44, i32 %p_45, i32 %p_46) nounwind {
entry:
%0 = load i16* @g_62, align 2 ; <i16> [#uses=1]
%1 = load i32* @g_487, align 4 ; <i32> [#uses=1]
%2 = trunc i16 %0 to i8 ; <i8> [#uses=1]
%3 = trunc i32 %1 to i8 ; <i8> [#uses=1]
; Call whose i32 result is narrowed to i8 and used by the second multiply.
%4 = tail call i32 (...)* @func_7(i64 -4455561449541442965, i32 1)
nounwind ; <i32> [#uses=1]
%5 = trunc i32 %4 to i8 ; <i8> [#uses=1]
; First multiply uses only the two loaded values; the second also uses the
; call result.
%6 = mul i8 %3, %2 ; <i8> [#uses=1]
%7 = mul i8 %6, %5 ; <i8> [#uses=1]
%8 = sext i8 %7 to i16 ; <i16> [#uses=1]
%9 = tail call i32 @func_85(i16 signext %8, i32 1, i32 1) nounwind
; <i32> [#uses=0]
; The return value is undef.
ret i32 undef
}
declare i32 @func_7(...)
declare i32 @func_85(i16 signext, i32, i32)