Add initial support for back-scheduling address computations,
especially in the case of addresses computed from loop induction
variables.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61075 91177308-0d34-0410-b5e6-96231b3b80d8
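For context, the pattern this commit targets is a loop in which an address is computed from the induction variable and then feeds a load or store. A minimal illustration (not from this commit; an out-of-order x86 target is assumed):

    // Illustration only: the address arithmetic 'a + i' is derived from the
    // induction variable 'i' and feeds the load on every iteration.
    // Back-scheduling moves such address computations earlier in the schedule
    // so the loads that consume them are not stalled waiting for the address.
    float sum(const float *a, int n) {
      float s = 0.0f;
      for (int i = 0; i < n; ++i)
        s += a[i];
      return s;
    }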
@@ -242,6 +242,7 @@ namespace llvm {
     bool isPending : 1;             // True once pending.
     bool isAvailable : 1;           // True once available.
     bool isScheduled : 1;           // True once scheduled.
+    bool isScheduleHigh : 1;        // True if preferable to schedule high.
   private:
     bool isDepthCurrent : 1;        // True if Depth is current.
     bool isHeightCurrent : 1;       // True if Height is current.
@@ -258,7 +259,7 @@ namespace llvm {
       Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0),
       isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
       isPending(false), isAvailable(false), isScheduled(false),
-      isDepthCurrent(false), isHeightCurrent(false),
+      isScheduleHigh(false), isDepthCurrent(false), isHeightCurrent(false),
       Depth(0), Height(0),
       CopyDstRC(NULL), CopySrcRC(NULL) {}
@@ -269,7 +270,7 @@ namespace llvm {
       Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0), NumSuccsLeft(0),
       isTwoAddress(false), isCommutable(false), hasPhysRegDefs(false),
       isPending(false), isAvailable(false), isScheduled(false),
-      isDepthCurrent(false), isHeightCurrent(false),
+      isScheduleHigh(false), isDepthCurrent(false), isHeightCurrent(false),
       Depth(0), Height(0),
       CopyDstRC(NULL), CopySrcRC(NULL) {}
@@ -29,6 +29,12 @@ protected: // Can only create subclasses...
   TargetSubtarget();
 public:
   virtual ~TargetSubtarget();
+
+  /// getSpecialAddressLatency - For targets where it is beneficial to
+  /// backschedule instructions that compute addresses, return a value
+  /// indicating the number of scheduling cycles of backscheduling that
+  /// should be attempted.
+  virtual unsigned getSpecialAddressLatency() const { return 0; }
 };
 
 } // End llvm namespace
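The default of 0 disables the feature. A sketch of how a target would opt in, using a hypothetical MyTargetSubtarget (the real X86 override appears at the end of this commit):

    // Hypothetical subclass, for illustration only.
    class MyTargetSubtarget : public TargetSubtarget {
    public:
      // Ask the scheduler to attempt ~100 cycles of backscheduling for
      // instructions that compute addresses.
      virtual unsigned getSpecialAddressLatency() const { return 100; }
    };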
@@ -19,6 +19,14 @@
 using namespace llvm;
 
 bool latency_sort::operator()(const SUnit *LHS, const SUnit *RHS) const {
+  // The isScheduleHigh flag allows nodes with wraparound dependencies that
+  // cannot easily be modeled as edges with latencies to be scheduled as
+  // soon as possible in a top-down schedule.
+  if (LHS->isScheduleHigh && !RHS->isScheduleHigh)
+    return false;
+  if (!LHS->isScheduleHigh && RHS->isScheduleHigh)
+    return true;
+
   unsigned LHSNum = LHS->NodeNum;
   unsigned RHSNum = RHS->NodeNum;
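Note the comparator convention: latency_sort is an ordering predicate for a priority queue that pops the element comparing greatest, so returning false when only LHS is schedule-high ranks LHS above RHS and it surfaces first. A minimal self-contained sketch of that convention (the simplified Node and the tiebreak are assumptions, not the real SUnit or heuristics):

    #include <queue>
    #include <vector>

    struct Node { bool isScheduleHigh; unsigned NodeNum; };

    struct LatencySortSketch {
      bool operator()(const Node *LHS, const Node *RHS) const {
        // Returning true means LHS ranks lower and is popped later; a
        // schedule-high node never compares less than an ordinary node.
        if (LHS->isScheduleHigh && !RHS->isScheduleHigh) return false;
        if (!LHS->isScheduleHigh && RHS->isScheduleHigh) return true;
        return LHS->NodeNum > RHS->NodeNum;  // placeholder tiebreak
      }
    };

    // Usage: std::priority_queue<Node*, std::vector<Node*>, LatencySortSketch> Q;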
@@ -15,6 +15,7 @@
 #define DEBUG_TYPE "sched-instrs"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/ScheduleDAGInstrs.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
@@ -29,6 +30,59 @@
 #include <map>
 using namespace llvm;
 
+namespace {
+  class VISIBILITY_HIDDEN LoopDependencies {
+    const MachineLoopInfo &MLI;
+    const MachineDominatorTree &MDT;
+
+  public:
+    typedef std::map<unsigned, std::pair<const MachineOperand *, unsigned> >
+      LoopDeps;
+    LoopDeps Deps;
+
+    LoopDependencies(const MachineLoopInfo &mli,
+                     const MachineDominatorTree &mdt) :
+      MLI(mli), MDT(mdt) {}
+
+    void VisitLoop(const MachineLoop *Loop) {
+      Deps.clear();
+      MachineBasicBlock *Header = Loop->getHeader();
+      SmallSet<unsigned, 8> LoopLiveIns;
+      for (MachineBasicBlock::livein_iterator LI = Header->livein_begin(),
+           LE = Header->livein_end(); LI != LE; ++LI)
+        LoopLiveIns.insert(*LI);
+
+      VisitRegion(MDT.getNode(Header), Loop, LoopLiveIns);
+    }
+
+  private:
+    void VisitRegion(const MachineDomTreeNode *Node,
+                     const MachineLoop *Loop,
+                     const SmallSet<unsigned, 8> &LoopLiveIns) {
+      MachineBasicBlock *MBB = Node->getBlock();
+      if (!Loop->contains(MBB)) return;
+
+      unsigned Count = 0;
+      for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
+           I != E; ++I, ++Count) {
+        const MachineInstr *MI = I;
+        for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+          const MachineOperand &MO = MI->getOperand(i);
+          if (!MO.isReg() || !MO.isUse())
+            continue;
+          unsigned MOReg = MO.getReg();
+          if (LoopLiveIns.count(MOReg))
+            Deps.insert(std::make_pair(MOReg, std::make_pair(&MO, Count)));
+        }
+      }
+
+      const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+      for (unsigned I = 0, E = Children.size(); I != E; ++I)
+        VisitRegion(Children[I], Loop, LoopLiveIns);
+    }
+  };
+}
+
 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineBasicBlock *bb,
                                      const TargetMachine &tm,
                                      const MachineLoopInfo &mli,
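One subtlety worth noting: std::map::insert keeps the first entry per key, so Deps ends up recording the first use of each loop-live-in register encountered in dominator-tree preorder, together with that use's instruction index (Count) within its block. A sketch of the consuming side, mirroring the scheduler changes below (assumed flow, names as in this commit's diff):

    LoopDependencies LoopRegs(MLI, MDT);
    LoopRegs.VisitLoop(ML);  // populate Deps for MachineLoop ML
    LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
    if (I != LoopRegs.Deps.end()) {
      const MachineOperand *UseMO = I->second.first;  // recorded in-loop use
      unsigned Count = I->second.second;              // index within its block
    }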
@@ -65,9 +119,27 @@ void ScheduleDAGInstrs::BuildSchedUnits() {
   // all the work of the block is done before the terminator.
   SUnit *Terminator = 0;
 
+  LoopDependencies LoopRegs(MLI, MDT);
+
+  // Track which regs are live into a loop, to help guide back-edge-aware
+  // scheduling.
+  SmallSet<unsigned, 8> LoopLiveInRegs;
+  if (MachineLoop *ML = MLI.getLoopFor(BB))
+    if (BB == ML->getLoopLatch()) {
+      MachineBasicBlock *Header = ML->getHeader();
+      for (MachineBasicBlock::livein_iterator I = Header->livein_begin(),
+           E = Header->livein_end(); I != E; ++I)
+        LoopLiveInRegs.insert(*I);
+      LoopRegs.VisitLoop(ML);
+    }
+
   // Check to see if the scheduler cares about latencies.
   bool UnitLatencies = ForceUnitLatencies();
 
+  // Ask the target if address-backscheduling is desirable, and if so how much.
+  unsigned SpecialAddressLatency =
+    TM.getSubtarget<TargetSubtarget>().getSpecialAddressLatency();
+
   for (MachineBasicBlock::iterator MII = BB->end(), MIE = BB->begin();
        MII != MIE; --MII) {
     MachineInstr *MI = prior(MII);
@@ -118,7 +190,21 @@ void ScheduleDAGInstrs::BuildSchedUnits() {
       for (unsigned i = 0, e = UseList.size(); i != e; ++i) {
         SUnit *UseSU = UseList[i];
         if (UseSU != SU) {
-          UseSU->addPred(SDep(SU, SDep::Data, DataLatency, Reg));
+          unsigned LDataLatency = DataLatency;
+          // Optionally add in a special extra latency for nodes that
+          // feed addresses.
+          // TODO: Do this for register aliases too.
+          if (SpecialAddressLatency != 0 && !UnitLatencies) {
+            MachineInstr *UseMI = UseSU->getInstr();
+            const TargetInstrDesc &UseTID = UseMI->getDesc();
+            int RegUseIndex = UseMI->findRegisterUseOperandIdx(Reg);
+            assert(RegUseIndex >= 0 && "UseMI doesn't use register!");
+            if ((UseTID.mayLoad() || UseTID.mayStore()) &&
+                (unsigned)RegUseIndex < UseTID.getNumOperands() &&
+                UseTID.OpInfo[RegUseIndex].isLookupPtrRegClass())
+              LDataLatency += SpecialAddressLatency;
+          }
+          UseSU->addPred(SDep(SU, SDep::Data, LDataLatency, Reg));
         }
       }
       for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
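For intuition, a worked example with assumed numbers:

    // Assumed values, for illustration only:
    //   DataLatency           = 2    (ordinary def-use latency)
    //   SpecialAddressLatency = 200  (the X86 value added later in this commit)
    // If the use is a load or store and the used operand's register class is
    // a lookup pointer register class:
    //   LDataLatency = 2 + 200 = 202
    // The inflated edge latency encourages the list scheduler to place the
    // address computation well before the memory operation that consumes it.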
@@ -130,6 +216,51 @@ void ScheduleDAGInstrs::BuildSchedUnits() {
         }
       }
 
+      // If a def is going to wrap back around to the top of the loop,
+      // backschedule it.
+      // TODO: Blocks in loops without terminators can benefit too.
+      if (!UnitLatencies && Terminator && DefList.empty()) {
+        LoopDependencies::LoopDeps::iterator I = LoopRegs.Deps.find(Reg);
+        if (I != LoopRegs.Deps.end()) {
+          const MachineOperand *UseMO = I->second.first;
+          unsigned Count = I->second.second;
+          const MachineInstr *UseMI = UseMO->getParent();
+          unsigned UseMOIdx = UseMO - &UseMI->getOperand(0);
+          const TargetInstrDesc &UseTID = UseMI->getDesc();
+          // TODO: If we knew the total depth of the region here, we could
+          // handle the case where the whole loop is inside the region but
+          // is large enough that the isScheduleHigh trick isn't needed.
+          if (UseMOIdx < UseTID.getNumOperands()) {
+            // Currently, we only support scheduling regions consisting of
+            // single basic blocks. Check to see if the instruction is in
+            // the same region by checking to see if it has the same parent.
+            if (UseMI->getParent() != MI->getParent()) {
+              unsigned Latency = SU->Latency;
+              if (UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass())
+                Latency += SpecialAddressLatency;
+              // This is a wild guess as to the portion of the latency which
+              // will be overlapped by work done outside the current
+              // scheduling region.
+              Latency -= std::min(Latency, Count);
+              // Add the artificial edge.
+              Terminator->addPred(SDep(SU, SDep::Order, Latency,
+                                       /*Reg=*/0, /*isNormalMemory=*/false,
+                                       /*isMustAlias=*/false,
+                                       /*isArtificial=*/true));
+            } else if (SpecialAddressLatency > 0 &&
+                       UseTID.OpInfo[UseMOIdx].isLookupPtrRegClass()) {
+              // The entire loop body is within the current scheduling region
+              // and the latency of this operation is assumed to be greater
+              // than the latency of the loop.
+              // TODO: Recursively mark data-edge predecessors as
+              // isScheduleHigh too.
+              SU->isScheduleHigh = true;
+            }
+          }
+          LoopRegs.Deps.erase(I);
+        }
+      }
+
       UseList.clear();
       if (!MO.isDead())
         DefList.clear();
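Likewise, a worked example for the wraparound case, with assumed numbers:

    // Assumed values, for illustration only:
    //   SU->Latency           = 3    (the def's own latency)
    //   SpecialAddressLatency = 200  (the use feeds an address)
    //   Count                 = 50   (the recorded use sits 50 instructions
    //                                 into the loop header block)
    // Latency  = 3 + 200             = 203
    // Latency -= std::min(203u, 50u)   // 203 - 50 = 153
    // An artificial order edge of weight 153 from SU to the terminator pushes
    // the def earlier in the block, so its result is ready when control wraps
    // around to the use at the top of the loop.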
@@ -93,6 +93,17 @@ const char *X86Subtarget::getBZeroEntry() const {
   return 0;
 }
 
+/// getSpecialAddressLatency - For targets where it is beneficial to
+/// backschedule instructions that compute addresses, return a value
+/// indicating the number of scheduling cycles of backscheduling that
+/// should be attempted.
+unsigned X86Subtarget::getSpecialAddressLatency() const {
+  // For x86 out-of-order targets, back-schedule address computations so
+  // that loads and stores aren't blocked.
+  // This value was chosen arbitrarily.
+  return 200;
+}
+
 /// GetCpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
 /// specified arguments. If we can't run cpuid on the host, return true.
 bool X86::GetCpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX,
@@ -191,6 +191,12 @@ public:
   /// memset with zero passed as the second argument. Otherwise it
   /// returns null.
   const char *getBZeroEntry() const;
+
+  /// getSpecialAddressLatency - For targets where it is beneficial to
+  /// backschedule instructions that compute addresses, return a value
+  /// indicating the number of scheduling cycles of backscheduling that
+  /// should be attempted.
+  unsigned getSpecialAddressLatency() const;
 };
 
 namespace X86 {