mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-09-28 23:17:10 +00:00
Two sets of changes. Sorry they are intermingled.
1. Fix pre-ra scheduler so it doesn't try to push instructions above calls to "optimize for latency". Call instructions don't have the right latency and this is more likely to introduce spills. 2. Fix if-converter cost function. For ARM, it should use instruction latencies, not # of micro-ops, since multi-latency instructions are completely executed even when the predicate is false. Also, some instructions will be "slower" when they are predicated due to the register def becoming an implicit input. rdar://8598427 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@118135 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -247,6 +247,7 @@ namespace llvm {
|
|||||||
unsigned NumSuccs; // # of SDep::Data sucss.
|
unsigned NumSuccs; // # of SDep::Data sucss.
|
||||||
unsigned NumPredsLeft; // # of preds not scheduled.
|
unsigned NumPredsLeft; // # of preds not scheduled.
|
||||||
unsigned NumSuccsLeft; // # of succs not scheduled.
|
unsigned NumSuccsLeft; // # of succs not scheduled.
|
||||||
|
bool isCall : 1; // Is a function call.
|
||||||
bool isTwoAddress : 1; // Is a two-address instruction.
|
bool isTwoAddress : 1; // Is a two-address instruction.
|
||||||
bool isCommutable : 1; // Is a commutable instruction.
|
bool isCommutable : 1; // Is a commutable instruction.
|
||||||
bool hasPhysRegDefs : 1; // Has physreg defs that are being used.
|
bool hasPhysRegDefs : 1; // Has physreg defs that are being used.
|
||||||
@@ -273,7 +274,8 @@ namespace llvm {
|
|||||||
SUnit(SDNode *node, unsigned nodenum)
|
SUnit(SDNode *node, unsigned nodenum)
|
||||||
: Node(node), Instr(0), OrigNode(0), NodeNum(nodenum),
|
: Node(node), Instr(0), OrigNode(0), NodeNum(nodenum),
|
||||||
NodeQueueId(0), Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
|
NodeQueueId(0), Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
|
||||||
NumSuccsLeft(0), isTwoAddress(false), isCommutable(false),
|
NumSuccsLeft(0),
|
||||||
|
isCall(false), isTwoAddress(false), isCommutable(false),
|
||||||
hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
||||||
isPending(false), isAvailable(false), isScheduled(false),
|
isPending(false), isAvailable(false), isScheduled(false),
|
||||||
isScheduleHigh(false), isCloned(false),
|
isScheduleHigh(false), isCloned(false),
|
||||||
@@ -286,7 +288,8 @@ namespace llvm {
|
|||||||
SUnit(MachineInstr *instr, unsigned nodenum)
|
SUnit(MachineInstr *instr, unsigned nodenum)
|
||||||
: Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum),
|
: Node(0), Instr(instr), OrigNode(0), NodeNum(nodenum),
|
||||||
NodeQueueId(0), Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
|
NodeQueueId(0), Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
|
||||||
NumSuccsLeft(0), isTwoAddress(false), isCommutable(false),
|
NumSuccsLeft(0),
|
||||||
|
isCall(false), isTwoAddress(false), isCommutable(false),
|
||||||
hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
||||||
isPending(false), isAvailable(false), isScheduled(false),
|
isPending(false), isAvailable(false), isScheduled(false),
|
||||||
isScheduleHigh(false), isCloned(false),
|
isScheduleHigh(false), isCloned(false),
|
||||||
@@ -298,7 +301,8 @@ namespace llvm {
|
|||||||
SUnit()
|
SUnit()
|
||||||
: Node(0), Instr(0), OrigNode(0), NodeNum(~0u),
|
: Node(0), Instr(0), OrigNode(0), NodeNum(~0u),
|
||||||
NodeQueueId(0), Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
|
NodeQueueId(0), Latency(0), NumPreds(0), NumSuccs(0), NumPredsLeft(0),
|
||||||
NumSuccsLeft(0), isTwoAddress(false), isCommutable(false),
|
NumSuccsLeft(0),
|
||||||
|
isCall(false), isTwoAddress(false), isCommutable(false),
|
||||||
hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
hasPhysRegDefs(false), hasPhysRegClobbers(false),
|
||||||
isPending(false), isAvailable(false), isScheduled(false),
|
isPending(false), isAvailable(false), isScheduled(false),
|
||||||
isScheduleHigh(false), isCloned(false),
|
isScheduleHigh(false), isCloned(false),
|
||||||
|
@@ -304,12 +304,14 @@ public:
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// isProfitableToIfCvt - Return true if it's profitable to first "NumInstrs"
|
/// isProfitableToIfCvt - Return true if it's profitable to predicate
|
||||||
|
/// instructions with accumulated instruction latency of "NumCycles"
|
||||||
/// of the specified basic block, where the probability of the instructions
|
/// of the specified basic block, where the probability of the instructions
|
||||||
/// being executed is given by Probability, and Confidence is a measure
|
/// being executed is given by Probability, and Confidence is a measure
|
||||||
/// of our confidence that it will be properly predicted.
|
/// of our confidence that it will be properly predicted.
|
||||||
virtual
|
virtual
|
||||||
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs,
|
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
|
||||||
|
unsigned ExtraPredCycles,
|
||||||
float Probability, float Confidence) const {
|
float Probability, float Confidence) const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -321,19 +323,22 @@ public:
|
|||||||
/// by Probability, and Confidence is a measure of our confidence that it
|
/// by Probability, and Confidence is a measure of our confidence that it
|
||||||
/// will be properly predicted.
|
/// will be properly predicted.
|
||||||
virtual bool
|
virtual bool
|
||||||
isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs,
|
isProfitableToIfCvt(MachineBasicBlock &TMBB,
|
||||||
MachineBasicBlock &FMBB, unsigned NumFInstrs,
|
unsigned NumTCycles, unsigned ExtraTCycles,
|
||||||
|
MachineBasicBlock &FMBB,
|
||||||
|
unsigned NumFCycles, unsigned ExtraFCycles,
|
||||||
float Probability, float Confidence) const {
|
float Probability, float Confidence) const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// isProfitableToDupForIfCvt - Return true if it's profitable for
|
/// isProfitableToDupForIfCvt - Return true if it's profitable for
|
||||||
/// if-converter to duplicate a specific number of instructions in the
|
/// if-converter to duplicate instructions of specified accumulated
|
||||||
/// specified MBB to enable if-conversion, where the probability of the
|
/// instruction latencies in the specified MBB to enable if-conversion.
|
||||||
/// instructions being executed is given by Probability, and Confidence is
|
/// The probability of the instructions being executed is given by
|
||||||
/// a measure of our confidence that it will be properly predicted.
|
/// Probability, and Confidence is a measure of our confidence that it
|
||||||
|
/// will be properly predicted.
|
||||||
virtual bool
|
virtual bool
|
||||||
isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs,
|
isProfitableToDupForIfCvt(MachineBasicBlock &MBB, unsigned NumCyles,
|
||||||
float Probability, float Confidence) const {
|
float Probability, float Confidence) const {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -608,25 +613,32 @@ public:
|
|||||||
|
|
||||||
/// getNumMicroOps - Return the number of u-operations the given machine
|
/// getNumMicroOps - Return the number of u-operations the given machine
|
||||||
/// instruction will be decoded to on the target cpu.
|
/// instruction will be decoded to on the target cpu.
|
||||||
virtual unsigned getNumMicroOps(const MachineInstr *MI,
|
virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
|
||||||
const InstrItineraryData *ItinData) const;
|
const MachineInstr *MI) const;
|
||||||
|
|
||||||
/// getOperandLatency - Compute and return the use operand latency of a given
|
/// getOperandLatency - Compute and return the use operand latency of a given
|
||||||
/// itinerary class and operand index if the value is produced by an
|
/// pair of def and use.
|
||||||
/// instruction of the specified itinerary class and def operand index.
|
|
||||||
/// In most cases, the static scheduling itinerary was enough to determine the
|
/// In most cases, the static scheduling itinerary was enough to determine the
|
||||||
/// operand latency. But it may not be possible for instructions with variable
|
/// operand latency. But it may not be possible for instructions with variable
|
||||||
/// number of defs / uses.
|
/// number of defs / uses.
|
||||||
virtual
|
virtual int getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
int getOperandLatency(const InstrItineraryData *ItinData,
|
|
||||||
const MachineInstr *DefMI, unsigned DefIdx,
|
const MachineInstr *DefMI, unsigned DefIdx,
|
||||||
const MachineInstr *UseMI, unsigned UseIdx) const;
|
const MachineInstr *UseMI, unsigned UseIdx) const;
|
||||||
|
|
||||||
virtual
|
virtual int getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
int getOperandLatency(const InstrItineraryData *ItinData,
|
|
||||||
SDNode *DefNode, unsigned DefIdx,
|
SDNode *DefNode, unsigned DefIdx,
|
||||||
SDNode *UseNode, unsigned UseIdx) const;
|
SDNode *UseNode, unsigned UseIdx) const;
|
||||||
|
|
||||||
|
/// getInstrLatency - Compute the instruction latency of a given instruction.
|
||||||
|
/// If the instruction has higher cost when predicated, it's returned via
|
||||||
|
/// PredCost.
|
||||||
|
virtual int getInstrLatency(const InstrItineraryData *ItinData,
|
||||||
|
const MachineInstr *MI,
|
||||||
|
unsigned *PredCost = 0) const;
|
||||||
|
|
||||||
|
virtual int getInstrLatency(const InstrItineraryData *ItinData,
|
||||||
|
SDNode *Node) const;
|
||||||
|
|
||||||
/// hasHighOperandLatency - Compute operand latency between a def of 'Reg'
|
/// hasHighOperandLatency - Compute operand latency between a def of 'Reg'
|
||||||
/// and an use in the current loop, return true if the target considered
|
/// and an use in the current loop, return true if the target considered
|
||||||
/// it 'high'. This is used by optimization passes such as machine LICM to
|
/// it 'high'. This is used by optimization passes such as machine LICM to
|
||||||
|
@@ -93,7 +93,8 @@ namespace {
|
|||||||
/// ClobbersPred - True if BB could modify predicates (e.g. has
|
/// ClobbersPred - True if BB could modify predicates (e.g. has
|
||||||
/// cmp, call, etc.)
|
/// cmp, call, etc.)
|
||||||
/// NonPredSize - Number of non-predicated instructions.
|
/// NonPredSize - Number of non-predicated instructions.
|
||||||
/// ExtraCost - Extra cost for microcoded instructions.
|
/// ExtraCost - Extra cost for multi-cycle instructions.
|
||||||
|
/// ExtraCost2 - Some instructions are slower when predicated
|
||||||
/// BB - Corresponding MachineBasicBlock.
|
/// BB - Corresponding MachineBasicBlock.
|
||||||
/// TrueBB / FalseBB- See AnalyzeBranch().
|
/// TrueBB / FalseBB- See AnalyzeBranch().
|
||||||
/// BrCond - Conditions for end of block conditional branches.
|
/// BrCond - Conditions for end of block conditional branches.
|
||||||
@@ -110,6 +111,7 @@ namespace {
|
|||||||
bool ClobbersPred : 1;
|
bool ClobbersPred : 1;
|
||||||
unsigned NonPredSize;
|
unsigned NonPredSize;
|
||||||
unsigned ExtraCost;
|
unsigned ExtraCost;
|
||||||
|
unsigned ExtraCost2;
|
||||||
MachineBasicBlock *BB;
|
MachineBasicBlock *BB;
|
||||||
MachineBasicBlock *TrueBB;
|
MachineBasicBlock *TrueBB;
|
||||||
MachineBasicBlock *FalseBB;
|
MachineBasicBlock *FalseBB;
|
||||||
@@ -119,7 +121,7 @@ namespace {
|
|||||||
IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
|
IsAnalyzed(false), IsEnqueued(false), IsBrAnalyzable(false),
|
||||||
HasFallThrough(false), IsUnpredicable(false),
|
HasFallThrough(false), IsUnpredicable(false),
|
||||||
CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
|
CannotBeCopied(false), ClobbersPred(false), NonPredSize(0),
|
||||||
ExtraCost(0), BB(0), TrueBB(0), FalseBB(0) {}
|
ExtraCost(0), ExtraCost2(0), BB(0), TrueBB(0), FalseBB(0) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
/// IfcvtToken - Record information about pending if-conversions to attempt:
|
/// IfcvtToken - Record information about pending if-conversions to attempt:
|
||||||
@@ -203,17 +205,20 @@ namespace {
|
|||||||
bool IgnoreBr = false);
|
bool IgnoreBr = false);
|
||||||
void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
|
void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true);
|
||||||
|
|
||||||
bool MeetIfcvtSizeLimit(MachineBasicBlock &BB, unsigned Size,
|
bool MeetIfcvtSizeLimit(MachineBasicBlock &BB,
|
||||||
|
unsigned Cycle, unsigned Extra,
|
||||||
float Prediction, float Confidence) const {
|
float Prediction, float Confidence) const {
|
||||||
return Size > 0 && TII->isProfitableToIfCvt(BB, Size,
|
return Cycle > 0 && TII->isProfitableToIfCvt(BB, Cycle, Extra,
|
||||||
Prediction, Confidence);
|
Prediction, Confidence);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB, unsigned TSize,
|
bool MeetIfcvtSizeLimit(MachineBasicBlock &TBB,
|
||||||
MachineBasicBlock &FBB, unsigned FSize,
|
unsigned TCycle, unsigned TExtra,
|
||||||
|
MachineBasicBlock &FBB,
|
||||||
|
unsigned FCycle, unsigned FExtra,
|
||||||
float Prediction, float Confidence) const {
|
float Prediction, float Confidence) const {
|
||||||
return TSize > 0 && FSize > 0 &&
|
return TCycle > 0 && FCycle > 0 &&
|
||||||
TII->isProfitableToIfCvt(TBB, TSize, FBB, FSize,
|
TII->isProfitableToIfCvt(TBB, TCycle, TExtra, FBB, FCycle, FExtra,
|
||||||
Prediction, Confidence);
|
Prediction, Confidence);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -649,6 +654,7 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
|
|||||||
// Then scan all the instructions.
|
// Then scan all the instructions.
|
||||||
BBI.NonPredSize = 0;
|
BBI.NonPredSize = 0;
|
||||||
BBI.ExtraCost = 0;
|
BBI.ExtraCost = 0;
|
||||||
|
BBI.ExtraCost2 = 0;
|
||||||
BBI.ClobbersPred = false;
|
BBI.ClobbersPred = false;
|
||||||
for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
|
for (MachineBasicBlock::iterator I = BBI.BB->begin(), E = BBI.BB->end();
|
||||||
I != E; ++I) {
|
I != E; ++I) {
|
||||||
@@ -665,9 +671,12 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
|
|||||||
if (!isCondBr) {
|
if (!isCondBr) {
|
||||||
if (!isPredicated) {
|
if (!isPredicated) {
|
||||||
BBI.NonPredSize++;
|
BBI.NonPredSize++;
|
||||||
unsigned NumOps = TII->getNumMicroOps(&*I, InstrItins);
|
unsigned ExtraPredCost = 0;
|
||||||
if (NumOps > 1)
|
unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I,
|
||||||
BBI.ExtraCost += NumOps-1;
|
&ExtraPredCost);
|
||||||
|
if (NumCycles > 1)
|
||||||
|
BBI.ExtraCost += NumCycles-1;
|
||||||
|
BBI.ExtraCost2 += ExtraPredCost;
|
||||||
} else if (!AlreadyPredicated) {
|
} else if (!AlreadyPredicated) {
|
||||||
// FIXME: This instruction is already predicated before the
|
// FIXME: This instruction is already predicated before the
|
||||||
// if-conversion pass. It's probably something like a conditional move.
|
// if-conversion pass. It's probably something like a conditional move.
|
||||||
@@ -815,9 +824,9 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
|
|||||||
|
|
||||||
if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
|
if (CanRevCond && ValidDiamond(TrueBBI, FalseBBI, Dups, Dups2) &&
|
||||||
MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
|
MeetIfcvtSizeLimit(*TrueBBI.BB, (TrueBBI.NonPredSize - (Dups + Dups2) +
|
||||||
TrueBBI.ExtraCost),
|
TrueBBI.ExtraCost), TrueBBI.ExtraCost2,
|
||||||
*FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
|
*FalseBBI.BB, (FalseBBI.NonPredSize - (Dups + Dups2) +
|
||||||
FalseBBI.ExtraCost),
|
FalseBBI.ExtraCost),FalseBBI.ExtraCost2,
|
||||||
Prediction, Confidence) &&
|
Prediction, Confidence) &&
|
||||||
FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
|
FeasibilityAnalysis(TrueBBI, BBI.BrCond) &&
|
||||||
FeasibilityAnalysis(FalseBBI, RevCond)) {
|
FeasibilityAnalysis(FalseBBI, RevCond)) {
|
||||||
@@ -836,7 +845,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
|
|||||||
|
|
||||||
if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) &&
|
if (ValidTriangle(TrueBBI, FalseBBI, false, Dups, Prediction, Confidence) &&
|
||||||
MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
|
MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
|
||||||
Prediction, Confidence) &&
|
TrueBBI.ExtraCost2, Prediction, Confidence) &&
|
||||||
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
|
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true)) {
|
||||||
// Triangle:
|
// Triangle:
|
||||||
// EBB
|
// EBB
|
||||||
@@ -851,7 +860,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
|
|||||||
|
|
||||||
if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) &&
|
if (ValidTriangle(TrueBBI, FalseBBI, true, Dups, Prediction, Confidence) &&
|
||||||
MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
|
MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
|
||||||
Prediction, Confidence) &&
|
TrueBBI.ExtraCost2, Prediction, Confidence) &&
|
||||||
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
|
FeasibilityAnalysis(TrueBBI, BBI.BrCond, true, true)) {
|
||||||
Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
|
Tokens.push_back(new IfcvtToken(BBI, ICTriangleRev, TNeedSub, Dups));
|
||||||
Enqueued = true;
|
Enqueued = true;
|
||||||
@@ -859,7 +868,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
|
|||||||
|
|
||||||
if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) &&
|
if (ValidSimple(TrueBBI, Dups, Prediction, Confidence) &&
|
||||||
MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
|
MeetIfcvtSizeLimit(*TrueBBI.BB, TrueBBI.NonPredSize + TrueBBI.ExtraCost,
|
||||||
Prediction, Confidence) &&
|
TrueBBI.ExtraCost2, Prediction, Confidence) &&
|
||||||
FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
|
FeasibilityAnalysis(TrueBBI, BBI.BrCond)) {
|
||||||
// Simple (split, no rejoin):
|
// Simple (split, no rejoin):
|
||||||
// EBB
|
// EBB
|
||||||
@@ -878,7 +887,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
|
|||||||
1.0-Prediction, Confidence) &&
|
1.0-Prediction, Confidence) &&
|
||||||
MeetIfcvtSizeLimit(*FalseBBI.BB,
|
MeetIfcvtSizeLimit(*FalseBBI.BB,
|
||||||
FalseBBI.NonPredSize + FalseBBI.ExtraCost,
|
FalseBBI.NonPredSize + FalseBBI.ExtraCost,
|
||||||
1.0-Prediction, Confidence) &&
|
FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
|
||||||
FeasibilityAnalysis(FalseBBI, RevCond, true)) {
|
FeasibilityAnalysis(FalseBBI, RevCond, true)) {
|
||||||
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
|
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFalse, FNeedSub, Dups));
|
||||||
Enqueued = true;
|
Enqueued = true;
|
||||||
@@ -888,7 +897,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
|
|||||||
1.0-Prediction, Confidence) &&
|
1.0-Prediction, Confidence) &&
|
||||||
MeetIfcvtSizeLimit(*FalseBBI.BB,
|
MeetIfcvtSizeLimit(*FalseBBI.BB,
|
||||||
FalseBBI.NonPredSize + FalseBBI.ExtraCost,
|
FalseBBI.NonPredSize + FalseBBI.ExtraCost,
|
||||||
1.0-Prediction, Confidence) &&
|
FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
|
||||||
FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
|
FeasibilityAnalysis(FalseBBI, RevCond, true, true)) {
|
||||||
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
|
Tokens.push_back(new IfcvtToken(BBI, ICTriangleFRev, FNeedSub, Dups));
|
||||||
Enqueued = true;
|
Enqueued = true;
|
||||||
@@ -897,7 +906,7 @@ IfConverter::BBInfo &IfConverter::AnalyzeBlock(MachineBasicBlock *BB,
|
|||||||
if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) &&
|
if (ValidSimple(FalseBBI, Dups, 1.0-Prediction, Confidence) &&
|
||||||
MeetIfcvtSizeLimit(*FalseBBI.BB,
|
MeetIfcvtSizeLimit(*FalseBBI.BB,
|
||||||
FalseBBI.NonPredSize + FalseBBI.ExtraCost,
|
FalseBBI.NonPredSize + FalseBBI.ExtraCost,
|
||||||
1.0-Prediction, Confidence) &&
|
FalseBBI.ExtraCost2, 1.0-Prediction, Confidence) &&
|
||||||
FeasibilityAnalysis(FalseBBI, RevCond)) {
|
FeasibilityAnalysis(FalseBBI, RevCond)) {
|
||||||
Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
|
Tokens.push_back(new IfcvtToken(BBI, ICSimpleFalse, FNeedSub, Dups));
|
||||||
Enqueued = true;
|
Enqueued = true;
|
||||||
@@ -1427,9 +1436,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,
|
|||||||
MachineInstr *MI = MF.CloneMachineInstr(I);
|
MachineInstr *MI = MF.CloneMachineInstr(I);
|
||||||
ToBBI.BB->insert(ToBBI.BB->end(), MI);
|
ToBBI.BB->insert(ToBBI.BB->end(), MI);
|
||||||
ToBBI.NonPredSize++;
|
ToBBI.NonPredSize++;
|
||||||
unsigned NumOps = TII->getNumMicroOps(MI, InstrItins);
|
unsigned ExtraPredCost = 0;
|
||||||
if (NumOps > 1)
|
unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost);
|
||||||
ToBBI.ExtraCost += NumOps-1;
|
if (NumCycles > 1)
|
||||||
|
ToBBI.ExtraCost += NumCycles-1;
|
||||||
|
ToBBI.ExtraCost2 += ExtraPredCost;
|
||||||
|
|
||||||
if (!TII->isPredicated(I) && !MI->isDebugValue()) {
|
if (!TII->isPredicated(I) && !MI->isDebugValue()) {
|
||||||
if (!TII->PredicateInstruction(MI, Cond)) {
|
if (!TII->PredicateInstruction(MI, Cond)) {
|
||||||
@@ -1504,8 +1515,10 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) {
|
|||||||
|
|
||||||
ToBBI.NonPredSize += FromBBI.NonPredSize;
|
ToBBI.NonPredSize += FromBBI.NonPredSize;
|
||||||
ToBBI.ExtraCost += FromBBI.ExtraCost;
|
ToBBI.ExtraCost += FromBBI.ExtraCost;
|
||||||
|
ToBBI.ExtraCost2 += FromBBI.ExtraCost2;
|
||||||
FromBBI.NonPredSize = 0;
|
FromBBI.NonPredSize = 0;
|
||||||
FromBBI.ExtraCost = 0;
|
FromBBI.ExtraCost = 0;
|
||||||
|
FromBBI.ExtraCost2 = 0;
|
||||||
|
|
||||||
ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
|
ToBBI.ClobbersPred |= FromBBI.ClobbersPred;
|
||||||
ToBBI.HasFallThrough = FromBBI.HasFallThrough;
|
ToBBI.HasFallThrough = FromBBI.HasFallThrough;
|
||||||
|
@@ -238,6 +238,8 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
|
|||||||
"Cannot schedule terminators or labels!");
|
"Cannot schedule terminators or labels!");
|
||||||
// Create the SUnit for this MI.
|
// Create the SUnit for this MI.
|
||||||
SUnit *SU = NewSUnit(MI);
|
SUnit *SU = NewSUnit(MI);
|
||||||
|
SU->isCall = TID.isCall();
|
||||||
|
SU->isCommutable = TID.isCommutable();
|
||||||
|
|
||||||
// Assign the Latency field of SU using target-provided information.
|
// Assign the Latency field of SU using target-provided information.
|
||||||
if (UnitLatencies)
|
if (UnitLatencies)
|
||||||
@@ -564,9 +566,9 @@ void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
|
|||||||
// extra time.
|
// extra time.
|
||||||
if (SU->getInstr()->getDesc().mayLoad())
|
if (SU->getInstr()->getDesc().mayLoad())
|
||||||
SU->Latency += 2;
|
SU->Latency += 2;
|
||||||
} else
|
} else {
|
||||||
SU->Latency =
|
SU->Latency = TII->getInstrLatency(InstrItins, SU->getInstr());
|
||||||
InstrItins->getStageLatency(SU->getInstr()->getDesc().getSchedClass());
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
|
void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
|
||||||
|
@@ -1589,6 +1589,10 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
|
bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
|
||||||
|
if (left->isCall || right->isCall)
|
||||||
|
// No way to compute latency of calls.
|
||||||
|
return BURRSort(left, right, SPQ);
|
||||||
|
|
||||||
bool LHigh = SPQ->HighRegPressure(left);
|
bool LHigh = SPQ->HighRegPressure(left);
|
||||||
bool RHigh = SPQ->HighRegPressure(right);
|
bool RHigh = SPQ->HighRegPressure(right);
|
||||||
// Avoid causing spills. If register pressure is high, schedule for
|
// Avoid causing spills. If register pressure is high, schedule for
|
||||||
@@ -1648,6 +1652,10 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
|
|||||||
|
|
||||||
bool ilp_ls_rr_sort::operator()(const SUnit *left,
|
bool ilp_ls_rr_sort::operator()(const SUnit *left,
|
||||||
const SUnit *right) const {
|
const SUnit *right) const {
|
||||||
|
if (left->isCall || right->isCall)
|
||||||
|
// No way to compute latency of calls.
|
||||||
|
return BURRSort(left, right, SPQ);
|
||||||
|
|
||||||
bool LHigh = SPQ->HighRegPressure(left);
|
bool LHigh = SPQ->HighRegPressure(left);
|
||||||
bool RHigh = SPQ->HighRegPressure(right);
|
bool RHigh = SPQ->HighRegPressure(right);
|
||||||
// Avoid causing spills. If register pressure is high, schedule for
|
// Avoid causing spills. If register pressure is high, schedule for
|
||||||
|
@@ -72,6 +72,7 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
|
|||||||
SUnit *SU = NewSUnit(Old->getNode());
|
SUnit *SU = NewSUnit(Old->getNode());
|
||||||
SU->OrigNode = Old->OrigNode;
|
SU->OrigNode = Old->OrigNode;
|
||||||
SU->Latency = Old->Latency;
|
SU->Latency = Old->Latency;
|
||||||
|
SU->isCall = Old->isCall;
|
||||||
SU->isTwoAddress = Old->isTwoAddress;
|
SU->isTwoAddress = Old->isTwoAddress;
|
||||||
SU->isCommutable = Old->isCommutable;
|
SU->isCommutable = Old->isCommutable;
|
||||||
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
|
SU->hasPhysRegDefs = Old->hasPhysRegDefs;
|
||||||
@@ -300,6 +301,8 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
|
|||||||
N = N->getOperand(N->getNumOperands()-1).getNode();
|
N = N->getOperand(N->getNumOperands()-1).getNode();
|
||||||
assert(N->getNodeId() == -1 && "Node already inserted!");
|
assert(N->getNodeId() == -1 && "Node already inserted!");
|
||||||
N->setNodeId(NodeSUnit->NodeNum);
|
N->setNodeId(NodeSUnit->NodeNum);
|
||||||
|
if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
|
||||||
|
NodeSUnit->isCall = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scan down to find any flagged succs.
|
// Scan down to find any flagged succs.
|
||||||
@@ -316,6 +319,8 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
|
|||||||
assert(N->getNodeId() == -1 && "Node already inserted!");
|
assert(N->getNodeId() == -1 && "Node already inserted!");
|
||||||
N->setNodeId(NodeSUnit->NodeNum);
|
N->setNodeId(NodeSUnit->NodeNum);
|
||||||
N = *UI;
|
N = *UI;
|
||||||
|
if (N->isMachineOpcode() && TII->get(N->getMachineOpcode()).isCall())
|
||||||
|
NodeSUnit->isCall = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (!HasFlagUse) break;
|
if (!HasFlagUse) break;
|
||||||
@@ -438,10 +443,8 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
|
|||||||
// all nodes flagged together into this SUnit.
|
// all nodes flagged together into this SUnit.
|
||||||
SU->Latency = 0;
|
SU->Latency = 0;
|
||||||
for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
|
for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
|
||||||
if (N->isMachineOpcode()) {
|
if (N->isMachineOpcode())
|
||||||
SU->Latency += InstrItins->
|
SU->Latency += TII->getInstrLatency(InstrItins, N);
|
||||||
getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
|
void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
|
||||||
|
@@ -40,10 +40,6 @@ static cl::opt<bool>
|
|||||||
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
|
EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden,
|
||||||
cl::desc("Enable ARM 2-addr to 3-addr conv"));
|
cl::desc("Enable ARM 2-addr to 3-addr conv"));
|
||||||
|
|
||||||
static cl::opt<bool>
|
|
||||||
OldARMIfCvt("old-arm-ifcvt", cl::Hidden,
|
|
||||||
cl::desc("Use old-style ARM if-conversion heuristics"));
|
|
||||||
|
|
||||||
ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
|
ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI)
|
||||||
: TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
|
: TargetInstrInfoImpl(ARMInsts, array_lengthof(ARMInsts)),
|
||||||
Subtarget(STI) {
|
Subtarget(STI) {
|
||||||
@@ -1205,53 +1201,36 @@ bool ARMBaseInstrInfo::isSchedulingBoundary(const MachineInstr *MI,
|
|||||||
}
|
}
|
||||||
|
|
||||||
bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
|
bool ARMBaseInstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
|
||||||
unsigned NumInstrs,
|
unsigned NumCyles,
|
||||||
|
unsigned ExtraPredCycles,
|
||||||
float Probability,
|
float Probability,
|
||||||
float Confidence) const {
|
float Confidence) const {
|
||||||
if (!NumInstrs)
|
if (!NumCyles)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Use old-style heuristics
|
|
||||||
if (OldARMIfCvt) {
|
|
||||||
if (Subtarget.getCPUString() == "generic")
|
|
||||||
// Generic (and overly aggressive) if-conversion limits for testing.
|
|
||||||
return NumInstrs <= 10;
|
|
||||||
if (Subtarget.hasV7Ops())
|
|
||||||
return NumInstrs <= 3;
|
|
||||||
return NumInstrs <= 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Attempt to estimate the relative costs of predication versus branching.
|
// Attempt to estimate the relative costs of predication versus branching.
|
||||||
float UnpredCost = Probability * NumInstrs;
|
float UnpredCost = Probability * NumCyles;
|
||||||
UnpredCost += 1.0; // The branch itself
|
UnpredCost += 1.0; // The branch itself
|
||||||
UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
|
UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
|
||||||
|
|
||||||
float PredCost = NumInstrs;
|
return (float)(NumCyles + ExtraPredCycles) < UnpredCost;
|
||||||
|
|
||||||
return PredCost < UnpredCost;
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool ARMBaseInstrInfo::
|
bool ARMBaseInstrInfo::
|
||||||
isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
|
isProfitableToIfCvt(MachineBasicBlock &TMBB,
|
||||||
MachineBasicBlock &FMBB, unsigned NumF,
|
unsigned TCycles, unsigned TExtra,
|
||||||
|
MachineBasicBlock &FMBB,
|
||||||
|
unsigned FCycles, unsigned FExtra,
|
||||||
float Probability, float Confidence) const {
|
float Probability, float Confidence) const {
|
||||||
// Use old-style if-conversion heuristics
|
if (!TCycles || !FCycles)
|
||||||
if (OldARMIfCvt) {
|
|
||||||
return NumT && NumF && NumT <= 2 && NumF <= 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!NumT || !NumF)
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
// Attempt to estimate the relative costs of predication versus branching.
|
// Attempt to estimate the relative costs of predication versus branching.
|
||||||
float UnpredCost = Probability * NumT + (1.0 - Probability) * NumF;
|
float UnpredCost = Probability * TCycles + (1.0 - Probability) * FCycles;
|
||||||
UnpredCost += 1.0; // The branch itself
|
UnpredCost += 1.0; // The branch itself
|
||||||
UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
|
UnpredCost += (1.0 - Confidence) * Subtarget.getMispredictionPenalty();
|
||||||
|
|
||||||
float PredCost = NumT + NumF;
|
return (float)(TCycles + FCycles + TExtra + FExtra) < UnpredCost;
|
||||||
|
|
||||||
return PredCost < UnpredCost;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// getInstrPredicate - If instruction is predicated, returns its predicate
|
/// getInstrPredicate - If instruction is predicated, returns its predicate
|
||||||
@@ -1591,8 +1570,8 @@ OptimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, int CmpMask,
|
|||||||
}
|
}
|
||||||
|
|
||||||
unsigned
|
unsigned
|
||||||
ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI,
|
ARMBaseInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
|
||||||
const InstrItineraryData *ItinData) const {
|
const MachineInstr *MI) const {
|
||||||
if (!ItinData || ItinData->isEmpty())
|
if (!ItinData || ItinData->isEmpty())
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
@@ -1649,9 +1628,14 @@ ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI,
|
|||||||
case ARM::t2STM_UPD: {
|
case ARM::t2STM_UPD: {
|
||||||
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
|
unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
|
||||||
if (Subtarget.isCortexA8()) {
|
if (Subtarget.isCortexA8()) {
|
||||||
// 4 registers would be issued: 1, 2, 1.
|
if (NumRegs < 4)
|
||||||
// 5 registers would be issued: 1, 2, 2.
|
return 2;
|
||||||
return 1 + (NumRegs / 2);
|
// 4 registers would be issued: 2, 2.
|
||||||
|
// 5 registers would be issued: 2, 2, 1.
|
||||||
|
UOps = (NumRegs / 2);
|
||||||
|
if (NumRegs % 2)
|
||||||
|
++UOps;
|
||||||
|
return UOps;
|
||||||
} else if (Subtarget.isCortexA9()) {
|
} else if (Subtarget.isCortexA9()) {
|
||||||
UOps = (NumRegs / 2);
|
UOps = (NumRegs / 2);
|
||||||
// If there are odd number of registers or if it's not 64-bit aligned,
|
// If there are odd number of registers or if it's not 64-bit aligned,
|
||||||
@@ -2025,6 +2009,46 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
|||||||
return Latency;
|
return Latency;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
|
||||||
|
const MachineInstr *MI,
|
||||||
|
unsigned *PredCost) const {
|
||||||
|
if (MI->isCopyLike() || MI->isInsertSubreg() ||
|
||||||
|
MI->isRegSequence() || MI->isImplicitDef())
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (!ItinData || ItinData->isEmpty())
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
const TargetInstrDesc &TID = MI->getDesc();
|
||||||
|
unsigned Class = TID.getSchedClass();
|
||||||
|
unsigned UOps = ItinData->Itineraries[Class].NumMicroOps;
|
||||||
|
if (PredCost && TID.hasImplicitDefOfPhysReg(ARM::CPSR))
|
||||||
|
// When predicated, CPSR is an additional source operand for CPSR updating
|
||||||
|
// instructions, this apparently increases their latencies.
|
||||||
|
*PredCost = 1;
|
||||||
|
if (UOps)
|
||||||
|
return ItinData->getStageLatency(Class);
|
||||||
|
return getNumMicroOps(ItinData, MI);
|
||||||
|
}
|
||||||
|
|
||||||
|
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
|
||||||
|
SDNode *Node) const {
|
||||||
|
if (!Node->isMachineOpcode())
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (!ItinData || ItinData->isEmpty())
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
unsigned Opcode = Node->getMachineOpcode();
|
||||||
|
switch (Opcode) {
|
||||||
|
default:
|
||||||
|
return ItinData->getStageLatency(get(Opcode).getSchedClass());
|
||||||
|
case ARM::VLDMQ:
|
||||||
|
case ARM::VSTMQ:
|
||||||
|
return 2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
bool ARMBaseInstrInfo::
|
bool ARMBaseInstrInfo::
|
||||||
hasHighOperandLatency(const InstrItineraryData *ItinData,
|
hasHighOperandLatency(const InstrItineraryData *ItinData,
|
||||||
const MachineRegisterInfo *MRI,
|
const MachineRegisterInfo *MRI,
|
||||||
|
@@ -318,18 +318,20 @@ public:
|
|||||||
const MachineFunction &MF) const;
|
const MachineFunction &MF) const;
|
||||||
|
|
||||||
virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
|
virtual bool isProfitableToIfCvt(MachineBasicBlock &MBB,
|
||||||
unsigned NumInstrs,
|
unsigned NumCyles, unsigned ExtraPredCycles,
|
||||||
float Prob, float Confidence) const;
|
float Prob, float Confidence) const;
|
||||||
|
|
||||||
virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,unsigned NumT,
|
virtual bool isProfitableToIfCvt(MachineBasicBlock &TMBB,
|
||||||
MachineBasicBlock &FMBB,unsigned NumF,
|
unsigned NumT, unsigned ExtraT,
|
||||||
|
MachineBasicBlock &FMBB,
|
||||||
|
unsigned NumF, unsigned ExtraF,
|
||||||
float Probability, float Confidence) const;
|
float Probability, float Confidence) const;
|
||||||
|
|
||||||
virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
|
virtual bool isProfitableToDupForIfCvt(MachineBasicBlock &MBB,
|
||||||
unsigned NumInstrs,
|
unsigned NumCyles,
|
||||||
float Probability,
|
float Probability,
|
||||||
float Confidence) const {
|
float Confidence) const {
|
||||||
return NumInstrs == 1;
|
return NumCyles == 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// AnalyzeCompare - For a comparison instruction, return the source register
|
/// AnalyzeCompare - For a comparison instruction, return the source register
|
||||||
@@ -345,8 +347,8 @@ public:
|
|||||||
const MachineRegisterInfo *MRI,
|
const MachineRegisterInfo *MRI,
|
||||||
MachineBasicBlock::iterator &MII) const;
|
MachineBasicBlock::iterator &MII) const;
|
||||||
|
|
||||||
virtual unsigned getNumMicroOps(const MachineInstr *MI,
|
virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData,
|
||||||
const InstrItineraryData *ItinData) const;
|
const MachineInstr *MI) const;
|
||||||
|
|
||||||
virtual
|
virtual
|
||||||
int getOperandLatency(const InstrItineraryData *ItinData,
|
int getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
@@ -379,6 +381,12 @@ private:
|
|||||||
const TargetInstrDesc &UseTID,
|
const TargetInstrDesc &UseTID,
|
||||||
unsigned UseIdx, unsigned UseAlign) const;
|
unsigned UseIdx, unsigned UseAlign) const;
|
||||||
|
|
||||||
|
int getInstrLatency(const InstrItineraryData *ItinData,
|
||||||
|
const MachineInstr *MI, unsigned *PredCost = 0) const;
|
||||||
|
|
||||||
|
int getInstrLatency(const InstrItineraryData *ItinData,
|
||||||
|
SDNode *Node) const;
|
||||||
|
|
||||||
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
|
bool hasHighOperandLatency(const InstrItineraryData *ItinData,
|
||||||
const MachineRegisterInfo *MRI,
|
const MachineRegisterInfo *MRI,
|
||||||
const MachineInstr *DefMI, unsigned DefIdx,
|
const MachineInstr *DefMI, unsigned DefIdx,
|
||||||
|
@@ -42,33 +42,6 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Thumb2InstrInfo::isProfitableToIfCvt(MachineBasicBlock &MBB,
|
|
||||||
unsigned NumInstrs,
|
|
||||||
float Prediction,
|
|
||||||
float Confidence) const {
|
|
||||||
if (!OldT2IfCvt)
|
|
||||||
return ARMBaseInstrInfo::isProfitableToIfCvt(MBB, NumInstrs,
|
|
||||||
Prediction, Confidence);
|
|
||||||
return NumInstrs && NumInstrs <= 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool Thumb2InstrInfo::
|
|
||||||
isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumT,
|
|
||||||
MachineBasicBlock &FMBB, unsigned NumF,
|
|
||||||
float Prediction, float Confidence) const {
|
|
||||||
if (!OldT2IfCvt)
|
|
||||||
return ARMBaseInstrInfo::isProfitableToIfCvt(TMBB, NumT,
|
|
||||||
FMBB, NumF,
|
|
||||||
Prediction, Confidence);
|
|
||||||
|
|
||||||
// FIXME: Catch optimization such as:
|
|
||||||
// r0 = movne
|
|
||||||
// r0 = moveq
|
|
||||||
return NumT && NumF &&
|
|
||||||
NumT <= 3 && NumF <= 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
|
Thumb2InstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail,
|
||||||
MachineBasicBlock *NewDest) const {
|
MachineBasicBlock *NewDest) const {
|
||||||
|
@@ -38,12 +38,6 @@ public:
|
|||||||
bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
|
bool isLegalToSplitMBBAt(MachineBasicBlock &MBB,
|
||||||
MachineBasicBlock::iterator MBBI) const;
|
MachineBasicBlock::iterator MBBI) const;
|
||||||
|
|
||||||
bool isProfitableToIfCvt(MachineBasicBlock &MBB, unsigned NumInstrs,
|
|
||||||
float Prediction, float Confidence) const;
|
|
||||||
bool isProfitableToIfCvt(MachineBasicBlock &TMBB, unsigned NumTInstrs,
|
|
||||||
MachineBasicBlock &FMBB, unsigned NumFInstrs,
|
|
||||||
float Prediction, float Confidence) const;
|
|
||||||
|
|
||||||
void copyPhysReg(MachineBasicBlock &MBB,
|
void copyPhysReg(MachineBasicBlock &MBB,
|
||||||
MachineBasicBlock::iterator I, DebugLoc DL,
|
MachineBasicBlock::iterator I, DebugLoc DL,
|
||||||
unsigned DestReg, unsigned SrcReg,
|
unsigned DestReg, unsigned SrcReg,
|
||||||
|
@@ -50,8 +50,8 @@ TargetInstrInfo::~TargetInstrInfo() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
unsigned
|
unsigned
|
||||||
TargetInstrInfo::getNumMicroOps(const MachineInstr *MI,
|
TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
|
||||||
const InstrItineraryData *ItinData) const {
|
const MachineInstr *MI) const {
|
||||||
if (!ItinData || ItinData->isEmpty())
|
if (!ItinData || ItinData->isEmpty())
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
@@ -94,6 +94,26 @@ TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
|||||||
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
|
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
|
||||||
|
const MachineInstr *MI,
|
||||||
|
unsigned *PredCost) const {
|
||||||
|
if (!ItinData || ItinData->isEmpty())
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return ItinData->getStageLatency(MI->getDesc().getSchedClass());
|
||||||
|
}
|
||||||
|
|
||||||
|
int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
|
||||||
|
SDNode *N) const {
|
||||||
|
if (!ItinData || ItinData->isEmpty())
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
if (!N->isMachineOpcode())
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass());
|
||||||
|
}
|
||||||
|
|
||||||
bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
|
bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData,
|
||||||
const MachineInstr *DefMI,
|
const MachineInstr *DefMI,
|
||||||
unsigned DefIdx) const {
|
unsigned DefIdx) const {
|
||||||
|
@@ -8,8 +8,9 @@
|
|||||||
define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) {
|
define fastcc i32 @dct_luma_sp(i32 %block_x, i32 %block_y, i32* %coeff_cost) {
|
||||||
entry:
|
entry:
|
||||||
; Make sure to use base-updating stores for saving callee-saved registers.
|
; Make sure to use base-updating stores for saving callee-saved registers.
|
||||||
|
; CHECK: push
|
||||||
; CHECK-NOT: sub sp
|
; CHECK-NOT: sub sp
|
||||||
; CHECK: vpush
|
; CHECK: push
|
||||||
%predicted_block = alloca [4 x [4 x i32]], align 4 ; <[4 x [4 x i32]]*> [#uses=1]
|
%predicted_block = alloca [4 x [4 x i32]], align 4 ; <[4 x [4 x i32]]*> [#uses=1]
|
||||||
br label %cond_next489
|
br label %cond_next489
|
||||||
|
|
||||||
|
@@ -4,27 +4,40 @@
|
|||||||
; micro-coded and would have long issue latency even if predicated on
|
; micro-coded and would have long issue latency even if predicated on
|
||||||
; false predicate.
|
; false predicate.
|
||||||
|
|
||||||
%0 = type { float, float, float, float }
|
define void @t(double %a, double %b, double %c, double %d, i32* nocapture %solutions, double* nocapture %x) nounwind {
|
||||||
%pln = type { %vec, float }
|
|
||||||
%vec = type { [4 x float] }
|
|
||||||
|
|
||||||
define arm_aapcs_vfpcc float @aaa(%vec* nocapture %ustart, %vec* nocapture %udir, %vec* nocapture %vstart, %vec* nocapture %vdir, %vec* %upoint, %vec* %vpoint) {
|
|
||||||
; CHECK: aaa:
|
|
||||||
; CHECK: vldr.32
|
|
||||||
; CHECK-NOT: vldrne
|
|
||||||
; CHECK-NOT: vpopne
|
|
||||||
; CHECK-NOT: popne
|
|
||||||
; CHECK: vpop
|
|
||||||
; CHECK: pop
|
|
||||||
entry:
|
entry:
|
||||||
br i1 undef, label %bb81, label %bb48
|
; CHECK: t:
|
||||||
|
; CHECK: vpop {d8}
|
||||||
|
; CHECK-NOT: vpopne
|
||||||
|
; CHECK: ldmia sp!, {r7, pc}
|
||||||
|
; CHECK: vpop {d8}
|
||||||
|
; CHECK: ldmia sp!, {r7, pc}
|
||||||
|
br i1 undef, label %if.else, label %if.then
|
||||||
|
|
||||||
bb48: ; preds = %entry
|
if.then: ; preds = %entry
|
||||||
%0 = call arm_aapcs_vfpcc %0 @bbb(%pln* undef, %vec* %vstart, %vec* undef) nounwind ; <%0> [#uses=0]
|
%mul73 = fmul double undef, 0.000000e+00
|
||||||
ret float 0.000000e+00
|
%sub76 = fsub double %mul73, undef
|
||||||
|
store double %sub76, double* undef, align 4
|
||||||
|
%call88 = tail call double @cos(double 0.000000e+00) nounwind
|
||||||
|
%mul89 = fmul double undef, %call88
|
||||||
|
%sub92 = fsub double %mul89, undef
|
||||||
|
store double %sub92, double* undef, align 4
|
||||||
|
ret void
|
||||||
|
|
||||||
bb81: ; preds = %entry
|
if.else: ; preds = %entry
|
||||||
ret float 0.000000e+00
|
%tmp101 = tail call double @llvm.pow.f64(double undef, double 0x3FD5555555555555)
|
||||||
|
%add112 = fadd double %tmp101, undef
|
||||||
|
%mul118 = fmul double %add112, undef
|
||||||
|
store double 0.000000e+00, double* %x, align 4
|
||||||
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
declare arm_aapcs_vfpcc %0 @bbb(%pln* nocapture, %vec* nocapture, %vec* nocapture) nounwind
|
declare double @acos(double)
|
||||||
|
|
||||||
|
declare double @sqrt(double) readnone
|
||||||
|
|
||||||
|
declare double @cos(double) readnone
|
||||||
|
|
||||||
|
declare double @fabs(double)
|
||||||
|
|
||||||
|
declare double @llvm.pow.f64(double, double) nounwind readonly
|
||||||
|
59
test/CodeGen/ARM/ifcvt11.ll
Normal file
59
test/CodeGen/ARM/ifcvt11.ll
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
; RUN: llc < %s -mtriple=arm-apple-darwin -mcpu=cortex-a8 | FileCheck %s
|
||||||
|
; rdar://8598427
|
||||||
|
; Adjust if-converter heuristics to avoid predicating vmrs which can cause
|
||||||
|
; significant regression.
|
||||||
|
|
||||||
|
%struct.xyz_t = type { double, double, double }
|
||||||
|
|
||||||
|
define i32 @effie(i32 %tsets, %struct.xyz_t* nocapture %p, i32 %a, i32 %b, i32 %c) nounwind readonly noinline {
|
||||||
|
; CHECK: effie:
|
||||||
|
entry:
|
||||||
|
%0 = icmp sgt i32 %tsets, 0
|
||||||
|
br i1 %0, label %bb.nph, label %bb6
|
||||||
|
|
||||||
|
bb.nph: ; preds = %entry
|
||||||
|
%1 = add nsw i32 %b, %a
|
||||||
|
%2 = add nsw i32 %1, %c
|
||||||
|
br label %bb
|
||||||
|
|
||||||
|
bb: ; preds = %bb4, %bb.nph
|
||||||
|
; CHECK: vcmpe.f64
|
||||||
|
; CHECK: vmrs apsr_nzcv, fpscr
|
||||||
|
%r.19 = phi i32 [ 0, %bb.nph ], [ %r.0, %bb4 ]
|
||||||
|
%n.08 = phi i32 [ 0, %bb.nph ], [ %10, %bb4 ]
|
||||||
|
%scevgep10 = getelementptr inbounds %struct.xyz_t* %p, i32 %n.08, i32 0
|
||||||
|
%scevgep11 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 1
|
||||||
|
%3 = load double* %scevgep10, align 4
|
||||||
|
%4 = load double* %scevgep11, align 4
|
||||||
|
%5 = fcmp uge double %3, %4
|
||||||
|
br i1 %5, label %bb3, label %bb1
|
||||||
|
|
||||||
|
bb1: ; preds = %bb
|
||||||
|
; CHECK-NOT: it
|
||||||
|
; CHECK-NOT: vcmpemi
|
||||||
|
; CHECK-NOT: vmrsmi
|
||||||
|
; CHECK: vcmpe.f64
|
||||||
|
; CHECK: vmrs apsr_nzcv, fpscr
|
||||||
|
%scevgep12 = getelementptr %struct.xyz_t* %p, i32 %n.08, i32 2
|
||||||
|
%6 = load double* %scevgep12, align 4
|
||||||
|
%7 = fcmp uge double %3, %6
|
||||||
|
br i1 %7, label %bb3, label %bb2
|
||||||
|
|
||||||
|
bb2: ; preds = %bb1
|
||||||
|
%8 = add nsw i32 %2, %r.19
|
||||||
|
br label %bb4
|
||||||
|
|
||||||
|
bb3: ; preds = %bb1, %bb
|
||||||
|
%9 = add nsw i32 %r.19, 1
|
||||||
|
br label %bb4
|
||||||
|
|
||||||
|
bb4: ; preds = %bb3, %bb2
|
||||||
|
%r.0 = phi i32 [ %9, %bb3 ], [ %8, %bb2 ]
|
||||||
|
%10 = add nsw i32 %n.08, 1
|
||||||
|
%exitcond = icmp eq i32 %10, %tsets
|
||||||
|
br i1 %exitcond, label %bb6, label %bb
|
||||||
|
|
||||||
|
bb6: ; preds = %bb4, %entry
|
||||||
|
%r.1.lcssa = phi i32 [ 0, %entry ], [ %r.0, %bb4 ]
|
||||||
|
ret i32 %r.1.lcssa
|
||||||
|
}
|
@@ -4,14 +4,14 @@
|
|||||||
; constant offset addressing, so that each of the following stores
|
; constant offset addressing, so that each of the following stores
|
||||||
; uses the same register.
|
; uses the same register.
|
||||||
|
|
||||||
; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-128]
|
; CHECK: vstr.32 s{{.*}}, [lr, #-128]
|
||||||
; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-96]
|
; CHECK: vstr.32 s{{.*}}, [lr, #-96]
|
||||||
; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-64]
|
; CHECK: vstr.32 s{{.*}}, [lr, #-64]
|
||||||
; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #-32]
|
; CHECK: vstr.32 s{{.*}}, [lr, #-32]
|
||||||
; CHECK: vstr.32 s{{.*}}, [r{{.*}}]
|
; CHECK: vstr.32 s{{.*}}, [lr]
|
||||||
; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #32]
|
; CHECK: vstr.32 s{{.*}}, [lr, #32]
|
||||||
; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #64]
|
; CHECK: vstr.32 s{{.*}}, [lr, #64]
|
||||||
; CHECK: vstr.32 s{{.*}}, [r{{.*}}, #96]
|
; CHECK: vstr.32 s{{.*}}, [lr, #96]
|
||||||
|
|
||||||
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
|
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
|
||||||
|
|
||||||
|
@@ -271,7 +271,6 @@ define arm_aapcs_vfpcc i32 @t10() nounwind {
|
|||||||
entry:
|
entry:
|
||||||
; CHECK: t10:
|
; CHECK: t10:
|
||||||
; CHECK: vmov.i32 q9, #0x3F000000
|
; CHECK: vmov.i32 q9, #0x3F000000
|
||||||
; CHECK: vmov d0, d17
|
|
||||||
; CHECK: vmla.f32 q8, q8, d0[0]
|
; CHECK: vmla.f32 q8, q8, d0[0]
|
||||||
%0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
|
%0 = shufflevector <4 x float> zeroinitializer, <4 x float> undef, <4 x i32> zeroinitializer ; <<4 x float>> [#uses=1]
|
||||||
%1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
|
%1 = insertelement <4 x float> %0, float undef, i32 1 ; <<4 x float>> [#uses=1]
|
||||||
|
@@ -23,8 +23,6 @@ entry:
|
|||||||
%4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2]
|
%4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2]
|
||||||
; Constant pool load followed by add.
|
; Constant pool load followed by add.
|
||||||
; Then clobber the loaded register, not the sum.
|
; Then clobber the loaded register, not the sum.
|
||||||
; CHECK: vldr.64
|
|
||||||
; CHECK: vadd.f64
|
|
||||||
; CHECK: vldr.64 [[LDR:d.*]],
|
; CHECK: vldr.64 [[LDR:d.*]],
|
||||||
; CHECK: LPC0_0:
|
; CHECK: LPC0_0:
|
||||||
; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]]
|
; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]]
|
||||||
|
Reference in New Issue
Block a user