mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-08-09 11:25:55 +00:00
Allow a zero cycle stage to reserve/require a FU without advancing the cycle counter.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78736 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -39,7 +39,7 @@ ExactHazardRecognizer::ExactHazardRecognizer(const InstrItineraryData &LItinData
|
|||||||
|
|
||||||
unsigned ItinDepth = 0;
|
unsigned ItinDepth = 0;
|
||||||
for (; IS != E; ++IS)
|
for (; IS != E; ++IS)
|
||||||
ItinDepth += IS->Cycles;
|
ItinDepth += std::max(1U, IS->Cycles);
|
||||||
|
|
||||||
ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth);
|
ScoreboardDepth = std::max(ScoreboardDepth, ItinDepth);
|
||||||
}
|
}
|
||||||
@@ -89,9 +89,13 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
|
|||||||
unsigned idx = SU->getInstr()->getDesc().getSchedClass();
|
unsigned idx = SU->getInstr()->getDesc().getSchedClass();
|
||||||
for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
|
for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
|
||||||
IS != E; ++IS) {
|
IS != E; ++IS) {
|
||||||
|
// If the stage's cycles are 0, then we must have the FU free in
|
||||||
|
// the current cycle, but we don't advance the cycle time.
|
||||||
|
unsigned StageCycles = std::max(1U, IS->Cycles);
|
||||||
|
|
||||||
// We must find one of the stage's units free for every cycle the
|
// We must find one of the stage's units free for every cycle the
|
||||||
// stage is occupied.
|
// stage is occupied.
|
||||||
for (unsigned int i = 0; i < IS->Cycles; ++i) {
|
for (unsigned int i = 0; i < StageCycles; ++i) {
|
||||||
assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
|
assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
|
||||||
|
|
||||||
unsigned index = getFutureIndex(cycle);
|
unsigned index = getFutureIndex(cycle);
|
||||||
@@ -103,7 +107,8 @@ ExactHazardRecognizer::HazardType ExactHazardRecognizer::getHazardType(SUnit *SU
|
|||||||
return Hazard;
|
return Hazard;
|
||||||
}
|
}
|
||||||
|
|
||||||
++cycle;
|
if (IS->Cycles > 0)
|
||||||
|
++cycle;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -118,9 +123,13 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
|
|||||||
unsigned idx = SU->getInstr()->getDesc().getSchedClass();
|
unsigned idx = SU->getInstr()->getDesc().getSchedClass();
|
||||||
for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
|
for (const InstrStage *IS = ItinData.begin(idx), *E = ItinData.end(idx);
|
||||||
IS != E; ++IS) {
|
IS != E; ++IS) {
|
||||||
|
// If the stage's cycles are 0, then we must reserve the FU in the
|
||||||
|
// current cycle, but we don't advance the cycle time.
|
||||||
|
unsigned StageCycles = std::max(1U, IS->Cycles);
|
||||||
|
|
||||||
// We must reserve one of the stage's units for every cycle the
|
// We must reserve one of the stage's units for every cycle the
|
||||||
// stage is occupied.
|
// stage is occupied.
|
||||||
for (unsigned int i = 0; i < IS->Cycles; ++i) {
|
for (unsigned int i = 0; i < StageCycles; ++i) {
|
||||||
assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
|
assert((cycle < ScoreboardDepth) && "Scoreboard depth exceeded!");
|
||||||
|
|
||||||
unsigned index = getFutureIndex(cycle);
|
unsigned index = getFutureIndex(cycle);
|
||||||
@@ -135,7 +144,9 @@ void ExactHazardRecognizer::EmitInstruction(SUnit *SU) {
|
|||||||
|
|
||||||
assert(freeUnit && "No function unit available!");
|
assert(freeUnit && "No function unit available!");
|
||||||
Scoreboard[index] |= freeUnit;
|
Scoreboard[index] |= freeUnit;
|
||||||
++cycle;
|
|
||||||
|
if (IS->Cycles > 0)
|
||||||
|
++cycle;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -114,7 +114,7 @@ def : Processor<"arm1156t2f-s", V6Itineraries,
|
|||||||
// V7 Processors.
|
// V7 Processors.
|
||||||
def : Processor<"cortex-a8", CortexA8Itineraries,
|
def : Processor<"cortex-a8", CortexA8Itineraries,
|
||||||
[ArchV7A, FeatureThumb2, FeatureNEON, FeatureNEONFP]>;
|
[ArchV7A, FeatureThumb2, FeatureNEON, FeatureNEONFP]>;
|
||||||
def : Processor<"cortex-a9", V7Itineraries,
|
def : Processor<"cortex-a9", CortexA9Itineraries,
|
||||||
[ArchV7A, FeatureThumb2, FeatureNEON]>;
|
[ArchV7A, FeatureThumb2, FeatureNEON]>;
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@@ -10,8 +10,9 @@
|
|||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Functional units across ARM processors
|
// Functional units across ARM processors
|
||||||
//
|
//
|
||||||
def FU_Pipe0 : FuncUnit; // pipeline 0 issue
|
def FU_Issue : FuncUnit; // issue
|
||||||
def FU_Pipe1 : FuncUnit; // pipeline 1 issue
|
def FU_Pipe0 : FuncUnit; // pipeline 0
|
||||||
|
def FU_Pipe1 : FuncUnit; // pipeline 1
|
||||||
def FU_LdSt0 : FuncUnit; // pipeline 0 load/store
|
def FU_LdSt0 : FuncUnit; // pipeline 0 load/store
|
||||||
def FU_LdSt1 : FuncUnit; // pipeline 1 load/store
|
def FU_LdSt1 : FuncUnit; // pipeline 1 load/store
|
||||||
|
|
||||||
@@ -19,9 +20,11 @@ def FU_LdSt1 : FuncUnit; // pipeline 1 load/store
|
|||||||
// Instruction Itinerary classes used for ARM
|
// Instruction Itinerary classes used for ARM
|
||||||
//
|
//
|
||||||
def IIC_iALU : InstrItinClass;
|
def IIC_iALU : InstrItinClass;
|
||||||
|
def IIC_iMPY : InstrItinClass;
|
||||||
def IIC_iLoad : InstrItinClass;
|
def IIC_iLoad : InstrItinClass;
|
||||||
def IIC_iStore : InstrItinClass;
|
def IIC_iStore : InstrItinClass;
|
||||||
def IIC_fpALU : InstrItinClass;
|
def IIC_fpALU : InstrItinClass;
|
||||||
|
def IIC_fpMPY : InstrItinClass;
|
||||||
def IIC_fpLoad : InstrItinClass;
|
def IIC_fpLoad : InstrItinClass;
|
||||||
def IIC_fpStore : InstrItinClass;
|
def IIC_fpStore : InstrItinClass;
|
||||||
def IIC_Br : InstrItinClass;
|
def IIC_Br : InstrItinClass;
|
||||||
@@ -31,12 +34,14 @@ def IIC_Br : InstrItinClass;
|
|||||||
|
|
||||||
def GenericItineraries : ProcessorItineraries<[
|
def GenericItineraries : ProcessorItineraries<[
|
||||||
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>,
|
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
|
InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
|
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
|
||||||
InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
|
|
||||||
InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
|
InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>,
|
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>,
|
InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>
|
InstrItinData<IIC_fpMPY , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
|
InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
|
||||||
|
InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>
|
||||||
]>;
|
]>;
|
||||||
|
|
||||||
|
|
||||||
|
@@ -11,18 +11,16 @@
|
|||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
// TODO: this should model an ARM11
|
||||||
// Single issue pipeline so every itinerary starts with FU_pipe0
|
// Single issue pipeline so every itinerary starts with FU_pipe0
|
||||||
def V6Itineraries : ProcessorItineraries<[
|
def V6Itineraries : ProcessorItineraries<[
|
||||||
// single-cycle integer ALU
|
|
||||||
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>,
|
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
// loads have an extra cycle of latency, but are fully pipelined
|
InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
|
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
|
||||||
InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
|
|
||||||
// fully-pipelined stores
|
|
||||||
InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
|
InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>,
|
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
// fp ALU is not pipelined
|
InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
InstrItinData<IIC_fpALU , [InstrStage<6, [FU_Pipe0]>]>,
|
InstrItinData<IIC_fpMPY , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
// no delay slots, so the latency of a branch is unimportant
|
InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
|
||||||
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>
|
InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>
|
||||||
]>;
|
]>;
|
||||||
|
@@ -11,34 +11,51 @@
|
|||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
// Single issue pipeline so every itinerary starts with FU_Pipe0
|
|
||||||
def V7Itineraries : ProcessorItineraries<[
|
|
||||||
// single-cycle integer ALU
|
|
||||||
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>,
|
|
||||||
// loads have an extra cycle of latency, but are fully pipelined
|
|
||||||
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
|
|
||||||
InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
|
|
||||||
// fully-pipelined stores
|
|
||||||
InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
|
|
||||||
InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>,
|
|
||||||
// fp ALU is not pipelined
|
|
||||||
InstrItinData<IIC_fpALU , [InstrStage<6, [FU_Pipe0]>]>,
|
|
||||||
// no delay slots, so the latency of a branch is unimportant
|
|
||||||
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>
|
|
||||||
]>;
|
|
||||||
|
|
||||||
// Dual issue pipeline so every itinerary starts with FU_Pipe0 | FU_Pipe1
|
// Dual issue pipeline so every itinerary starts with FU_Pipe0 | FU_Pipe1
|
||||||
def CortexA8Itineraries : ProcessorItineraries<[
|
def CortexA8Itineraries : ProcessorItineraries<[
|
||||||
// single-cycle integer ALU
|
// two fully-pipelined integer ALU pipelines
|
||||||
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
|
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
|
||||||
|
// one fully-pipelined integer Multiply pipeline
|
||||||
|
// function units are used in alpha order, so use FU_Pipe1
|
||||||
|
// for the Multiply pipeline
|
||||||
|
InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe1]>]>,
|
||||||
// loads have an extra cycle of latency, but are fully pipelined
|
// loads have an extra cycle of latency, but are fully pipelined
|
||||||
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>,
|
// use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
|
||||||
InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>, InstrStage<1, [FU_LdSt0]>]>,
|
InstrItinData<IIC_iLoad , [InstrStage<0, [FU_Issue]>,
|
||||||
|
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
|
||||||
|
InstrStage<1, [FU_LdSt0]>]>,
|
||||||
// fully-pipelined stores
|
// fully-pipelined stores
|
||||||
InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
|
// use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
|
||||||
InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
|
InstrItinData<IIC_iStore , [InstrStage<0, [FU_Issue]>,
|
||||||
// fp ALU is not pipelined
|
InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
|
||||||
InstrItinData<IIC_fpALU , [InstrStage<6, [FU_Pipe0, FU_Pipe1]>]>,
|
|
||||||
// no delay slots, so the latency of a branch is unimportant
|
// no delay slots, so the latency of a branch is unimportant
|
||||||
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>
|
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>,
|
||||||
|
|
||||||
|
// VFP ALU is not pipelined so stall all issues
|
||||||
|
// FIXME assume NFP pipeline and 7 cycle non-pipelined latency
|
||||||
|
InstrItinData<IIC_fpALU , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>,
|
||||||
|
// VFP MPY is not pipelined so stall all issues
|
||||||
|
// FIXME assume NFP pipeline and 7 cycle non-pipelined latency
|
||||||
|
InstrItinData<IIC_fpMPY , [InstrStage<7, [FU_Pipe0, FU_Pipe1]>]>,
|
||||||
|
// loads have an extra cycle of latency, but are fully pipelined
|
||||||
|
// use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
|
||||||
|
InstrItinData<IIC_fpLoad , [InstrStage<0, [FU_Issue]>,
|
||||||
|
InstrStage<1, [FU_Pipe0, FU_Pipe1]>,
|
||||||
|
InstrStage<1, [FU_LdSt0]>]>,
|
||||||
|
// use a 0 cycle FU_Issue to enforce the 1 load/store per cycle limit
|
||||||
|
InstrItinData<IIC_fpStore , [InstrStage<0, [FU_Issue]>,
|
||||||
|
InstrStage<1, [FU_Pipe0, FU_Pipe1]>]>
|
||||||
|
]>;
|
||||||
|
|
||||||
|
// FIXME
|
||||||
|
def CortexA9Itineraries : ProcessorItineraries<[
|
||||||
|
InstrItinData<IIC_iALU , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
|
InstrItinData<IIC_iMPY , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
|
InstrItinData<IIC_iLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
|
||||||
|
InstrItinData<IIC_iStore , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
|
InstrItinData<IIC_Br , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
|
InstrItinData<IIC_fpALU , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
|
InstrItinData<IIC_fpMPY , [InstrStage<1, [FU_Pipe0]>]>,
|
||||||
|
InstrItinData<IIC_fpLoad , [InstrStage<1, [FU_Pipe0]>, InstrStage<1, [FU_LdSt0]>]>,
|
||||||
|
InstrItinData<IIC_fpStore , [InstrStage<1, [FU_Pipe0]>]>
|
||||||
]>;
|
]>;
|
||||||
|
Reference in New Issue
Block a user